5 files changed, 116 insertions, 100 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 0d4d5ed20f..b41c75e926 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.2 2003/01/12 04:03:34 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.3 2003/06/22 22:04:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,6 +19,8 @@
 #include "executor/executor.h"
 #include "parser/parse_oper.h"
 #include "utils/memutils.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*****************************************************************************
@@ -213,76 +215,46 @@ execTuplesMatchPrepare(TupleDesc tupdesc,
 	return eqfunctions;
 }
 
-
-/*****************************************************************************
- *		Utility routines for hashing
- *****************************************************************************/
-
 /*
- * ComputeHashFunc
+ * execTuplesHashPrepare
+ *		Look up the equality and hashing functions needed for a TupleHashTable.
  *
- *		the hash function for hash joins (also used for hash aggregation)
- *
- *		XXX this probably ought to be replaced with datatype-specific
- *		hash functions, such as those already implemented for hash indexes.
+ * Like execTuplesMatchPrepare, but also finds the hash function associated
+ * with each column's equality operator (elog(ERROR) if there is none).
+ * *eqfunctions and *hashfunctions receive palloc'd arrays of numCols entries.
  */
-uint32
-ComputeHashFunc(Datum key, int typLen, bool byVal)
+void
+execTuplesHashPrepare(TupleDesc tupdesc,
+					  int numCols,
+					  AttrNumber *matchColIdx,
+					  FmgrInfo **eqfunctions,
+					  FmgrInfo **hashfunctions)
 {
-	unsigned char *k;
+	int			i;
 
-	if (byVal)
-	{
-		/*
-		 * If it's a by-value data type, just hash the whole Datum value.
-		 * This assumes that datatypes narrower than Datum are
-		 * consistently padded (either zero-extended or sign-extended, but
-		 * not random bits) to fill Datum; see the XXXGetDatum macros in
-		 * postgres.h. NOTE: it would not work to do hash_any(&key, len)
-		 * since this would get the wrong bytes on a big-endian machine.
-		 */
-		k = (unsigned char *) &key;
-		typLen = sizeof(Datum);
-	}
-	else
+	*eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+	*hashfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+
+	for (i = 0; i < numCols; i++)
 	{
-		if (typLen > 0)
-		{
-			/* fixed-width pass-by-reference type */
-			k = (unsigned char *) DatumGetPointer(key);
-		}
-		else if (typLen == -1)
-		{
-			/*
-			 * It's a varlena type, so 'key' points to a "struct varlena".
-			 * NOTE: VARSIZE returns the "real" data length plus the
-			 * sizeof the "vl_len" attribute of varlena (the length
-			 * information). 'key' points to the beginning of the varlena
-			 * struct, so we have to use "VARDATA" to find the beginning
-			 * of the "real" data.	Also, we have to be careful to detoast
-			 * the datum if it's toasted.  (We don't worry about freeing
-			 * the detoasted copy; that happens for free when the
-			 * per-tuple memory context is reset in ExecHashGetBucket.)
-			 */
-			struct varlena *vkey = PG_DETOAST_DATUM(key);
-
-			typLen = VARSIZE(vkey) - VARHDRSZ;
-			k = (unsigned char *) VARDATA(vkey);
-		}
-		else if (typLen == -2)
-		{
-			/* It's a null-terminated C string */
-			typLen = strlen(DatumGetCString(key)) + 1;
-			k = (unsigned char *) DatumGetPointer(key);
-		}
-		else
-		{
-			elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
-			k = NULL;			/* keep compiler quiet */
-		}
+		AttrNumber	att = matchColIdx[i];	/* 1-based column number */
+		Oid			typid = tupdesc->attrs[att - 1]->atttypid;
+		Operator	optup;
+		Oid			eq_opr;
+		Oid			eq_function;
+		Oid			hash_function;
+
+		optup = equality_oper(typid, false);
+		eq_opr = oprid(optup);
+		eq_function = oprfuncid(optup);
+		ReleaseSysCache(optup);		/* OIDs were copied out above */
+		hash_function = get_op_hash_function(eq_opr);
+		if (!OidIsValid(hash_function))
+			elog(ERROR, "Could not find hash function for hash operator %u",
+				 eq_opr);
+		fmgr_info(eq_function, &(*eqfunctions)[i]);
+		fmgr_info(hash_function, &(*hashfunctions)[i]);
 	}
-
-	return DatumGetUInt32(hash_any(k, typLen));
 }
 
 
@@ -299,19 +271,21 @@ ComputeHashFunc(Datum key, int typLen, bool byVal)
  *
  *	numCols, keyColIdx: identify the tuple fields to use as lookup key
  *	eqfunctions: equality comparison functions to use
+ *	hashfunctions: datatype-specific hashing functions to use
  *	nbuckets: number of buckets to make
  *	entrysize: size of each entry (at least sizeof(TupleHashEntryData))
  *	tablecxt: memory context in which to store table and table entries
  *	tempcxt: short-lived context for evaluation hash and comparison functions
  *
- * The eqfunctions array may be made with execTuplesMatchPrepare().
+ * The function arrays may be made with execTuplesHashPrepare().
  *
- * Note that keyColIdx and eqfunctions must be allocated in storage that
- * will live as long as the hashtable does.
+ * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
+ * storage that will live as long as the hashtable does.
  */
 TupleHashTable
 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 					FmgrInfo *eqfunctions,
+					FmgrInfo *hashfunctions,
 					int nbuckets, Size entrysize,
 					MemoryContext tablecxt, MemoryContext tempcxt)
 {
@@ -328,6 +302,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 	hashtable->numCols = numCols;
 	hashtable->keyColIdx = keyColIdx;
 	hashtable->eqfunctions = eqfunctions;
+	hashtable->hashfunctions = hashfunctions;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
 	hashtable->entrysize = entrysize;
@@ -375,11 +350,15 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
 
 		attr = heap_getattr(tuple, att, tupdesc, &isNull);
-		if (isNull)
-			continue;			/* treat nulls as having hash key 0 */
-		hashkey ^= ComputeHashFunc(attr,
-								   (int) tupdesc->attrs[att - 1]->attlen,
-								   tupdesc->attrs[att - 1]->attbyval);
+
+		if (!isNull)			/* treat nulls as having hash key 0 */
+		{
+			uint32		hkey;
+
+			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
+												attr));
+			hashkey ^= hkey;
+		}
 	}
 	bucketno = hashkey % (uint32) hashtable->nbuckets;
 
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index f2499cb4e5..d0dd6b31c9 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -45,7 +45,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.106 2003/06/06 15:04:01 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeAgg.c,v 1.107 2003/06/22 22:04:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -582,6 +582,7 @@ build_hash_table(AggState *aggstate)
 	aggstate->hashtable = BuildTupleHashTable(node->numCols,
 											  node->grpColIdx,
 											  aggstate->eqfunctions,
+											  aggstate->hashfunctions,
 											  node->numGroups,
 											  entrysize,
 											  aggstate->aggcontext,
@@ -1035,6 +1036,7 @@ ExecInitAgg(Agg *node, EState *estate)
 	aggstate->aggs = NIL;
 	aggstate->numaggs = 0;
 	aggstate->eqfunctions = NULL;
+	aggstate->hashfunctions = NULL;
 	aggstate->peragg = NULL;
 	aggstate->agg_done = false;
 	aggstate->pergroup = NULL;
@@ -1123,14 +1125,23 @@ ExecInitAgg(Agg *node, EState *estate)
 	}
 
 	/*
-	 * If we are grouping, precompute fmgr lookup data for inner loop
+	 * If we are grouping, precompute fmgr lookup data for inner loop.
+	 * We need both equality and hashing functions to do it by hashing,
+	 * but only equality if not hashing.
 	 */
 	if (node->numCols > 0)
 	{
-		aggstate->eqfunctions =
-			execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
-								   node->numCols,
-								   node->grpColIdx);
+		if (node->aggstrategy == AGG_HASHED)
+			execTuplesHashPrepare(ExecGetScanType(&aggstate->ss),
+								  node->numCols,
+								  node->grpColIdx,
+								  &aggstate->eqfunctions,
+								  &aggstate->hashfunctions);
+		else
+			aggstate->eqfunctions =
+				execTuplesMatchPrepare(ExecGetScanType(&aggstate->ss),
+									   node->numCols,
+									   node->grpColIdx);
 	}
 
 	/*
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index b338c8961e..f00cc28684 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.75 2003/03/27 16:51:27 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.76 2003/06/22 22:04:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -192,7 +192,7 @@ ExecEndHash(HashState *node)
  * ----------------------------------------------------------------
  */
 HashJoinTable
-ExecHashTableCreate(Hash *node)
+ExecHashTableCreate(Hash *node, List *hashOperators)
 {
 	HashJoinTable hashtable;
 	Plan	   *outerNode;
@@ -201,7 +201,7 @@ ExecHashTableCreate(Hash *node)
 	int			nbatch;
 	int			nkeys;
 	int			i;
-	List	   *hk;
+	List	   *ho;
 	MemoryContext oldcxt;
 
 	/*
@@ -237,17 +237,20 @@ ExecHashTableCreate(Hash *node)
 	hashtable->outerBatchSize = NULL;
 
 	/*
-	 * Get info about the datatypes of the hash keys.
+	 * Get info about the hash functions to be used for each hash key.
 	 */
-	nkeys = length(node->hashkeys);
-	hashtable->typLens = (int16 *) palloc(nkeys * sizeof(int16));
-	hashtable->typByVals = (bool *) palloc(nkeys * sizeof(bool));
+	nkeys = length(hashOperators);
+	hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
 	i = 0;
-	foreach(hk, node->hashkeys)
+	foreach(ho, hashOperators)
 	{
-		get_typlenbyval(exprType(lfirst(hk)),
-						&hashtable->typLens[i],
-						&hashtable->typByVals[i]);
+		Oid		hashfn;
+
+		hashfn = get_op_hash_function(lfirsto(ho));
+		if (!OidIsValid(hashfn))
+			elog(ERROR, "Could not find hash function for hash operator %u",
+				 lfirsto(ho));
+		fmgr_info(hashfn, &hashtable->hashfunctions[i]);
 		i++;
 	}
 
@@ -520,7 +523,7 @@ ExecHashGetBucket(HashJoinTable hashtable,
 
 	/*
 	 * We reset the eval context each time to reclaim any memory leaked in
-	 * the hashkey expressions or ComputeHashFunc itself.
+	 * the hashkey expressions.
 	 */
 	ResetExprContext(econtext);
 
@@ -545,9 +548,11 @@ ExecHashGetBucket(HashJoinTable hashtable,
 		 */
 		if (!isNull)			/* treat nulls as having hash key 0 */
 		{
-			hashkey ^= ComputeHashFunc(keyval,
-									   (int) hashtable->typLens[i],
-									   hashtable->typByVals[i]);
+			uint32		hkey;
+
+			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
+												keyval));
+			hashkey ^= hkey;
 		}
 
 		i++;
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 17585b2f0f..9a0071f018 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.52 2003/06/22 22:04:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -117,7 +117,8 @@ ExecHashJoin(HashJoinState *node)
 		 * create the hash table
 		 */
 		Assert(hashtable == NULL);
-		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
+		hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
+										node->hj_HashOperators);
 		node->hj_HashTable = hashtable;
 
 		/*
@@ -305,6 +306,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 	Plan	   *outerNode;
 	Hash	   *hashNode;
 	List	   *hclauses;
+	List	   *hoperators;
 	List	   *hcl;
 
 	/*
@@ -406,8 +408,9 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 
 	/*
 	 * The planner already made a list of the inner hashkeys for us,
-	 * but we also need a list of the outer hashkeys.  Each list of
-	 * exprs must then be prepared for execution.
+	 * but we also need a list of the outer hashkeys, as well as a list
+	 * of the hash operator OIDs.  Both lists of exprs must then be prepared
+	 * for execution.
 	 */
 	hjstate->hj_InnerHashKeys = (List *)
 		ExecInitExpr((Expr *) hashNode->hashkeys,
@@ -416,13 +419,19 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
 		hjstate->hj_InnerHashKeys;
 
 	hclauses = NIL;
+	hoperators = NIL;
 	foreach(hcl, node->hashclauses)
 	{
-		hclauses = lappend(hclauses, get_leftop(lfirst(hcl)));
+		OpExpr	   *hclause = (OpExpr *) lfirst(hcl);
+
+		Assert(IsA(hclause, OpExpr));
+		hclauses = lappend(hclauses, get_leftop((Expr *) hclause));
+		hoperators = lappendo(hoperators, hclause->opno);
 	}
 	hjstate->hj_OuterHashKeys = (List *)
 		ExecInitExpr((Expr *) hclauses,
 					 (PlanState *) hjstate);
+	hjstate->hj_HashOperators = hoperators;
 
 	hjstate->js.ps.ps_OuterTupleSlot = NULL;
 	hjstate->js.ps.ps_TupFromTlist = false;
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index ff5d03faf8..82502c985e 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.46 2003/06/06 15:04:01 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/nodeSubplan.c,v 1.47 2003/06/22 22:04:54 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -519,6 +519,7 @@ buildSubPlanHash(SubPlanState *node)
 	node->hashtable = BuildTupleHashTable(ncols,
 										  node->keyColIdx,
 										  node->eqfunctions,
+										  node->hashfunctions,
 										  nbuckets,
 										  sizeof(TupleHashEntryData),
 										  node->tablecxt,
@@ -537,6 +538,7 @@ buildSubPlanHash(SubPlanState *node)
 		node->hashnulls = BuildTupleHashTable(ncols,
 											  node->keyColIdx,
 											  node->eqfunctions,
+											  node->hashfunctions,
 											  nbuckets,
 											  sizeof(TupleHashEntryData),
 											  node->tablecxt,
@@ -700,6 +702,7 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 	node->innerecontext = NULL;
 	node->keyColIdx = NULL;
 	node->eqfunctions = NULL;
+	node->hashfunctions = NULL;
 
 	/*
 	 * create an EState for the subplan
@@ -797,11 +800,12 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 		 * ExecTypeFromTL).
 		 *
 		 * We also extract the combining operators themselves to initialize
-		 * the equality functions for the hash tables.
+		 * the equality and hashing functions for the hash tables.
 		 */
 		lefttlist = righttlist = NIL;
 		leftptlist = rightptlist = NIL;
 		node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->hashfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
 		i = 1;
 		foreach(lexpr, node->exprs)
 		{
@@ -811,6 +815,7 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 			Expr	   *expr;
 			TargetEntry *tle;
 			GenericExprState *tlestate;
+			Oid			hashfn;
 
 			Assert(IsA(fstate, FuncExprState));
 			Assert(IsA(opexpr, OpExpr));
@@ -850,6 +855,13 @@ ExecInitSubPlan(SubPlanState *node, EState *estate)
 			fmgr_info(opexpr->opfuncid, &node->eqfunctions[i-1]);
 			node->eqfunctions[i-1].fn_expr = (Node *) opexpr;
 
+			/* Look up the associated hash function */
+			hashfn = get_op_hash_function(opexpr->opno);
+			if (!OidIsValid(hashfn))
+				elog(ERROR, "Could not find hash function for hash operator %u",
+					 opexpr->opno);
+			fmgr_info(hashfn, &node->hashfunctions[i-1]);
+
 			i++;
 		}