summary | refs | log | tree | commit | diff
path: root/src/backend/optimizer
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2002-11-30 00:08:22 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2002-11-30 00:08:22 +0000
commit: ddb2d78de0172b1f3a00c8e3bf35345af9952f43 (patch)
tree: 75aaa2922e21b78514cd592241c1718a2e6a4ba8 /src/backend/optimizer
parent: f68f11928d5c791873073c882775dae10283ff49 (diff)
download: postgresql-ddb2d78de0172b1f3a00c8e3bf35345af9952f43.tar.gz
Upgrade planner and executor to allow multiple hash keys for a hash join,
instead of only one. This should speed up planning (only one hash path to consider for a given pair of relations) as well as allow more effective hashing, when there are multiple hashable joinclauses.
Diffstat (limited to 'src/backend/optimizer')
-rw-r--r-- src/backend/optimizer/path/costsize.c 84
-rw-r--r-- src/backend/optimizer/path/joinpath.c 30
-rw-r--r-- src/backend/optimizer/plan/createplan.c 30
-rw-r--r-- src/backend/optimizer/plan/subselect.c 4
-rw-r--r-- src/backend/optimizer/util/pathnode.c 4
5 files changed, 87 insertions, 65 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 6cf8b2af4b..fbdeea414c 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -819,7 +819,7 @@ cost_mergejoin(Path *path, Query *root,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrictlist)
*/
void
@@ -838,10 +838,8 @@ cost_hashjoin(Path *path, Query *root,
double innerbytes = relation_byte_size(inner_path->parent->rows,
inner_path->parent->width);
long hashtablebytes = SortMem * 1024L;
- RestrictInfo *restrictinfo;
- Var *left,
- *right;
Selectivity innerbucketsize;
+ List *hcl;
if (!enable_hashjoin)
startup_cost += disable_cost;
@@ -856,43 +854,57 @@ cost_hashjoin(Path *path, Query *root,
run_cost += cpu_operator_cost * outer_path->parent->rows;
/*
- * Determine bucketsize fraction for inner relation. First we have to
- * figure out which side of the hashjoin clause is the inner side.
+ * Determine bucketsize fraction for inner relation. We use the
+ * smallest bucketsize estimated for any individual hashclause;
+ * this is undoubtedly conservative.
*/
- Assert(length(hashclauses) == 1);
- Assert(IsA(lfirst(hashclauses), RestrictInfo));
- restrictinfo = (RestrictInfo *) lfirst(hashclauses);
- /* these must be OK, since check_hashjoinable accepted the clause */
- left = get_leftop(restrictinfo->clause);
- right = get_rightop(restrictinfo->clause);
-
- /*
- * Since we tend to visit the same clauses over and over when planning
- * a large query, we cache the bucketsize estimate in the RestrictInfo
- * node to avoid repeated lookups of statistics.
- */
- if (VARISRELMEMBER(right->varno, inner_path->parent))
+ innerbucketsize = 1.0;
+ foreach(hcl, hashclauses)
{
- /* righthand side is inner */
- innerbucketsize = restrictinfo->right_bucketsize;
- if (innerbucketsize < 0)
+ RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
+ Var *left,
+ *right;
+ Selectivity thisbucketsize;
+
+ Assert(IsA(restrictinfo, RestrictInfo));
+ /* these must be OK, since check_hashjoinable accepted the clause */
+ left = get_leftop(restrictinfo->clause);
+ right = get_rightop(restrictinfo->clause);
+
+ /*
+ * First we have to figure out which side of the hashjoin clause is the
+ * inner side.
+ *
+ * Since we tend to visit the same clauses over and over when planning
+ * a large query, we cache the bucketsize estimate in the RestrictInfo
+ * node to avoid repeated lookups of statistics.
+ */
+ if (VARISRELMEMBER(right->varno, inner_path->parent))
{
- /* not cached yet */
- innerbucketsize = estimate_hash_bucketsize(root, right);
- restrictinfo->right_bucketsize = innerbucketsize;
+ /* righthand side is inner */
+ thisbucketsize = restrictinfo->right_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ thisbucketsize = estimate_hash_bucketsize(root, right);
+ restrictinfo->right_bucketsize = thisbucketsize;
+ }
}
- }
- else
- {
- Assert(VARISRELMEMBER(left->varno, inner_path->parent));
- /* lefthand side is inner */
- innerbucketsize = restrictinfo->left_bucketsize;
- if (innerbucketsize < 0)
+ else
{
- /* not cached yet */
- innerbucketsize = estimate_hash_bucketsize(root, left);
- restrictinfo->left_bucketsize = innerbucketsize;
+ Assert(VARISRELMEMBER(left->varno, inner_path->parent));
+ /* lefthand side is inner */
+ thisbucketsize = restrictinfo->left_bucketsize;
+ if (thisbucketsize < 0)
+ {
+ /* not cached yet */
+ thisbucketsize = estimate_hash_bucketsize(root, left);
+ restrictinfo->left_bucketsize = thisbucketsize;
+ }
}
+
+ if (innerbucketsize > thisbucketsize)
+ innerbucketsize = thisbucketsize;
}
/*
diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index ac5d4a72d4..6069a34d87 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.72 2002/11/24 21:52:14 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -701,7 +701,7 @@ match_unsorted_inner(Query *root,
/*
* hash_inner_and_outer
* Create hashjoin join paths by explicitly hashing both the outer and
- * inner join relations of each available hash clause.
+ * inner keys of each available hash clause.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
@@ -719,6 +719,7 @@ hash_inner_and_outer(Query *root,
JoinType jointype)
{
bool isouterjoin;
+ List *hashclauses;
List *i;
/*
@@ -737,20 +738,18 @@ hash_inner_and_outer(Query *root,
}
/*
+ * We need to build only one hashpath for any given pair of outer and
+ * inner relations; all of the hashable clauses will be used as keys.
+ *
* Scan the join's restrictinfo list to find hashjoinable clauses that
- * are usable with this pair of sub-relations. Since we currently
- * accept only var-op-var clauses as hashjoinable, we need only check
- * the membership of the vars to determine whether a particular clause
- * can be used with this pair of sub-relations. This code would need
- * to be upgraded if we wanted to allow more-complex expressions in
- * hash joins.
+ * are usable with this pair of sub-relations.
*/
+ hashclauses = NIL;
foreach(i, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Var *left,
*right;
- List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
@@ -768,6 +767,12 @@ hash_inner_and_outer(Query *root,
/*
* Check if clause is usable with these input rels.
+ *
+ * Since we currently accept only var-op-var clauses as hashjoinable,
+ * we need only check the membership of the vars to determine whether
+ * a particular clause can be used with this pair of sub-relations.
+ * This code would need to be upgraded if we wanted to allow
+ * more-complex expressions in hash joins.
*/
if (VARISRELMEMBER(left->varno, outerrel) &&
VARISRELMEMBER(right->varno, innerrel))
@@ -782,9 +787,12 @@ hash_inner_and_outer(Query *root,
else
continue; /* no good for these input relations */
- /* always a one-element list of hash clauses */
- hashclauses = makeList1(restrictinfo);
+ hashclauses = lappend(hashclauses, restrictinfo);
+ }
+ /* If we found any usable hashclauses, make a path */
+ if (hashclauses)
+ {
/*
* We consider both the cheapest-total-cost and
* cheapest-startup-cost outer paths. There's no need to consider
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index b393252542..d43e3271fb 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.125 2002/11/30 00:08:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -91,7 +91,7 @@ static HashJoin *make_hashjoin(List *tlist,
List *hashclauses,
Plan *lefttree, Plan *righttree,
JoinType jointype);
-static Hash *make_hash(List *tlist, Node *hashkey, Plan *lefttree);
+static Hash *make_hash(List *tlist, List *hashkeys, Plan *lefttree);
static MergeJoin *make_mergejoin(List *tlist,
List *joinclauses, List *otherclauses,
List *mergeclauses,
@@ -910,14 +910,9 @@ create_hashjoin_plan(Query *root,
List *hashclauses;
HashJoin *join_plan;
Hash *hash_plan;
- Node *innerhashkey;
+ List *innerhashkeys;
+ List *hcl;
- /*
- * NOTE: there will always be exactly one hashclause in the list
- * best_path->path_hashclauses (cf. hash_inner_and_outer()). We
- * represent it as a list anyway, for convenience with routines that
- * want to work on lists of clauses.
- */
hashclauses = get_actual_clauses(best_path->path_hashclauses);
/*
@@ -950,13 +945,20 @@ create_hashjoin_plan(Query *root,
inner_tlist,
(Index) 0));
- /* Now the righthand op of the sole hashclause is the inner hash key. */
- innerhashkey = (Node *) get_rightop(lfirst(hashclauses));
+ /*
+ * Extract the inner hash keys (right-hand operands of the hashclauses)
+ * to put in the Hash node.
+ */
+ innerhashkeys = NIL;
+ foreach(hcl, hashclauses)
+ {
+ innerhashkeys = lappend(innerhashkeys, get_rightop(lfirst(hcl)));
+ }
/*
* Build the hash node and hash join node.
*/
- hash_plan = make_hash(inner_tlist, innerhashkey, inner_plan);
+ hash_plan = make_hash(inner_tlist, innerhashkeys, inner_plan);
join_plan = make_hashjoin(tlist,
joinclauses,
otherclauses,
@@ -1511,7 +1513,7 @@ make_hashjoin(List *tlist,
}
static Hash *
-make_hash(List *tlist, Node *hashkey, Plan *lefttree)
+make_hash(List *tlist, List *hashkeys, Plan *lefttree)
{
Hash *node = makeNode(Hash);
Plan *plan = &node->plan;
@@ -1528,7 +1530,7 @@ make_hash(List *tlist, Node *hashkey, Plan *lefttree)
plan->qual = NULL;
plan->lefttree = lefttree;
plan->righttree = NULL;
- node->hashkey = hashkey;
+ node->hashkeys = hashkeys;
return node;
}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 5b171fb819..61476a6560 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.56 2002/11/26 03:01:58 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.57 2002/11/30 00:08:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -677,7 +677,7 @@ SS_finalize_plan(Plan *plan, List *rtable)
break;
case T_Hash:
- finalize_primnode(((Hash *) plan)->hashkey,
+ finalize_primnode((Node *) ((Hash *) plan)->hashkeys,
&results);
break;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index e99435a6ed..9822735560 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.80 2002/11/24 21:52:14 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.81 2002/11/30 00:08:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -616,7 +616,7 @@ create_mergejoin_path(Query *root,
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
- * 'hashclauses' is a list of the hash join clause (always a 1-element list)
+ * 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
HashPath *