diff options
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-30 00:08:22 +0000 |
|---|---|---|
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-11-30 00:08:22 +0000 |
| commit | ddb2d78de0172b1f3a00c8e3bf35345af9952f43 (patch) | |
| tree | 75aaa2922e21b78514cd592241c1718a2e6a4ba8 /src/backend/optimizer/path | |
| parent | f68f11928d5c791873073c882775dae10283ff49 (diff) | |
| download | postgresql-ddb2d78de0172b1f3a00c8e3bf35345af9952f43.tar.gz | |
Upgrade planner and executor to allow multiple hash keys for a hash join,
instead of only one. This should speed up planning (only one hash path
to consider for a given pair of relations) as well as allow more effective
hashing, when there are multiple hashable joinclauses.
Diffstat (limited to 'src/backend/optimizer/path')
| -rw-r--r-- | src/backend/optimizer/path/costsize.c | 84 | ||||
| -rw-r--r-- | src/backend/optimizer/path/joinpath.c | 30 |
2 files changed, 67 insertions, 47 deletions
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 6cf8b2af4b..fbdeea414c 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -42,7 +42,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -819,7 +819,7 @@ cost_mergejoin(Path *path, Query *root, * 'outer_path' is the path for the outer relation * 'inner_path' is the path for the inner relation * 'restrictlist' are the RestrictInfo nodes to be applied at the join - * 'hashclauses' is a list of the hash join clause (always a 1-element list) + * 'hashclauses' are the RestrictInfo nodes to use as hash clauses * (this should be a subset of the restrictlist) */ void @@ -838,10 +838,8 @@ cost_hashjoin(Path *path, Query *root, double innerbytes = relation_byte_size(inner_path->parent->rows, inner_path->parent->width); long hashtablebytes = SortMem * 1024L; - RestrictInfo *restrictinfo; - Var *left, - *right; Selectivity innerbucketsize; + List *hcl; if (!enable_hashjoin) startup_cost += disable_cost; @@ -856,43 +854,57 @@ cost_hashjoin(Path *path, Query *root, run_cost += cpu_operator_cost * outer_path->parent->rows; /* - * Determine bucketsize fraction for inner relation. First we have to - * figure out which side of the hashjoin clause is the inner side. + * Determine bucketsize fraction for inner relation. We use the + * smallest bucketsize estimated for any individual hashclause; + * this is undoubtedly conservative. */ - Assert(length(hashclauses) == 1); - Assert(IsA(lfirst(hashclauses), RestrictInfo)); - restrictinfo = (RestrictInfo *) lfirst(hashclauses); - /* these must be OK, since check_hashjoinable accepted the clause */ - left = get_leftop(restrictinfo->clause); - right = get_rightop(restrictinfo->clause); - - /* - * Since we tend to visit the same clauses over and over when planning - * a large query, we cache the bucketsize estimate in the RestrictInfo - * node to avoid repeated lookups of statistics. - */ - if (VARISRELMEMBER(right->varno, inner_path->parent)) + innerbucketsize = 1.0; + foreach(hcl, hashclauses) { - /* righthand side is inner */ - innerbucketsize = restrictinfo->right_bucketsize; - if (innerbucketsize < 0) + RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl); + Var *left, + *right; + Selectivity thisbucketsize; + + Assert(IsA(restrictinfo, RestrictInfo)); + /* these must be OK, since check_hashjoinable accepted the clause */ + left = get_leftop(restrictinfo->clause); + right = get_rightop(restrictinfo->clause); + + /* + * First we have to figure out which side of the hashjoin clause is the + * inner side. + * + * Since we tend to visit the same clauses over and over when planning + * a large query, we cache the bucketsize estimate in the RestrictInfo + * node to avoid repeated lookups of statistics. + */ + if (VARISRELMEMBER(right->varno, inner_path->parent)) { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, right); - restrictinfo->right_bucketsize = innerbucketsize; + /* righthand side is inner */ + thisbucketsize = restrictinfo->right_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = estimate_hash_bucketsize(root, right); + restrictinfo->right_bucketsize = thisbucketsize; + } } - } - else - { - Assert(VARISRELMEMBER(left->varno, inner_path->parent)); - /* lefthand side is inner */ - innerbucketsize = restrictinfo->left_bucketsize; - if (innerbucketsize < 0) + else { - /* not cached yet */ - innerbucketsize = estimate_hash_bucketsize(root, left); - restrictinfo->left_bucketsize = innerbucketsize; + Assert(VARISRELMEMBER(left->varno, inner_path->parent)); + /* lefthand side is inner */ + thisbucketsize = restrictinfo->left_bucketsize; + if (thisbucketsize < 0) + { + /* not cached yet */ + thisbucketsize = estimate_hash_bucketsize(root, left); + restrictinfo->left_bucketsize = thisbucketsize; + } } + + if (innerbucketsize > thisbucketsize) + innerbucketsize = thisbucketsize; } /* diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c index ac5d4a72d4..6069a34d87 100644 --- a/src/backend/optimizer/path/joinpath.c +++ b/src/backend/optimizer/path/joinpath.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.72 2002/11/24 21:52:14 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -701,7 +701,7 @@ match_unsorted_inner(Query *root, /* * hash_inner_and_outer * Create hashjoin join paths by explicitly hashing both the outer and - * inner join relations of each available hash clause. + * inner keys of each available hash clause. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation @@ -719,6 +719,7 @@ hash_inner_and_outer(Query *root, JoinType jointype) { bool isouterjoin; + List *hashclauses; List *i; /* @@ -737,20 +738,18 @@ hash_inner_and_outer(Query *root, } /* + * We need to build only one hashpath for any given pair of outer and + * inner relations; all of the hashable clauses will be used as keys. + * * Scan the join's restrictinfo list to find hashjoinable clauses that - * are usable with this pair of sub-relations. Since we currently - * accept only var-op-var clauses as hashjoinable, we need only check - * the membership of the vars to determine whether a particular clause - * can be used with this pair of sub-relations. This code would need - * to be upgraded if we wanted to allow more-complex expressions in - * hash joins. + * are usable with this pair of sub-relations. */ + hashclauses = NIL; foreach(i, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i); Var *left, *right; - List *hashclauses; if (restrictinfo->hashjoinoperator == InvalidOid) continue; /* not hashjoinable */ @@ -768,6 +767,12 @@ hash_inner_and_outer(Query *root, /* * Check if clause is usable with these input rels. + * + * Since we currently accept only var-op-var clauses as hashjoinable, + * we need only check the membership of the vars to determine whether + * a particular clause can be used with this pair of sub-relations. + * This code would need to be upgraded if we wanted to allow + * more-complex expressions in hash joins. */ if (VARISRELMEMBER(left->varno, outerrel) && VARISRELMEMBER(right->varno, innerrel)) @@ -782,9 +787,12 @@ hash_inner_and_outer(Query *root, else continue; /* no good for these input relations */ - /* always a one-element list of hash clauses */ - hashclauses = makeList1(restrictinfo); + hashclauses = lappend(hashclauses, restrictinfo); + } + /* If we found any usable hashclauses, make a path */ + if (hashclauses) + { /* * We consider both the cheapest-total-cost and * cheapest-startup-cost outer paths. There's no need to consider |
