Diffstat (limited to 'src/backend/optimizer')
-rw-r--r--   src/backend/optimizer/plan/createplan.c    44
-rw-r--r--   src/backend/optimizer/plan/initsplan.c     67
-rw-r--r--   src/backend/optimizer/plan/planner.c      107
-rw-r--r--   src/backend/optimizer/plan/setrefs.c       20
4 files changed, 179 insertions, 59 deletions
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 717fcfa3ce..74e6f237b3 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.122 2002/11/15 02:36:53 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1684,7 +1684,8 @@ make_material(List *tlist, Plan *lefttree)
 
 Agg *
 make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
-         int ngrp, AttrNumber *grpColIdx, Plan *lefttree)
+         int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs,
+         Plan *lefttree)
 {
     Agg        *node = makeNode(Agg);
     Plan       *plan = &node->plan;
@@ -1692,6 +1693,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
     node->aggstrategy = aggstrategy;
     node->numCols = ngrp;
     node->grpColIdx = grpColIdx;
+    node->numGroups = numGroups;
 
     copy_plan_costsize(plan, lefttree);
 
@@ -1699,15 +1701,11 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
      * Charge one cpu_operator_cost per aggregate function per input
      * tuple.
      */
-    plan->total_cost += cpu_operator_cost * plan->plan_rows *
-        (length(pull_agg_clause((Node *) tlist)) +
-         length(pull_agg_clause((Node *) qual)));
+    plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs;
 
     /*
      * We will produce a single output tuple if not grouping,
-     * and a tuple per group otherwise.  For now, estimate the number of
-     * groups as 10% of the number of tuples --- bogus, but how to do
-     * better?
+     * and a tuple per group otherwise.
      */
     if (aggstrategy == AGG_PLAIN)
     {
@@ -1716,10 +1714,7 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
     }
     else
     {
-        plan->plan_rows *= 0.1;
-        if (plan->plan_rows < 1)
-            plan->plan_rows = 1;
-        node->numGroups = (long) plan->plan_rows;
+        plan->plan_rows = numGroups;
     }
 
     plan->state = (EState *) NULL;
@@ -1735,6 +1730,7 @@ Group *
 make_group(List *tlist,
            int ngrp,
            AttrNumber *grpColIdx,
+           double numGroups,
            Plan *lefttree)
 {
     Group      *node = makeNode(Group);
@@ -1748,13 +1744,8 @@ make_group(List *tlist,
      */
     plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
 
-    /*
-     * Estimate the number of groups as 10% of the number of tuples
-     * --- bogus, but how to do better?
-     */
-    plan->plan_rows *= 0.1;
-    if (plan->plan_rows < 1)
-        plan->plan_rows = 1;
+    /* One output tuple per estimated result group */
+    plan->plan_rows = numGroups;
 
     plan->state = (EState *) NULL;
     plan->qual = NULL;
@@ -1786,17 +1777,16 @@ make_unique(List *tlist, Plan *lefttree, List *distinctList)
 
     /*
      * Charge one cpu_operator_cost per comparison per input tuple. We
-     * assume all columns get compared at most of the tuples.
+     * assume all columns get compared at most of the tuples.  (XXX probably
+     * this is an overestimate.)
      */
     plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
 
     /*
-     * As for Group, we make the unsupported assumption that there will be
-     * 10% as many tuples out as in.
+     * plan->plan_rows is left as a copy of the input subplan's plan_rows;
+     * ie, we assume the filter removes nothing.  The caller must alter this
+     * if he has a better idea.
      */
-    plan->plan_rows *= 0.1;
-    if (plan->plan_rows < 1)
-        plan->plan_rows = 1;
 
     plan->state = (EState *) NULL;
     plan->targetlist = tlist;
@@ -1850,8 +1840,8 @@ make_setop(SetOpCmd cmd, List *tlist, Plan *lefttree,
     plan->total_cost += cpu_operator_cost * plan->plan_rows * numCols;
 
     /*
-     * As for Group, we make the unsupported assumption that there will be
-     * 10% as many tuples out as in.
+     * We make the unsupported assumption that there will be 10% as many
+     * tuples out as in.  Any way to do better?
      */
     plan->plan_rows *= 0.1;
     if (plan->plan_rows < 1)
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index e06282c126..e43c52f6df 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.75 2002/09/04 20:31:21 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.76 2002/11/19 23:21:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -785,6 +785,71 @@ process_implied_equality(Query *root, Node *item1, Node *item2,
 }
 
 /*
+ * vars_known_equal
+ *      Detect whether two Vars are known equal due to equijoin clauses.
+ *
+ * This is not completely accurate since we avoid adding redundant restriction
+ * clauses to individual base rels (see qual_is_redundant).  However, after
+ * the implied-equality-deduction phase, it is complete for Vars of different
+ * rels; that's sufficient for planned uses.
+ */
+bool
+vars_known_equal(Query *root, Var *var1, Var *var2)
+{
+    Index       irel1;
+    Index       irel2;
+    RelOptInfo *rel1;
+    List       *restrictlist;
+    List       *itm;
+
+    /*
+     * Would need more work here if we wanted to check for known equality
+     * of general clauses: there might be multiple base rels involved.
+     */
+    Assert(IsA(var1, Var));
+    irel1 = var1->varno;
+    Assert(IsA(var2, Var));
+    irel2 = var2->varno;
+
+    /*
+     * If both vars belong to same rel, we need to look at that rel's
+     * baserestrictinfo list.  If different rels, each will have a
+     * joininfo node for the other, and we can scan either list.
+     */
+    rel1 = find_base_rel(root, irel1);
+    if (irel1 == irel2)
+        restrictlist = rel1->baserestrictinfo;
+    else
+    {
+        JoinInfo   *joininfo = find_joininfo_node(rel1,
+                                                  makeListi1(irel2));
+
+        restrictlist = joininfo->jinfo_restrictinfo;
+    }
+
+    /*
+     * Scan to see if equality is known.
+     */
+    foreach(itm, restrictlist)
+    {
+        RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(itm);
+        Node       *left,
+                   *right;
+
+        if (restrictinfo->mergejoinoperator == InvalidOid)
+            continue;           /* ignore non-mergejoinable clauses */
+        /* We now know the restrictinfo clause is a binary opclause */
+        left = (Node *) get_leftop(restrictinfo->clause);
+        right = (Node *) get_rightop(restrictinfo->clause);
+        if ((equal(var1, left) && equal(var2, right)) ||
+            (equal(var2, left) && equal(var1, right)))
+            return true;        /* found a matching clause */
+    }
+
+    return false;
+}
+
+/*
  * qual_is_redundant
  *      Detect whether an implied-equality qual that turns out to be a
  *      restriction clause for a single base relation is redundant with
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index ab51f0cedb..baccf2ffbd 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,14 +8,17 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.128 2002/11/14 19:00:36 tgl Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include <limits.h>
+
 #include "catalog/pg_type.h"
+#include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #ifdef OPTIMIZER_DEBUG
 #include "nodes/print.h"
@@ -35,6 +38,7 @@
 #include "parser/parse_expr.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
 
 
 /* Expression kind codes for preprocess_expression */
@@ -161,6 +165,23 @@ subquery_planner(Query *parse, double tuple_fraction)
     preprocess_jointree(parse, (Node *) parse->jointree);
 
     /*
+     * Detect whether any rangetable entries are RTE_JOIN kind; if not,
+     * we can avoid the expense of doing flatten_join_alias_vars().
+     * This must be done after we have done pull_up_subqueries, of course.
+     */
+    parse->hasJoinRTEs = false;
+    foreach(lst, parse->rtable)
+    {
+        RangeTblEntry *rte = (RangeTblEntry *) lfirst(lst);
+
+        if (rte->rtekind == RTE_JOIN)
+        {
+            parse->hasJoinRTEs = true;
+            break;
+        }
+    }
+
+    /*
      * Do expression preprocessing on targetlist and quals.
      */
     parse->targetList = (List *)
@@ -694,9 +715,6 @@ preprocess_jointree(Query *parse, Node *jtnode)
 static Node *
 preprocess_expression(Query *parse, Node *expr, int kind)
 {
-    bool        has_join_rtes;
-    List       *rt;
-
     /*
      * Simplify constant expressions.
      *
@@ -737,22 +755,8 @@ preprocess_expression(Query *parse, Node *expr, int kind)
      * with base-relation variables, to allow quals to be pushed down. We
      * must do this after sublink processing, since it does not recurse
      * into sublinks.
-     *
-     * The flattening pass is expensive enough that it seems worthwhile to
-     * scan the rangetable to see if we can avoid it.
      */
-    has_join_rtes = false;
-    foreach(rt, parse->rtable)
-    {
-        RangeTblEntry *rte = lfirst(rt);
-
-        if (rte->rtekind == RTE_JOIN)
-        {
-            has_join_rtes = true;
-            break;
-        }
-    }
-    if (has_join_rtes)
+    if (parse->hasJoinRTEs)
         expr = flatten_join_alias_vars(expr, parse->rtable, false);
 
     return expr;
@@ -931,6 +935,9 @@ grouping_planner(Query *parse, double tuple_fraction)
     AttrNumber *groupColIdx = NULL;
     Path       *cheapest_path;
     Path       *sorted_path;
+    double      dNumGroups = 0;
+    long        numGroups = 0;
+    int         numAggs = 0;
     bool        use_hashed_grouping = false;
 
     /* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
@@ -1007,6 +1014,19 @@ grouping_planner(Query *parse, double tuple_fraction)
                                        tlist);
 
     /*
+     * Will need actual number of aggregates for estimating costs.
+     * Also, it's possible that optimization has eliminated all
+     * aggregates, and we may as well check for that here.
+     */
+    if (parse->hasAggs)
+    {
+        numAggs = length(pull_agg_clause((Node *) tlist)) +
+            length(pull_agg_clause(parse->havingQual));
+        if (numAggs == 0)
+            parse->hasAggs = false;
+    }
+
+    /*
      * Figure out whether we need a sorted result from query_planner.
      *
      * If we have a GROUP BY clause, then we want a result sorted
@@ -1216,6 +1236,14 @@ grouping_planner(Query *parse, double tuple_fraction)
     if (parse->groupClause)
     {
         /*
+         * Always estimate the number of groups.
+         */
+        dNumGroups = estimate_num_groups(parse,
+                                         parse->groupClause,
+                                         cheapest_path->parent->rows);
+        numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
+
+        /*
          * Executor doesn't support hashed aggregation with DISTINCT
          * aggregates.  (Doing so would imply storing *all* the input
          * values in the hash table, which seems like a certain loser.)
@@ -1226,10 +1254,30 @@ grouping_planner(Query *parse, double tuple_fraction)
             use_hashed_grouping = false;
         else
         {
-#if 0                           /* much more to do here */
-            /* TEMPORARY HOTWIRE FOR TESTING */
-            use_hashed_grouping = true;
+            /*
+             * Use hashed grouping if (a) we think we can fit the
+             * hashtable into SortMem, *and* (b) the estimated cost
+             * is no more than doing it the other way.  While avoiding
+             * the need for sorted input is usually a win, the fact
+             * that the output won't be sorted may be a loss; so we
+             * need to do an actual cost comparison.
+             *
+             * In most cases we have no good way to estimate the size of
+             * the transition value needed by an aggregate; arbitrarily
+             * assume it is 100 bytes.  Also set the overhead per hashtable
+             * entry at 64 bytes.
+             */
+            int         hashentrysize = cheapest_path->parent->width + 64 +
+                numAggs * 100;
+
+            if (hashentrysize * dNumGroups <= SortMem * 1024L)
+            {
+                /* much more to do here */
+#if 0
+                /* TEMPORARY HOTWIRE FOR TESTING */
+                use_hashed_grouping = true;
 #endif
+            }
         }
     }
 
@@ -1319,6 +1367,8 @@ grouping_planner(Query *parse, double tuple_fraction)
                                             AGG_HASHED,
                                             length(parse->groupClause),
                                             groupColIdx,
+                                            numGroups,
+                                            numAggs,
                                             result_plan);
             /* Hashed aggregation produces randomly-ordered results */
             current_pathkeys = NIL;
@@ -1356,6 +1406,8 @@ grouping_planner(Query *parse, double tuple_fraction)
                                             aggstrategy,
                                             length(parse->groupClause),
                                             groupColIdx,
+                                            numGroups,
+                                            numAggs,
                                             result_plan);
         }
         else
@@ -1387,6 +1439,7 @@ grouping_planner(Query *parse, double tuple_fraction)
                 result_plan = (Plan *) make_group(tlist,
                                             length(parse->groupClause),
                                             groupColIdx,
+                                            dNumGroups,
                                             result_plan);
             }
         }
@@ -1410,6 +1463,16 @@ grouping_planner(Query *parse, double tuple_fraction)
     {
         result_plan = (Plan *) make_unique(tlist, result_plan,
                                            parse->distinctClause);
+        /*
+         * If there was grouping or aggregation, leave plan_rows as-is
+         * (ie, assume the result was already mostly unique).  If not,
+         * it's reasonable to assume the UNIQUE filter has effects
+         * comparable to GROUP BY.
+         */
+        if (!parse->groupClause && !parse->hasAggs)
+            result_plan->plan_rows = estimate_num_groups(parse,
+                                                parse->distinctClause,
+                                                result_plan->plan_rows);
     }
 
     /*
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 66998b036f..4239d9c3c1 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.81 2002/09/04 20:31:21 momjian Exp $
+ *    $Header: /cvsroot/pgsql/src/backend/optimizer/plan/setrefs.c,v 1.82 2002/11/19 23:21:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -439,7 +439,14 @@ join_references_mutator(Node *node,
             return (Node *) newvar;
         }
 
-        /* Perhaps it's a join alias that can be resolved to input vars? */
+        /* Return the Var unmodified, if it's for acceptable_rel */
+        if (var->varno == context->acceptable_rel)
+            return (Node *) copyObject(var);
+
+        /*
+         * Perhaps it's a join alias that can be resolved to input vars?
+         * We try this last since it's relatively slow.
+         */
         newnode = flatten_join_alias_vars((Node *) var,
                                           context->rtable,
                                           true);
@@ -450,13 +457,8 @@ join_references_mutator(Node *node,
             return newnode;
         }
 
-        /*
-         * No referent found for Var --- either raise an error, or return
-         * the Var unmodified if it's for acceptable_rel.
-         */
-        if (var->varno != context->acceptable_rel)
-            elog(ERROR, "join_references: variable not in subplan target lists");
-        return (Node *) copyObject(var);
+        /* No referent found for Var */
+        elog(ERROR, "join_references: variable not in subplan target lists");
     }
     return expression_tree_mutator(node,
                                    join_references_mutator,
