diff options
Diffstat (limited to 'src/backend/optimizer/path')
| -rw-r--r-- | src/backend/optimizer/path/allpaths.c | 169 | ||||
| -rw-r--r-- | src/backend/optimizer/path/costsize.c | 78 | ||||
| -rw-r--r-- | src/backend/optimizer/path/equivclass.c | 14 |
3 files changed, 234 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b7cf0b815b..aa9a90cbfa 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -51,6 +51,7 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte); static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); +static List *accumulate_append_subpath(List *subpaths, Path *path); static void set_dummy_rel_pathlist(RelOptInfo *rel); static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte); @@ -283,7 +284,9 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntry *rte) { int parentRTindex = rti; + List *live_childrels = NIL; List *subpaths = NIL; + List *all_child_pathkeys = NIL; double parent_rows; double parent_size; double *parent_attrsizes; @@ -321,7 +324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RelOptInfo *childrel; List *childquals; Node *childqual; - Path *childpath; + ListCell *lcp; ListCell *parentvars; ListCell *childvars; @@ -395,13 +398,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, /* * We have to make child entries in the EquivalenceClass data - * structures as well. + * structures as well. This is needed either if the parent + * participates in some eclass joins (because we will want to + * consider inner-indexscan joins on the individual children) + * or if the parent has useful pathkeys (because we should try + * to build MergeAppend paths that produce those sort orderings). */ - if (rel->has_eclass_joins) - { + if (rel->has_eclass_joins || has_useful_pathkeys(root, rel)) add_child_rel_equivalences(root, appinfo, rel, childrel); - childrel->has_eclass_joins = true; - } + childrel->has_eclass_joins = rel->has_eclass_joins; /* * Note: we could compute appropriate attr_needed data for the child's @@ -411,23 +416,52 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, * otherrels. So we just leave the child's attr_needed empty. */ + /* Remember which childrels are live, for MergeAppend logic below */ + live_childrels = lappend(live_childrels, childrel); + /* * Compute the child's access paths, and add the cheapest one to the * Append path we are constructing for the parent. - * - * It's possible that the child is itself an appendrel, in which case - * we can "cut out the middleman" and just add its child paths to our - * own list. (We don't try to do this earlier because we need to - * apply both levels of transformation to the quals.) */ set_rel_pathlist(root, childrel, childRTindex, childRTE); - childpath = childrel->cheapest_total_path; - if (IsA(childpath, AppendPath)) - subpaths = list_concat(subpaths, - list_copy(((AppendPath *) childpath)->subpaths)); - else - subpaths = lappend(subpaths, childpath); + subpaths = accumulate_append_subpath(subpaths, + childrel->cheapest_total_path); + + /* + * Collect a list of all the available path orderings for all the + * children. We use this as a heuristic to indicate which sort + * orderings we should build MergeAppend paths for. + */ + foreach(lcp, childrel->pathlist) + { + Path *childpath = (Path *) lfirst(lcp); + List *childkeys = childpath->pathkeys; + ListCell *lpk; + bool found = false; + + /* Ignore unsorted paths */ + if (childkeys == NIL) + continue; + + /* Have we already seen this ordering? */ + foreach(lpk, all_child_pathkeys) + { + List *existing_pathkeys = (List *) lfirst(lpk); + + if (compare_pathkeys(existing_pathkeys, + childkeys) == PATHKEYS_EQUAL) + { + found = true; + break; + } + } + if (!found) + { + /* No, so add it to all_child_pathkeys */ + all_child_pathkeys = lappend(all_child_pathkeys, childkeys); + } + } /* * Accumulate size information from each child. @@ -483,17 +517,107 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, pfree(parent_attrsizes); /* - * Finally, build Append path and install it as the only access path for - * the parent rel. (Note: this is correct even if we have zero or one - * live subpath due to constraint exclusion.) + * Next, build an unordered Append path for the rel. (Note: this is + * correct even if we have zero or one live subpath due to constraint + * exclusion.) */ add_path(rel, (Path *) create_append_path(rel, subpaths)); - /* Select cheapest path (pretty easy in this case...) */ + /* + * Next, build MergeAppend paths based on the collected list of child + * pathkeys. We consider both cheapest-startup and cheapest-total + * cases, ie, for each interesting ordering, collect all the cheapest + * startup subpaths and all the cheapest total paths, and build a + * MergeAppend path for each list. + */ + foreach(l, all_child_pathkeys) + { + List *pathkeys = (List *) lfirst(l); + List *startup_subpaths = NIL; + List *total_subpaths = NIL; + bool startup_neq_total = false; + ListCell *lcr; + + /* Select the child paths for this ordering... */ + foreach(lcr, live_childrels) + { + RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr); + Path *cheapest_startup, + *cheapest_total; + + /* Locate the right paths, if they are available. */ + cheapest_startup = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + STARTUP_COST); + cheapest_total = + get_cheapest_path_for_pathkeys(childrel->pathlist, + pathkeys, + TOTAL_COST); + + /* + * If we can't find any paths with the right order just add the + * cheapest-total path; we'll have to sort it. + */ + if (cheapest_startup == NULL) + cheapest_startup = childrel->cheapest_total_path; + if (cheapest_total == NULL) + cheapest_total = childrel->cheapest_total_path; + + /* + * Notice whether we actually have different paths for the + * "cheapest" and "total" cases; frequently there will be no + * point in two create_merge_append_path() calls. + */ + if (cheapest_startup != cheapest_total) + startup_neq_total = true; + + startup_subpaths = + accumulate_append_subpath(startup_subpaths, cheapest_startup); + total_subpaths = + accumulate_append_subpath(total_subpaths, cheapest_total); + } + + /* ... and build the MergeAppend paths */ + add_path(rel, (Path *) create_merge_append_path(root, + rel, + startup_subpaths, + pathkeys)); + if (startup_neq_total) + add_path(rel, (Path *) create_merge_append_path(root, + rel, + total_subpaths, + pathkeys)); + } + + /* Select cheapest path */ set_cheapest(rel); } /* + * accumulate_append_subpath + * Add a subpath to the list being built for an Append or MergeAppend + * + * It's possible that the child is itself an Append path, in which case + * we can "cut out the middleman" and just add its child paths to our + * own list. (We don't try to do this earlier because we need to + * apply both levels of transformation to the quals.) + */ +static List * +accumulate_append_subpath(List *subpaths, Path *path) +{ + if (IsA(path, AppendPath)) + { + AppendPath *apath = (AppendPath *) path; + + /* list_copy is important here to avoid sharing list substructure */ + return list_concat(subpaths, list_copy(apath->subpaths)); + } + else + return lappend(subpaths, path); +} + +/* * set_dummy_rel_pathlist * Build a dummy path for a relation that's been excluded by constraints * @@ -1385,6 +1509,9 @@ print_path(PlannerInfo *root, Path *path, int indent) case T_AppendPath: ptype = "Append"; break; + case T_MergeAppendPath: + ptype = "MergeAppend"; + break; case T_ResultPath: ptype = "Result"; break; diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index b27dc53fef..067cbca125 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1210,6 +1210,70 @@ cost_sort(Path *path, PlannerInfo *root, } /* + * cost_merge_append + * Determines and returns the cost of a MergeAppend node. + * + * MergeAppend merges several pre-sorted input streams, using a heap that + * at any given instant holds the next tuple from each stream. If there + * are N streams, we need about N*log2(N) tuple comparisons to construct + * the heap at startup, and then for each output tuple, about log2(N) + * comparisons to delete the top heap entry and another log2(N) comparisons + * to insert its successor from the same stream. + * + * (The effective value of N will drop once some of the input streams are + * exhausted, but it seems unlikely to be worth trying to account for that.) + * + * The heap is never spilled to disk, since we assume N is not very large. + * So this is much simpler than cost_sort. + * + * As in cost_sort, we charge two operator evals per tuple comparison. + * + * 'pathkeys' is a list of sort keys + * 'n_streams' is the number of input streams + * 'input_startup_cost' is the sum of the input streams' startup costs + * 'input_total_cost' is the sum of the input streams' total costs + * 'tuples' is the number of tuples in all the streams + */ +void +cost_merge_append(Path *path, PlannerInfo *root, + List *pathkeys, int n_streams, + Cost input_startup_cost, Cost input_total_cost, + double tuples) +{ + Cost startup_cost = 0; + Cost run_cost = 0; + Cost comparison_cost; + double N; + double logN; + + /* + * Avoid log(0)... + */ + N = (n_streams < 2) ? 2.0 : (double) n_streams; + logN = LOG2(N); + + /* Assumed cost per tuple comparison */ + comparison_cost = 2.0 * cpu_operator_cost; + + /* Heap creation cost */ + startup_cost += comparison_cost * N * logN; + + /* Per-tuple heap maintenance cost */ + run_cost += tuples * comparison_cost * 2.0 * logN; + + /* + * Also charge a small amount (arbitrarily set equal to operator cost) per + * extracted tuple. We don't charge cpu_tuple_cost because a MergeAppend + * node doesn't do qual-checking or projection, so it has less overhead + * than most plan nodes. + */ + run_cost += cpu_operator_cost * tuples; + + path->startup_cost = startup_cost + input_startup_cost; + path->total_cost = startup_cost + run_cost + input_total_cost; +} + +/* * cost_material * Determines and returns the cost of materializing a relation, including * the cost of reading the input data. @@ -1405,7 +1469,9 @@ cost_group(Path *path, PlannerInfo *root, * output row count, which may be lower than the restriction-clause-only row * count of its parent. (We don't include this case in the PATH_ROWS macro * because it applies *only* to a nestloop's inner relation.) We have to - * be prepared to recurse through Append nodes in case of an appendrel. + * be prepared to recurse through Append or MergeAppend nodes in case of an + * appendrel. (It's not clear MergeAppend can be seen here, but we may as + * well handle it if so.) */ static double nestloop_inner_path_rows(Path *path) @@ -1426,6 +1492,16 @@ nestloop_inner_path_rows(Path *path) result += nestloop_inner_path_rows((Path *) lfirst(l)); } } + else if (IsA(path, MergeAppendPath)) + { + ListCell *l; + + result = 0; + foreach(l, ((MergeAppendPath *) path)->subpaths) + { + result += nestloop_inner_path_rows((Path *) lfirst(l)); + } + } else result = PATH_ROWS(path); diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index a20ed5f36c..e44e960b54 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1611,8 +1611,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2) * Search for EC members that reference (only) the parent_rel, and * add transformed members referencing the child_rel. * - * We only need to do this for ECs that could generate join conditions, - * since the child members are only used for creating inner-indexscan paths. + * Note that this function won't be called at all unless we have at least some + * reason to believe that the EC members it generates will be useful. * * parent_rel and child_rel could be derived from appinfo, but since the * caller has already computed them, we might as well just pass them in. @@ -1631,10 +1631,14 @@ add_child_rel_equivalences(PlannerInfo *root, ListCell *lc2; /* - * Won't generate joinclauses if const or single-member (the latter - * test covers the volatile case too) + * If this EC contains a constant, then it's not useful for sorting + * or driving an inner index-scan, so we skip generating child EMs. + * + * If this EC contains a volatile expression, then generating child + * EMs would be downright dangerous. We rely on a volatile EC having + * only one EM. */ - if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1) + if (cur_ec->ec_has_const || cur_ec->ec_has_volatile) continue; /* No point in searching if parent rel not mentioned in eclass */ |
