summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/path
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/path')
-rw-r--r--src/backend/optimizer/path/allpaths.c169
-rw-r--r--src/backend/optimizer/path/costsize.c78
-rw-r--r--src/backend/optimizer/path/equivclass.c14
3 files changed, 234 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index b7cf0b815b..aa9a90cbfa 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -51,6 +51,7 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RangeTblEntry *rte);
static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
+static List *accumulate_append_subpath(List *subpaths, Path *path);
static void set_dummy_rel_pathlist(RelOptInfo *rel);
static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
@@ -283,7 +284,9 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
int parentRTindex = rti;
+ List *live_childrels = NIL;
List *subpaths = NIL;
+ List *all_child_pathkeys = NIL;
double parent_rows;
double parent_size;
double *parent_attrsizes;
@@ -321,7 +324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
RelOptInfo *childrel;
List *childquals;
Node *childqual;
- Path *childpath;
+ ListCell *lcp;
ListCell *parentvars;
ListCell *childvars;
@@ -395,13 +398,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
/*
* We have to make child entries in the EquivalenceClass data
- * structures as well.
+ * structures as well. This is needed either if the parent
+ * participates in some eclass joins (because we will want to
+ * consider inner-indexscan joins on the individual children)
+ * or if the parent has useful pathkeys (because we should try
+ * to build MergeAppend paths that produce those sort orderings).
*/
- if (rel->has_eclass_joins)
- {
+ if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
add_child_rel_equivalences(root, appinfo, rel, childrel);
- childrel->has_eclass_joins = true;
- }
+ childrel->has_eclass_joins = rel->has_eclass_joins;
/*
* Note: we could compute appropriate attr_needed data for the child's
@@ -411,23 +416,52 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
* otherrels. So we just leave the child's attr_needed empty.
*/
+ /* Remember which childrels are live, for MergeAppend logic below */
+ live_childrels = lappend(live_childrels, childrel);
+
/*
* Compute the child's access paths, and add the cheapest one to the
* Append path we are constructing for the parent.
- *
- * It's possible that the child is itself an appendrel, in which case
- * we can "cut out the middleman" and just add its child paths to our
- * own list. (We don't try to do this earlier because we need to
- * apply both levels of transformation to the quals.)
*/
set_rel_pathlist(root, childrel, childRTindex, childRTE);
- childpath = childrel->cheapest_total_path;
- if (IsA(childpath, AppendPath))
- subpaths = list_concat(subpaths,
- list_copy(((AppendPath *) childpath)->subpaths));
- else
- subpaths = lappend(subpaths, childpath);
+ subpaths = accumulate_append_subpath(subpaths,
+ childrel->cheapest_total_path);
+
+ /*
+ * Collect a list of all the available path orderings for all the
+ * children. We use this as a heuristic to indicate which sort
+ * orderings we should build MergeAppend paths for.
+ */
+ foreach(lcp, childrel->pathlist)
+ {
+ Path *childpath = (Path *) lfirst(lcp);
+ List *childkeys = childpath->pathkeys;
+ ListCell *lpk;
+ bool found = false;
+
+ /* Ignore unsorted paths */
+ if (childkeys == NIL)
+ continue;
+
+ /* Have we already seen this ordering? */
+ foreach(lpk, all_child_pathkeys)
+ {
+ List *existing_pathkeys = (List *) lfirst(lpk);
+
+ if (compare_pathkeys(existing_pathkeys,
+ childkeys) == PATHKEYS_EQUAL)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ {
+ /* No, so add it to all_child_pathkeys */
+ all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
+ }
+ }
/*
* Accumulate size information from each child.
@@ -483,17 +517,107 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
pfree(parent_attrsizes);
/*
- * Finally, build Append path and install it as the only access path for
- * the parent rel. (Note: this is correct even if we have zero or one
- * live subpath due to constraint exclusion.)
+ * Next, build an unordered Append path for the rel. (Note: this is
+ * correct even if we have zero or one live subpath due to constraint
+ * exclusion.)
*/
add_path(rel, (Path *) create_append_path(rel, subpaths));
- /* Select cheapest path (pretty easy in this case...) */
+ /*
+ * Next, build MergeAppend paths based on the collected list of child
+ * pathkeys. We consider both cheapest-startup and cheapest-total
+ * cases, ie, for each interesting ordering, collect all the cheapest
+ * startup subpaths and all the cheapest total paths, and build a
+ * MergeAppend path for each list.
+ */
+ foreach(l, all_child_pathkeys)
+ {
+ List *pathkeys = (List *) lfirst(l);
+ List *startup_subpaths = NIL;
+ List *total_subpaths = NIL;
+ bool startup_neq_total = false;
+ ListCell *lcr;
+
+ /* Select the child paths for this ordering... */
+ foreach(lcr, live_childrels)
+ {
+ RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+ Path *cheapest_startup,
+ *cheapest_total;
+
+ /* Locate the right paths, if they are available. */
+ cheapest_startup =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ pathkeys,
+ STARTUP_COST);
+ cheapest_total =
+ get_cheapest_path_for_pathkeys(childrel->pathlist,
+ pathkeys,
+ TOTAL_COST);
+
+ /*
+ * If we can't find any paths with the right order just add the
+ * cheapest-total path; we'll have to sort it.
+ */
+ if (cheapest_startup == NULL)
+ cheapest_startup = childrel->cheapest_total_path;
+ if (cheapest_total == NULL)
+ cheapest_total = childrel->cheapest_total_path;
+
+ /*
+ * Notice whether we actually have different paths for the
+ * "cheapest" and "total" cases; frequently there will be no
+ * point in two create_merge_append_path() calls.
+ */
+ if (cheapest_startup != cheapest_total)
+ startup_neq_total = true;
+
+ startup_subpaths =
+ accumulate_append_subpath(startup_subpaths, cheapest_startup);
+ total_subpaths =
+ accumulate_append_subpath(total_subpaths, cheapest_total);
+ }
+
+ /* ... and build the MergeAppend paths */
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ startup_subpaths,
+ pathkeys));
+ if (startup_neq_total)
+ add_path(rel, (Path *) create_merge_append_path(root,
+ rel,
+ total_subpaths,
+ pathkeys));
+ }
+
+ /* Select cheapest path */
set_cheapest(rel);
}
/*
+ * accumulate_append_subpath
+ * Add a subpath to the list being built for an Append or MergeAppend
+ *
+ * It's possible that the child is itself an Append path, in which case
+ * we can "cut out the middleman" and just add its child paths to our
+ * own list. (We don't try to do this earlier because we need to
+ * apply both levels of transformation to the quals.)
+ */
+static List *
+accumulate_append_subpath(List *subpaths, Path *path)
+{
+ if (IsA(path, AppendPath))
+ {
+ AppendPath *apath = (AppendPath *) path;
+
+ /* list_copy is important here to avoid sharing list substructure */
+ return list_concat(subpaths, list_copy(apath->subpaths));
+ }
+ else
+ return lappend(subpaths, path);
+}
+
+/*
* set_dummy_rel_pathlist
* Build a dummy path for a relation that's been excluded by constraints
*
@@ -1385,6 +1509,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
case T_AppendPath:
ptype = "Append";
break;
+ case T_MergeAppendPath:
+ ptype = "MergeAppend";
+ break;
case T_ResultPath:
ptype = "Result";
break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b27dc53fef..067cbca125 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1210,6 +1210,70 @@ cost_sort(Path *path, PlannerInfo *root,
}
/*
+ * cost_merge_append
+ * Determines and returns the cost of a MergeAppend node.
+ *
+ * MergeAppend merges several pre-sorted input streams, using a heap that
+ * at any given instant holds the next tuple from each stream. If there
+ * are N streams, we need about N*log2(N) tuple comparisons to construct
+ * the heap at startup, and then for each output tuple, about log2(N)
+ * comparisons to delete the top heap entry and another log2(N) comparisons
+ * to insert its successor from the same stream.
+ *
+ * (The effective value of N will drop once some of the input streams are
+ * exhausted, but it seems unlikely to be worth trying to account for that.)
+ *
+ * The heap is never spilled to disk, since we assume N is not very large.
+ * So this is much simpler than cost_sort.
+ *
+ * As in cost_sort, we charge two operator evals per tuple comparison.
+ *
+ * 'pathkeys' is a list of sort keys
+ * 'n_streams' is the number of input streams
+ * 'input_startup_cost' is the sum of the input streams' startup costs
+ * 'input_total_cost' is the sum of the input streams' total costs
+ * 'tuples' is the number of tuples in all the streams
+ */
+void
+cost_merge_append(Path *path, PlannerInfo *root,
+ List *pathkeys, int n_streams,
+ Cost input_startup_cost, Cost input_total_cost,
+ double tuples)
+{
+ Cost startup_cost = 0;
+ Cost run_cost = 0;
+ Cost comparison_cost;
+ double N;
+ double logN;
+
+ /*
+ * Avoid log(0)...
+ */
+ N = (n_streams < 2) ? 2.0 : (double) n_streams;
+ logN = LOG2(N);
+
+ /* Assumed cost per tuple comparison */
+ comparison_cost = 2.0 * cpu_operator_cost;
+
+ /* Heap creation cost */
+ startup_cost += comparison_cost * N * logN;
+
+ /* Per-tuple heap maintenance cost */
+ run_cost += tuples * comparison_cost * 2.0 * logN;
+
+ /*
+ * Also charge a small amount (arbitrarily set equal to operator cost) per
+ * extracted tuple. We don't charge cpu_tuple_cost because a MergeAppend
+ * node doesn't do qual-checking or projection, so it has less overhead
+ * than most plan nodes.
+ */
+ run_cost += cpu_operator_cost * tuples;
+
+ path->startup_cost = startup_cost + input_startup_cost;
+ path->total_cost = startup_cost + run_cost + input_total_cost;
+}
+
+/*
* cost_material
* Determines and returns the cost of materializing a relation, including
* the cost of reading the input data.
@@ -1405,7 +1469,9 @@ cost_group(Path *path, PlannerInfo *root,
* output row count, which may be lower than the restriction-clause-only row
* count of its parent. (We don't include this case in the PATH_ROWS macro
* because it applies *only* to a nestloop's inner relation.) We have to
- * be prepared to recurse through Append nodes in case of an appendrel.
+ * be prepared to recurse through Append or MergeAppend nodes in case of an
+ * appendrel. (It's not clear MergeAppend can be seen here, but we may as
+ * well handle it if so.)
*/
static double
nestloop_inner_path_rows(Path *path)
@@ -1426,6 +1492,16 @@ nestloop_inner_path_rows(Path *path)
result += nestloop_inner_path_rows((Path *) lfirst(l));
}
}
+ else if (IsA(path, MergeAppendPath))
+ {
+ ListCell *l;
+
+ result = 0;
+ foreach(l, ((MergeAppendPath *) path)->subpaths)
+ {
+ result += nestloop_inner_path_rows((Path *) lfirst(l));
+ }
+ }
else
result = PATH_ROWS(path);
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index a20ed5f36c..e44e960b54 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -1611,8 +1611,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
* Search for EC members that reference (only) the parent_rel, and
* add transformed members referencing the child_rel.
*
- * We only need to do this for ECs that could generate join conditions,
- * since the child members are only used for creating inner-indexscan paths.
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
*
* parent_rel and child_rel could be derived from appinfo, but since the
* caller has already computed them, we might as well just pass them in.
@@ -1631,10 +1631,14 @@ add_child_rel_equivalences(PlannerInfo *root,
ListCell *lc2;
/*
- * Won't generate joinclauses if const or single-member (the latter
- * test covers the volatile case too)
+ * If this EC contains a constant, then it's not useful for sorting
+ * or driving an inner index-scan, so we skip generating child EMs.
+ *
+ * If this EC contains a volatile expression, then generating child
+ * EMs would be downright dangerous. We rely on a volatile EC having
+ * only one EM.
*/
- if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+ if (cur_ec->ec_has_const || cur_ec->ec_has_volatile)
continue;
/* No point in searching if parent rel not mentioned in eclass */