3 files changed, 234 insertions, 27 deletions
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index b7cf0b815b..aa9a90cbfa 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -51,6 +51,7 @@ static void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					   RangeTblEntry *rte);
 static void set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 						Index rti, RangeTblEntry *rte);
+static List *accumulate_append_subpath(List *subpaths, Path *path);
 static void set_dummy_rel_pathlist(RelOptInfo *rel);
 static void set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
 					  Index rti, RangeTblEntry *rte);
@@ -283,7 +284,9 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 						Index rti, RangeTblEntry *rte)
 {
 	int			parentRTindex = rti;
+	List	   *live_childrels = NIL;
 	List	   *subpaths = NIL;
+	List	   *all_child_pathkeys = NIL;
 	double		parent_rows;
 	double		parent_size;
 	double	   *parent_attrsizes;
@@ -321,7 +324,7 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		RelOptInfo *childrel;
 		List	   *childquals;
 		Node	   *childqual;
-		Path	   *childpath;
+		ListCell   *lcp;
 		ListCell   *parentvars;
 		ListCell   *childvars;
 
@@ -395,13 +398,15 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 
 		/*
 		 * We have to make child entries in the EquivalenceClass data
-		 * structures as well.
+		 * structures as well.  This is needed either if the parent
+		 * participates in some eclass joins (because we will want to
+		 * consider inner-indexscan joins on the individual children)
+		 * or if the parent has useful pathkeys (because we should try
+		 * to build MergeAppend paths that produce those sort orderings).
 		 */
-		if (rel->has_eclass_joins)
-		{
+		if (rel->has_eclass_joins || has_useful_pathkeys(root, rel))
 			add_child_rel_equivalences(root, appinfo, rel, childrel);
-			childrel->has_eclass_joins = true;
-		}
+		childrel->has_eclass_joins = rel->has_eclass_joins;
 
 		/*
 		 * Note: we could compute appropriate attr_needed data for the child's
@@ -411,23 +416,52 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 		 * otherrels.  So we just leave the child's attr_needed empty.
 		 */
 
+		/* Remember which childrels are live, for MergeAppend logic below */
+		live_childrels = lappend(live_childrels, childrel);
+
 		/*
 		 * Compute the child's access paths, and add the cheapest one to the
 		 * Append path we are constructing for the parent.
-		 *
-		 * It's possible that the child is itself an appendrel, in which case
-		 * we can "cut out the middleman" and just add its child paths to our
-		 * own list.  (We don't try to do this earlier because we need to
-		 * apply both levels of transformation to the quals.)
 		 */
 		set_rel_pathlist(root, childrel, childRTindex, childRTE);
 
-		childpath = childrel->cheapest_total_path;
-		if (IsA(childpath, AppendPath))
-			subpaths = list_concat(subpaths,
-							list_copy(((AppendPath *) childpath)->subpaths));
-		else
-			subpaths = lappend(subpaths, childpath);
+		subpaths = accumulate_append_subpath(subpaths,
+											 childrel->cheapest_total_path);
+
+		/*
+		 * Collect a list of all the available path orderings for all the
+		 * children.  We use this as a heuristic to indicate which sort
+		 * orderings we should build MergeAppend paths for.
+		 */
+		foreach(lcp, childrel->pathlist)
+		{
+			Path	   *childpath = (Path *) lfirst(lcp);
+			List	   *childkeys = childpath->pathkeys;
+			ListCell   *lpk;
+			bool		found = false;
+
+			/* Ignore unsorted paths */
+			if (childkeys == NIL)
+				continue;
+
+			/* Have we already seen this ordering? */
+			foreach(lpk, all_child_pathkeys)
+			{
+				List   *existing_pathkeys = (List *) lfirst(lpk);
+
+				if (compare_pathkeys(existing_pathkeys,
+									 childkeys) == PATHKEYS_EQUAL)
+				{
+					found = true;
+					break;
+				}
+			}
+			if (!found)
+			{
+				/* No, so add it to all_child_pathkeys */
+				all_child_pathkeys = lappend(all_child_pathkeys, childkeys);
+			}
+		}
 
 		/*
 		 * Accumulate size information from each child.
@@ -483,17 +517,107 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
 	pfree(parent_attrsizes);
 
 	/*
-	 * Finally, build Append path and install it as the only access path for
-	 * the parent rel.	(Note: this is correct even if we have zero or one
-	 * live subpath due to constraint exclusion.)
+	 * Next, build an unordered Append path for the rel.  (Note: this is
+	 * correct even if we have zero or one live subpath due to constraint
+	 * exclusion.)
 	 */
 	add_path(rel, (Path *) create_append_path(rel, subpaths));
 
-	/* Select cheapest path (pretty easy in this case...) */
+	/*
+	 * Next, build MergeAppend paths based on the collected list of child
+	 * pathkeys.  We consider both cheapest-startup and cheapest-total
+	 * cases, i.e., for each interesting ordering, collect all the
+	 * cheapest-startup subpaths and all the cheapest-total subpaths, and
+	 * build a MergeAppend path for each list.
+	 */
+	foreach(l, all_child_pathkeys)
+	{
+		List   *pathkeys = (List *) lfirst(l);
+		List   *startup_subpaths = NIL;
+		List   *total_subpaths = NIL;
+		bool	startup_neq_total = false;
+		ListCell *lcr;
+
+		/* Select the child paths for this ordering... */
+		foreach(lcr, live_childrels)
+		{
+			RelOptInfo *childrel = (RelOptInfo *) lfirst(lcr);
+			Path	   *cheapest_startup,
+					   *cheapest_total;
+
+			/* Locate the right paths, if they are available. */
+			cheapest_startup =
+				get_cheapest_path_for_pathkeys(childrel->pathlist,
+											   pathkeys,
+											   STARTUP_COST);
+			cheapest_total =
+				get_cheapest_path_for_pathkeys(childrel->pathlist,
+											   pathkeys,
+											   TOTAL_COST);
+
+			/*
+			 * If we can't find any paths with the right order just add the
+			 * cheapest-total path; we'll have to sort it.
+			 */
+			if (cheapest_startup == NULL)
+				cheapest_startup = childrel->cheapest_total_path;
+			if (cheapest_total == NULL)
+				cheapest_total = childrel->cheapest_total_path;
+
+			/*
+			 * Notice whether we actually have different paths for the
+			 * "cheapest" and "total" cases; frequently there will be no
+			 * point in two create_merge_append_path() calls.
+			 */
+			if (cheapest_startup != cheapest_total)
+				startup_neq_total = true;
+
+			startup_subpaths =
+				accumulate_append_subpath(startup_subpaths, cheapest_startup);
+			total_subpaths =
+				accumulate_append_subpath(total_subpaths, cheapest_total);
+		}
+
+		/* ... and build the MergeAppend paths */
+		add_path(rel, (Path *) create_merge_append_path(root,
+														rel,
+														startup_subpaths,
+														pathkeys));
+		if (startup_neq_total)
+			add_path(rel, (Path *) create_merge_append_path(root,
+															rel,
+															total_subpaths,
+															pathkeys));
+	}
+
+	/* Select cheapest path */
 	set_cheapest(rel);
 }
 
 /*
+ * accumulate_append_subpath
+ *		Add a subpath to the list being built for an Append or MergeAppend,
+ *		returning the updated list.
+ *
+ * If the child is itself an Append path, we "cut out the middleman" and add
+ * its child paths to our own list instead.  (We don't do this earlier since
+ * both levels of transformation must first be applied to the quals.)
+ */
+static List *
+accumulate_append_subpath(List *subpaths, Path *path)
+{
+	if (IsA(path, AppendPath))
+	{
+		AppendPath	*apath = (AppendPath *) path;
+
+		/* list_copy is important here to avoid sharing list substructure */
+		return list_concat(subpaths, list_copy(apath->subpaths));
+	}
+	else
+		return lappend(subpaths, path);
+}
+
+/*
  * set_dummy_rel_pathlist
  *	  Build a dummy path for a relation that's been excluded by constraints
  *
@@ -1385,6 +1509,9 @@ print_path(PlannerInfo *root, Path *path, int indent)
 		case T_AppendPath:
 			ptype = "Append";
 			break;
+		case T_MergeAppendPath:
+			ptype = "MergeAppend";
+			break;
 		case T_ResultPath:
 			ptype = "Result";
 			break;
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index b27dc53fef..067cbca125 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -1210,6 +1210,70 @@ cost_sort(Path *path, PlannerInfo *root,
 }
 
 /*
+ * cost_merge_append
+ *	  Determines the cost of a MergeAppend node, storing it into *path.
+ *
+ * MergeAppend merges several pre-sorted input streams, using a heap that
+ * at any given instant holds the next tuple from each stream.  If there
+ * are N streams, we need about N*log2(N) tuple comparisons to construct
+ * the heap at startup, and then for each output tuple, about log2(N)
+ * comparisons to delete the top heap entry and another log2(N) comparisons
+ * to insert its successor from the same stream.
+ *
+ * (The effective value of N will drop once some of the input streams are
+ * exhausted, but it seems unlikely to be worth trying to account for that.)
+ *
+ * The heap is never spilled to disk, since we assume N is not very large.
+ * So this is much simpler than cost_sort.
+ *
+ * As in cost_sort, we charge two operator evals per tuple comparison.
+ *
+ * 'pathkeys' is a list of sort keys (not consulted by the current estimate)
+ * 'n_streams' is the number of input streams
+ * 'input_startup_cost' is the sum of the input streams' startup costs
+ * 'input_total_cost' is the sum of the input streams' total costs
+ * 'tuples' is the total number of tuples across all the streams
+ */
+void
+cost_merge_append(Path *path, PlannerInfo *root,
+				  List *pathkeys, int n_streams,
+				  Cost input_startup_cost, Cost input_total_cost,
+				  double tuples)
+{
+	Cost		startup_cost = 0;
+	Cost		run_cost = 0;
+	Cost		comparison_cost;
+	double		N;
+	double		logN;
+
+	/*
+	 * Avoid log(0): treat fewer than two input streams as two.
+	 */
+	N = (n_streams < 2) ? 2.0 : (double) n_streams;
+	logN = LOG2(N);
+
+	/* Assumed cost per tuple comparison */
+	comparison_cost = 2.0 * cpu_operator_cost;
+
+	/* Heap creation cost */
+	startup_cost += comparison_cost * N * logN;
+
+	/* Per-tuple heap maintenance cost: one deletion plus one insertion */
+	run_cost += tuples * comparison_cost * 2.0 * logN;
+
+	/*
+	 * Also charge a small amount (arbitrarily set equal to operator cost) per
+	 * extracted tuple.  We don't charge cpu_tuple_cost because a MergeAppend
+	 * node doesn't do qual-checking or projection, so it has less overhead
+	 * than most plan nodes.
+	 */
+	run_cost += cpu_operator_cost * tuples;
+
+	path->startup_cost = startup_cost + input_startup_cost;
+	path->total_cost = startup_cost + run_cost + input_total_cost;
+}
+
+/*
  * cost_material
  *	  Determines and returns the cost of materializing a relation, including
  *	  the cost of reading the input data.
@@ -1405,7 +1469,9 @@ cost_group(Path *path, PlannerInfo *root,
  * output row count, which may be lower than the restriction-clause-only row
  * count of its parent.  (We don't include this case in the PATH_ROWS macro
  * because it applies *only* to a nestloop's inner relation.)  We have to
- * be prepared to recurse through Append nodes in case of an appendrel.
+ * be prepared to recurse through Append or MergeAppend nodes in case of an
+ * appendrel.  (It's not clear MergeAppend can be seen here, but we may as
+ * well handle it if so.)
  */
 static double
 nestloop_inner_path_rows(Path *path)
@@ -1426,6 +1492,16 @@ nestloop_inner_path_rows(Path *path)
 			result += nestloop_inner_path_rows((Path *) lfirst(l));
 		}
 	}
+	else if (IsA(path, MergeAppendPath))
+	{
+		ListCell   *l;
+
+		result = 0;
+		foreach(l, ((MergeAppendPath *) path)->subpaths)
+		{
+			result += nestloop_inner_path_rows((Path *) lfirst(l));
+		}
+	}
 	else
 		result = PATH_ROWS(path);
 
diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index a20ed5f36c..e44e960b54 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -1611,8 +1611,8 @@ exprs_known_equal(PlannerInfo *root, Node *item1, Node *item2)
  *	  Search for EC members that reference (only) the parent_rel, and
  *	  add transformed members referencing the child_rel.
  *
- * We only need to do this for ECs that could generate join conditions,
- * since the child members are only used for creating inner-indexscan paths.
+ * Note that this function won't be called at all unless we have at least some
+ * reason to believe that the EC members it generates will be useful.
  *
  * parent_rel and child_rel could be derived from appinfo, but since the
  * caller has already computed them, we might as well just pass them in.
@@ -1631,10 +1631,14 @@ add_child_rel_equivalences(PlannerInfo *root,
 		ListCell   *lc2;
 
 		/*
-		 * Won't generate joinclauses if const or single-member (the latter
-		 * test covers the volatile case too)
+		 * If this EC contains a constant, then it's not useful for sorting
+		 * or driving an inner index-scan, so we skip generating child EMs.
+		 *
+		 * If this EC contains a volatile expression, then generating child
+		 * EMs would be downright dangerous.  We rely on a volatile EC having
+		 * only one EM.
 		 */
-		if (cur_ec->ec_has_const || list_length(cur_ec->ec_members) <= 1)
+		if (cur_ec->ec_has_const || cur_ec->ec_has_volatile)
 			continue;
 
 		/* No point in searching if parent rel not mentioned in eclass */