1 files changed, 70 insertions, 2 deletions
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9524765650..dfdf78d11e 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2368,13 +2368,56 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
 }
 
 /*
+ * Construct SELECT statement to acquire the number of rows of a relation.
+ *
+ * Note: we just return the remote server's reltuples value, which might
+ * be off a good deal, but it doesn't seem worth working harder.  See
+ * comments in postgresAcquireSampleRowsFunc.
+ */
+void
+deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
+{
+	StringInfoData relname;
+
+	/* We'll need the remote relation name as a literal. */
+	initStringInfo(&relname);
+	deparseRelation(&relname, rel);
+
+	appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
+	deparseStringLiteral(buf, relname.data);
+	appendStringInfoString(buf, "::pg_catalog.regclass");
+}
+
+/*
  * Construct SELECT statement to acquire sample rows of given relation.
  *
  * SELECT command is appended to buf, and list of columns retrieved
  * is returned to *retrieved_attrs.
+ *
+ * We only support sampling methods we can decide based on server version.
+ * Allowing custom TSM modules (like tsm_system_rows) might be useful, but it
+ * would require detecting which extensions are installed, to allow automatic
+ * fall-back. Moreover, the methods may use different parameters like number
+ * of rows (and not sampling rate). So we leave this for future improvements.
+ *
+ * Using random() to sample rows on the remote server has the advantage that
+ * this works on all PostgreSQL versions (unlike TABLESAMPLE), and that it
+ * does the sampling on the remote side (without transferring everything and
+ * then discarding most rows).
+ *
+ * The disadvantage is that we still have to read all rows and evaluate the
+ * random(), while TABLESAMPLE (at least with the "system" method) may skip.
+ * It's not that different from the "bernoulli" method, though.
+ *
+ * We could also do "ORDER BY random() LIMIT x", which would always pick
+ * the expected number of rows, but it requires sorting so it may be much
+ * more expensive (particularly on large tables, which is what what the
+ * remote sampling is meant to improve).
  */
 void
-deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
+deparseAnalyzeSql(StringInfo buf, Relation rel,
+				  PgFdwSamplingMethod sample_method, double sample_frac,
+				  List **retrieved_attrs)
 {
 	Oid			relid = RelationGetRelid(rel);
 	TupleDesc	tupdesc = RelationGetDescr(rel);
@@ -2422,10 +2465,35 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
 		appendStringInfoString(buf, "NULL");
 
 	/*
-	 * Construct FROM clause
+	 * Construct FROM clause, and perhaps WHERE clause too, depending on the
+	 * selected sampling method.
 	 */
 	appendStringInfoString(buf, " FROM ");
 	deparseRelation(buf, rel);
+
+	switch (sample_method)
+	{
+		case ANALYZE_SAMPLE_OFF:
+			/* nothing to do here */
+			break;
+
+		case ANALYZE_SAMPLE_RANDOM:
+			appendStringInfo(buf, " WHERE pg_catalog.random() < %f", sample_frac);
+			break;
+
+		case ANALYZE_SAMPLE_SYSTEM:
+			appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
+			break;
+
+		case ANALYZE_SAMPLE_BERNOULLI:
+			appendStringInfo(buf, " TABLESAMPLE BERNOULLI(%f)", (100.0 * sample_frac));
+			break;
+
+		case ANALYZE_SAMPLE_AUTO:
+			/* should have been resolved into actual method */
+			elog(ERROR, "unexpected sampling method");
+			break;
+	}
 }
 
 /*