summaryrefslogtreecommitdiff
path: root/contrib/postgres_fdw/deparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/postgres_fdw/deparse.c')
-rw-r--r--contrib/postgres_fdw/deparse.c72
1 files changed, 70 insertions, 2 deletions
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c
index 9524765650..dfdf78d11e 100644
--- a/contrib/postgres_fdw/deparse.c
+++ b/contrib/postgres_fdw/deparse.c
@@ -2368,13 +2368,56 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel)
}
/*
+ * Construct SELECT statement to acquire the number of rows of a relation.
+ *
+ * Note: we just return the remote server's reltuples value, which might
+ * be off a good deal, but it doesn't seem worth working harder. See
+ * comments in postgresAcquireSampleRowsFunc.
+ */
+void
+deparseAnalyzeTuplesSql(StringInfo buf, Relation rel)
+{
+ StringInfoData relname;
+
+ /* We'll need the remote relation name as a literal. */
+ initStringInfo(&relname);
+ deparseRelation(&relname, rel);
+
+ appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = ");
+ deparseStringLiteral(buf, relname.data);
+ appendStringInfoString(buf, "::pg_catalog.regclass");
+}
+
+/*
* Construct SELECT statement to acquire sample rows of given relation.
*
* SELECT command is appended to buf, and list of columns retrieved
* is returned to *retrieved_attrs.
+ *
+ * We only support sampling methods we can decide based on server version.
+ * Allowing custom TSM modules (like tsm_system_rows) might be useful, but it
+ * would require detecting which extensions are installed, to allow automatic
+ * fall-back. Moreover, the methods may use different parameters like number
+ * of rows (and not sampling rate). So we leave this for future improvements.
+ *
+ * Using random() to sample rows on the remote server has the advantage that
+ * this works on all PostgreSQL versions (unlike TABLESAMPLE), and that it
+ * does the sampling on the remote side (without transferring everything and
+ * then discarding most rows).
+ *
+ * The disadvantage is that we still have to read all rows and evaluate the
+ * random(), while TABLESAMPLE (at least with the "system" method) may skip.
+ * It's not that different from the "bernoulli" method, though.
+ *
+ * We could also do "ORDER BY random() LIMIT x", which would always pick
+ * the expected number of rows, but it requires sorting so it may be much
+ * more expensive (particularly on large tables, which is what what the
+ * remote sampling is meant to improve).
*/
void
-deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
+deparseAnalyzeSql(StringInfo buf, Relation rel,
+ PgFdwSamplingMethod sample_method, double sample_frac,
+ List **retrieved_attrs)
{
Oid relid = RelationGetRelid(rel);
TupleDesc tupdesc = RelationGetDescr(rel);
@@ -2422,10 +2465,35 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs)
appendStringInfoString(buf, "NULL");
/*
- * Construct FROM clause
+ * Construct FROM clause, and perhaps WHERE clause too, depending on the
+ * selected sampling method.
*/
appendStringInfoString(buf, " FROM ");
deparseRelation(buf, rel);
+
+ switch (sample_method)
+ {
+ case ANALYZE_SAMPLE_OFF:
+ /* nothing to do here */
+ break;
+
+ case ANALYZE_SAMPLE_RANDOM:
+ appendStringInfo(buf, " WHERE pg_catalog.random() < %f", sample_frac);
+ break;
+
+ case ANALYZE_SAMPLE_SYSTEM:
+ appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac));
+ break;
+
+ case ANALYZE_SAMPLE_BERNOULLI:
+ appendStringInfo(buf, " TABLESAMPLE BERNOULLI(%f)", (100.0 * sample_frac));
+ break;
+
+ case ANALYZE_SAMPLE_AUTO:
+ /* should have been resolved into actual method */
+ elog(ERROR, "unexpected sampling method");
+ break;
+ }
}
/*