diff options
Diffstat (limited to 'contrib/postgres_fdw/deparse.c')
| -rw-r--r-- | contrib/postgres_fdw/deparse.c | 72 |
1 files changed, 70 insertions, 2 deletions
diff --git a/contrib/postgres_fdw/deparse.c b/contrib/postgres_fdw/deparse.c index 9524765650..dfdf78d11e 100644 --- a/contrib/postgres_fdw/deparse.c +++ b/contrib/postgres_fdw/deparse.c @@ -2368,13 +2368,56 @@ deparseAnalyzeSizeSql(StringInfo buf, Relation rel) } /* + * Construct SELECT statement to acquire the number of rows of a relation. + * + * Note: we just return the remote server's reltuples value, which might + * be off a good deal, but it doesn't seem worth working harder. See + * comments in postgresAcquireSampleRowsFunc. + */ +void +deparseAnalyzeTuplesSql(StringInfo buf, Relation rel) +{ + StringInfoData relname; + + /* We'll need the remote relation name as a literal. */ + initStringInfo(&relname); + deparseRelation(&relname, rel); + + appendStringInfoString(buf, "SELECT reltuples FROM pg_catalog.pg_class WHERE oid = "); + deparseStringLiteral(buf, relname.data); + appendStringInfoString(buf, "::pg_catalog.regclass"); +} + +/* * Construct SELECT statement to acquire sample rows of given relation. * * SELECT command is appended to buf, and list of columns retrieved * is returned to *retrieved_attrs. + * + * We only support sampling methods we can decide based on server version. + * Allowing custom TSM modules (like tsm_system_rows) might be useful, but it + * would require detecting which extensions are installed, to allow automatic + * fall-back. Moreover, the methods may use different parameters like number + * of rows (and not sampling rate). So we leave this for future improvements. + * + * Using random() to sample rows on the remote server has the advantage that + * this works on all PostgreSQL versions (unlike TABLESAMPLE), and that it + * does the sampling on the remote side (without transferring everything and + * then discarding most rows). + * + * The disadvantage is that we still have to read all rows and evaluate the + * random(), while TABLESAMPLE (at least with the "system" method) may skip. + * It's not that different from the "bernoulli" method, though. + * + * We could also do "ORDER BY random() LIMIT x", which would always pick + * the expected number of rows, but it requires sorting so it may be much + * more expensive (particularly on large tables, which is what what the + * remote sampling is meant to improve). */ void -deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) +deparseAnalyzeSql(StringInfo buf, Relation rel, + PgFdwSamplingMethod sample_method, double sample_frac, + List **retrieved_attrs) { Oid relid = RelationGetRelid(rel); TupleDesc tupdesc = RelationGetDescr(rel); @@ -2422,10 +2465,35 @@ deparseAnalyzeSql(StringInfo buf, Relation rel, List **retrieved_attrs) appendStringInfoString(buf, "NULL"); /* - * Construct FROM clause + * Construct FROM clause, and perhaps WHERE clause too, depending on the + * selected sampling method. */ appendStringInfoString(buf, " FROM "); deparseRelation(buf, rel); + + switch (sample_method) + { + case ANALYZE_SAMPLE_OFF: + /* nothing to do here */ + break; + + case ANALYZE_SAMPLE_RANDOM: + appendStringInfo(buf, " WHERE pg_catalog.random() < %f", sample_frac); + break; + + case ANALYZE_SAMPLE_SYSTEM: + appendStringInfo(buf, " TABLESAMPLE SYSTEM(%f)", (100.0 * sample_frac)); + break; + + case ANALYZE_SAMPLE_BERNOULLI: + appendStringInfo(buf, " TABLESAMPLE BERNOULLI(%f)", (100.0 * sample_frac)); + break; + + case ANALYZE_SAMPLE_AUTO: + /* should have been resolved into actual method */ + elog(ERROR, "unexpected sampling method"); + break; + } } /* |
