summaryrefslogtreecommitdiff
path: root/src/backend/utils/adt
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2019-02-09 18:32:23 -0500
committerTom Lane <tgl@sss.pgh.pa.us>2019-02-09 18:32:23 -0500
commita391ff3c3d418e404a2c6e4ff0865a107752827b (patch)
treef076c5785d06e7ccf082b08c1651b17d95af4563 /src/backend/utils/adt
parent1fb57af92069ee104c09e2016af9e0e620681be3 (diff)
downloadpostgresql-a391ff3c3d418e404a2c6e4ff0865a107752827b.tar.gz
Build out the planner support function infrastructure.
Add support function requests for estimating the selectivity, cost, and number of result rows (if a SRF) of the target function. The lack of a way to estimate selectivity of a boolean-returning function in WHERE has been a recognized deficiency of the planner since Berkeley days. This commit finally fixes it. In addition, non-constant estimates of cost and number of output rows are now possible. We still fall back to looking at procost and prorows if the support function doesn't service the request, of course. To make concrete use of the possibility of estimating output rowcount for SRFs, this commit adds support functions for array_unnest(anyarray) and the integer variants of generate_series; the lack of plausible rowcount estimates for those, even when it's obvious to a human, has been a repeated subject of complaints. Obviously, much more could now be done in this line, but I'm mostly just trying to get the infrastructure in place. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r--src/backend/utils/adt/arrayfuncs.c34
-rw-r--r--src/backend/utils/adt/int.c74
-rw-r--r--src/backend/utils/adt/int8.c73
-rw-r--r--src/backend/utils/adt/selfuncs.c13
4 files changed, 182 insertions, 12 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a785361fd0..5b2917d159 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -22,12 +22,16 @@
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
#include "utils/array.h"
#include "utils/arrayaccess.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/selfuncs.h"
#include "utils/typcache.h"
@@ -6025,6 +6029,36 @@ array_unnest(PG_FUNCTION_ARGS)
}
}
+/*
+ * Planner support function for array_unnest(anyarray)
+ */
+Datum
+array_unnest_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+
+ req->rows = estimate_array_length(arg1);
+ ret = (Node *) req;
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
/*
* array_replace/array_remove support
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index ad8e6d02ee..04825fc77d 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -30,11 +30,15 @@
#include <ctype.h>
#include <limits.h>
+#include <math.h>
#include "catalog/pg_type.h"
#include "common/int.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
#include "utils/array.h"
#include "utils/builtins.h"
@@ -1427,3 +1431,73 @@ generate_series_step_int4(PG_FUNCTION_ARGS)
/* do when there is no more left */
SRF_RETURN_DONE(funcctx);
}
+
+/*
+ * Planner support function for generate_series(int4, int4 [, int4])
+ */
+Datum
+generate_series_int4_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ if (list_length(args) >= 3)
+ arg3 = estimate_expression_value(req->root, lthird(args));
+ else
+ arg3 = NULL;
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned. Use double arithmetic to avoid overflow hazards.
+ */
+ if ((IsA(arg1, Const) &&
+ ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) &&
+ ((Const *) arg2)->constisnull) ||
+ (arg3 != NULL && IsA(arg3, Const) &&
+ ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) &&
+ IsA(arg2, Const) &&
+ (arg3 == NULL || IsA(arg3, Const)))
+ {
+ double start,
+ finish,
+ step;
+
+ start = DatumGetInt32(((Const *) arg1)->constvalue);
+ finish = DatumGetInt32(((Const *) arg2)->constvalue);
+ step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1;
+
+ /* This equation works for either sign of step */
+ if (step != 0)
+ {
+ req->rows = floor((finish - start + step) / step);
+ ret = (Node *) req;
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index d16cc9e574..0ff9394a2f 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -20,6 +20,9 @@
#include "common/int.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
#include "utils/int8.h"
#include "utils/builtins.h"
@@ -1373,3 +1376,73 @@ generate_series_step_int8(PG_FUNCTION_ARGS)
/* do when there is no more left */
SRF_RETURN_DONE(funcctx);
}
+
+/*
+ * Planner support function for generate_series(int8, int8 [, int8])
+ */
+Datum
+generate_series_int8_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ if (list_length(args) >= 3)
+ arg3 = estimate_expression_value(req->root, lthird(args));
+ else
+ arg3 = NULL;
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned. Use double arithmetic to avoid overflow hazards.
+ */
+ if ((IsA(arg1, Const) &&
+ ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) &&
+ ((Const *) arg2)->constisnull) ||
+ (arg3 != NULL && IsA(arg3, Const) &&
+ ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) &&
+ IsA(arg2, Const) &&
+ (arg3 == NULL || IsA(arg3, Const)))
+ {
+ double start,
+ finish,
+ step;
+
+ start = DatumGetInt64(((Const *) arg1)->constvalue);
+ finish = DatumGetInt64(((Const *) arg2)->constvalue);
+ step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1;
+
+ /* This equation works for either sign of step */
+ if (step != 0)
+ {
+ req->rows = floor((finish - start + step) / step);
+ ret = (Node *) req;
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 74fafc64f3..1ef6faecd1 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -1577,17 +1577,6 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
selec = var_eq_const(&vardata, BooleanEqualOperator,
BoolGetDatum(true), false, true, false);
}
- else if (is_funcclause(arg))
- {
- /*
- * If we have no stats and it's a function call, estimate 0.3333333.
- * This seems a pretty unprincipled choice, but Postgres has been
- * using that estimate for function calls since 1992. The hoariness
- * of this behavior suggests that we should not be in too much hurry
- * to use another value.
- */
- selec = 0.3333333;
- }
else
{
/* Otherwise, the default estimate is 0.5 */
@@ -3502,7 +3491,7 @@ estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
* pointless to worry too much about this without much better
* estimates for SRF output rowcounts than we have today.)
*/
- this_srf_multiplier = expression_returns_set_rows(groupexpr);
+ this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
if (srf_multiplier < this_srf_multiplier)
srf_multiplier = this_srf_multiplier;