summaryrefslogtreecommitdiff
path: root/src/backend/optimizer/plan/subselect.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/optimizer/plan/subselect.c')
-rw-r--r--src/backend/optimizer/plan/subselect.c348
1 files changed, 258 insertions, 90 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 72e951abd0..1e4e9fe565 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.132 2008/07/10 02:14:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.133 2008/08/14 18:47:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,6 +21,7 @@
#include "optimizer/cost.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
+#include "optimizer/prep.h"
#include "optimizer/subselect.h"
#include "optimizer/var.h"
#include "parser/parse_expr.h"
@@ -62,6 +63,7 @@ static Node *convert_testexpr_mutator(Node *node,
convert_testexpr_context *context);
static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan);
static bool hash_ok_operator(OpExpr *expr);
+static bool simplify_EXISTS_query(Query *query);
static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root);
static Node *process_sublinks_mutator(Node *node,
process_sublinks_context *context);
@@ -217,11 +219,16 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod)
static Oid
get_first_col_type(Plan *plan)
{
- TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
+ /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */
+ if (plan->targetlist)
+ {
+ TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist);
- Assert(IsA(tent, TargetEntry));
- Assert(!tent->resjunk);
- return exprType((Node *) tent->expr);
+ Assert(IsA(tent, TargetEntry));
+ if (!tent->resjunk)
+ return exprType((Node *) tent->expr);
+ }
+ return VOIDOID;
}
/*
@@ -259,6 +266,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual)
subquery = (Query *) copyObject(subquery);
/*
+ * If it's an EXISTS subplan, we might be able to simplify it.
+ */
+ if (slink->subLinkType == EXISTS_SUBLINK)
+ (void) simplify_EXISTS_query(subquery);
+
+ /*
* For an EXISTS subplan, tell lower-level planner to expect that only the
* first tuple will be retrieved. For ALL and ANY subplans, we will be
* able to stop evaluating if the test condition fails, so very often not
@@ -710,80 +723,32 @@ hash_ok_operator(OpExpr *expr)
}
/*
- * convert_IN_to_join: can we convert an IN SubLink to join style?
+ * convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join?
*
- * The caller has found a SubLink at the top level of WHERE, but has not
- * checked the properties of the SubLink at all. Decide whether it is
+ * The caller has found an ANY SubLink at the top level of WHERE, but has not
+ * checked the properties of the SubLink further. Decide whether it is
* appropriate to process this SubLink in join style. If not, return NULL.
* If so, build the qual clause(s) to replace the SubLink, and return them.
+ * The qual clauses are wrapped in a FlattenedSubLink node to help later
+ * processing place them properly.
*
* Side effects of a successful conversion include adding the SubLink's
- * subselect to the query's rangetable and adding an InClauseInfo node to
- * its in_info_list.
+ * subselect to the query's rangetable.
*/
Node *
-convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
+convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink)
{
Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect;
- List *in_operators;
- List *left_exprs;
- List *right_exprs;
Relids left_varnos;
int rtindex;
RangeTblEntry *rte;
RangeTblRef *rtr;
List *subquery_vars;
- InClauseInfo *ininfo;
- Node *result;
-
- /*
- * The sublink type must be "= ANY" --- that is, an IN operator. We
- * expect that the test expression will be either a single OpExpr, or an
- * AND-clause containing OpExprs. (If it's anything else then the parser
- * must have determined that the operators have non-equality-like
- * semantics. In the OpExpr case we can't be sure what the operator's
- * semantics are like, and must check for ourselves.)
- */
- if (sublink->subLinkType != ANY_SUBLINK)
- return NULL;
- if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
- {
- OpExpr *op = (OpExpr *) sublink->testexpr;
- Oid opno = op->opno;
- List *opfamilies;
- List *opstrats;
-
- if (list_length(op->args) != 2)
- return NULL; /* not binary operator? */
- get_op_btree_interpretation(opno, &opfamilies, &opstrats);
- if (!list_member_int(opstrats, ROWCOMPARE_EQ))
- return NULL;
- in_operators = list_make1_oid(opno);
- left_exprs = list_make1(linitial(op->args));
- right_exprs = list_make1(lsecond(op->args));
- }
- else if (and_clause(sublink->testexpr))
- {
- ListCell *lc;
+ Expr *quals;
+ FlattenedSubLink *fslink;
- /* OK, but we need to extract the per-column info */
- in_operators = left_exprs = right_exprs = NIL;
- foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
- {
- OpExpr *op = (OpExpr *) lfirst(lc);
-
- if (!IsA(op, OpExpr)) /* probably shouldn't happen */
- return NULL;
- if (list_length(op->args) != 2)
- return NULL; /* not binary operator? */
- in_operators = lappend_oid(in_operators, op->opno);
- left_exprs = lappend(left_exprs, linitial(op->args));
- right_exprs = lappend(right_exprs, lsecond(op->args));
- }
- }
- else
- return NULL;
+ Assert(sublink->subLinkType == ANY_SUBLINK);
/*
* The sub-select must not refer to any Vars of the parent query. (Vars of
@@ -793,16 +758,14 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
return NULL;
/*
- * The left-hand expressions must contain some Vars of the current query,
- * else it's not gonna be a join.
+ * The test expression must contain some Vars of the current query,
+ * else it's not gonna be a join. (Note that it won't have Vars
+ * referring to the subquery, rather Params.)
*/
- left_varnos = pull_varnos((Node *) left_exprs);
+ left_varnos = pull_varnos(sublink->testexpr);
if (bms_is_empty(left_varnos))
return NULL;
- /* ... and the right-hand expressions better not contain Vars at all */
- Assert(!contain_var_clause((Node *) right_exprs));
-
/*
* The combining operators and left-hand expressions mustn't be volatile.
*/
@@ -819,12 +782,19 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
*/
rte = addRangeTableEntryForSubquery(NULL,
subselect,
- makeAlias("IN_subquery", NIL),
+ makeAlias("ANY_subquery", NIL),
false);
parse->rtable = lappend(parse->rtable, rte);
rtindex = list_length(parse->rtable);
rtr = makeNode(RangeTblRef);
rtr->rtindex = rtindex;
+
+ /*
+ * We assume it's okay to add the pulled-up subquery to the topmost FROM
+ * list. This should be all right for ANY clauses appearing in WHERE
+ * or in upper-level plain JOIN/ON clauses. ANYs appearing below any
+ * outer joins couldn't be placed there, however.
+ */
parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
/*
@@ -837,34 +807,232 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
/*
* Build the result qual expression, replacing Params with these Vars.
*/
- result = convert_testexpr(root,
- sublink->testexpr,
- subquery_vars);
+ quals = (Expr *) convert_testexpr(root,
+ sublink->testexpr,
+ subquery_vars);
+
+ /*
+ * Now build the FlattenedSubLink node.
+ */
+ fslink = makeNode(FlattenedSubLink);
+ fslink->jointype = JOIN_SEMI;
+ fslink->lefthand = left_varnos;
+ fslink->righthand = bms_make_singleton(rtindex);
+ fslink->quals = quals;
+
+ return (Node *) fslink;
+}
+
+/*
+ * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery
+ *
+ * The only thing that matters about an EXISTS query is whether it returns
+ * zero or more than zero rows. Therefore, we can remove certain SQL features
+ * that won't affect that. The only part that is really likely to matter in
+ * typical usage is simplifying the targetlist: it's a common habit to write
+ * "SELECT * FROM" even though there is no need to evaluate any columns.
+ *
+ * Note: by suppressing the targetlist we could cause an observable behavioral
+ * change, namely that any errors that might occur in evaluating the tlist
+ * won't occur, nor will other side-effects of volatile functions. This seems
+ * unlikely to bother anyone in practice.
+ *
+ * Returns TRUE if was able to discard the targetlist, else FALSE.
+ */
+static bool
+simplify_EXISTS_query(Query *query)
+{
+ /*
+ * We don't try to simplify at all if the query uses set operations,
+ * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these
+ * seem likely in normal usage and their possible effects are complex.
+ */
+ if (query->commandType != CMD_SELECT ||
+ query->intoClause ||
+ query->setOperations ||
+ query->hasAggs ||
+ query->havingQual ||
+ query->limitOffset ||
+ query->limitCount ||
+ query->rowMarks)
+ return false;
/*
- * Now build the InClauseInfo node.
+ * Mustn't throw away the targetlist if it contains set-returning
+ * functions; those could affect whether zero rows are returned!
*/
- ininfo = makeNode(InClauseInfo);
- ininfo->lefthand = left_varnos;
- ininfo->righthand = bms_make_singleton(rtindex);
- ininfo->in_operators = in_operators;
+ if (expression_returns_set((Node *) query->targetList))
+ return false;
/*
- * ininfo->sub_targetlist must be filled with a list of expressions that
- * would need to be unique-ified if we try to implement the IN using a
- * regular join to unique-ified subquery output. This is most easily done
- * by applying convert_testexpr to just the RHS inputs of the testexpr
- * operators. That handles cases like type coercions of the subquery
- * outputs, clauses dropped due to const-simplification, etc.
+ * Otherwise, we can throw away the targetlist, as well as any GROUP,
+ * DISTINCT, and ORDER BY clauses; none of those clauses will change
+ * a nonzero-rows result to zero rows or vice versa. (Furthermore,
+ * since our parsetree representation of these clauses depends on the
+ * targetlist, we'd better throw them away if we drop the targetlist.)
*/
- ininfo->sub_targetlist = (List *) convert_testexpr(root,
- (Node *) right_exprs,
- subquery_vars);
+ query->targetList = NIL;
+ query->groupClause = NIL;
+ query->distinctClause = NIL;
+ query->sortClause = NIL;
+ query->hasDistinctOn = false;
- /* Add the completed node to the query's list */
- root->in_info_list = lappend(root->in_info_list, ininfo);
+ return true;
+}
- return result;
+/*
+ * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join?
+ *
+ * The caller has found an EXISTS SubLink at the top level of WHERE, or just
+ * underneath a NOT, but has not checked the properties of the SubLink
+ * further. Decide whether it is appropriate to process this SubLink in join
+ * style. If not, return NULL. If so, build the qual clause(s) to replace
+ * the SubLink, and return them. (In the NOT case, the returned clauses are
+ * intended to replace the NOT as well.) The qual clauses are wrapped in a
+ * FlattenedSubLink node to help later processing place them properly.
+ *
+ * Side effects of a successful conversion include adding the SubLink's
+ * subselect to the query's rangetable.
+ */
+Node *
+convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
+ bool under_not)
+{
+ Query *parse = root->parse;
+ Query *subselect = (Query *) sublink->subselect;
+ Node *whereClause;
+ int rtoffset;
+ int varno;
+ Relids clause_varnos;
+ Relids left_varnos;
+ Relids right_varnos;
+ Relids subselect_varnos;
+ FlattenedSubLink *fslink;
+
+ Assert(sublink->subLinkType == EXISTS_SUBLINK);
+
+ /*
+ * Copy the subquery so we can modify it safely (see comments in
+ * make_subplan).
+ */
+ subselect = (Query *) copyObject(subselect);
+
+ /*
+ * See if the subquery can be simplified based on the knowledge that
+ * it's being used in EXISTS(). If we aren't able to get rid of its
+ * targetlist, we have to fail, because the pullup operation leaves
+ * us with noplace to evaluate the targetlist.
+ */
+ if (!simplify_EXISTS_query(subselect))
+ return NULL;
+
+ /*
+ * Separate out the WHERE clause. (We could theoretically also remove
+ * top-level plain JOIN/ON clauses, but it's probably not worth the
+ * trouble.)
+ */
+ whereClause = subselect->jointree->quals;
+ subselect->jointree->quals = NULL;
+
+ /*
+ * The rest of the sub-select must not refer to any Vars of the parent
+ * query. (Vars of higher levels should be okay, though.)
+ */
+ if (contain_vars_of_level((Node *) subselect, 1))
+ return NULL;
+
+ /*
+ * On the other hand, the WHERE clause must contain some Vars of the
+ * parent query, else it's not gonna be a join.
+ */
+ if (!contain_vars_of_level(whereClause, 1))
+ return NULL;
+
+ /*
+ * We don't risk optimizing if the WHERE clause is volatile, either.
+ */
+ if (contain_volatile_functions(whereClause))
+ return NULL;
+
+ /*
+ * Also disallow SubLinks within the WHERE clause. (XXX this could
+ * probably be supported, but it would complicate the transformation
+ * below, and it doesn't seem worth worrying about in a first pass.)
+ */
+ if (contain_subplans(whereClause))
+ return NULL;
+
+ /*
+ * Okay, pull up the sub-select into top range table and jointree.
+ *
+ * We rely here on the assumption that the outer query has no references
+ * to the inner (necessarily true). Therefore this is a lot easier than
+ * what pull_up_subqueries has to go through.
+ *
+ * In fact, it's even easier than what convert_ANY_sublink_to_join has
+ * to do. The machinations of simplify_EXISTS_query ensured that there
+ * is nothing interesting in the subquery except an rtable and jointree,
+ * and even the jointree FromExpr no longer has quals. So we can just
+ * append the rtable to our own and append the fromlist to our own.
+ * But first, adjust all level-zero varnos in the subquery to account
+ * for the rtable merger.
+ */
+ rtoffset = list_length(parse->rtable);
+ OffsetVarNodes((Node *) subselect, rtoffset, 0);
+ OffsetVarNodes(whereClause, rtoffset, 0);
+
+ /*
+ * Upper-level vars in subquery will now be one level closer to their
+ * parent than before; in particular, anything that had been level 1
+ * becomes level zero.
+ */
+ IncrementVarSublevelsUp((Node *) subselect, -1, 1);
+ IncrementVarSublevelsUp(whereClause, -1, 1);
+
+ /*
+ * Now that the WHERE clause is adjusted to match the parent query
+ * environment, we can easily identify all the level-zero rels it uses.
+ * The ones <= rtoffset are "left rels" of the join we're forming,
+ * and the ones > rtoffset are "right rels".
+ */
+ clause_varnos = pull_varnos(whereClause);
+ left_varnos = right_varnos = NULL;
+ while ((varno = bms_first_member(clause_varnos)) >= 0)
+ {
+ if (varno <= rtoffset)
+ left_varnos = bms_add_member(left_varnos, varno);
+ else
+ right_varnos = bms_add_member(right_varnos, varno);
+ }
+ bms_free(clause_varnos);
+ Assert(!bms_is_empty(left_varnos));
+
+ /* Also identify all the rels syntactically within the subselect */
+ subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree);
+ Assert(bms_is_subset(right_varnos, subselect_varnos));
+
+ /* Now we can attach the modified subquery rtable to the parent */
+ parse->rtable = list_concat(parse->rtable, subselect->rtable);
+
+ /*
+ * We assume it's okay to add the pulled-up subquery to the topmost FROM
+ * list. This should be all right for EXISTS clauses appearing in WHERE
+ * or in upper-level plain JOIN/ON clauses. EXISTS appearing below any
+ * outer joins couldn't be placed there, however.
+ */
+ parse->jointree->fromlist = list_concat(parse->jointree->fromlist,
+ subselect->jointree->fromlist);
+
+ /*
+ * Now build the FlattenedSubLink node.
+ */
+ fslink = makeNode(FlattenedSubLink);
+ fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
+ fslink->lefthand = left_varnos;
+ fslink->righthand = subselect_varnos;
+ fslink->quals = (Expr *) whereClause;
+
+ return (Node *) fslink;
}
/*