diff options
Diffstat (limited to 'src/backend/optimizer/plan/subselect.c')
| -rw-r--r-- | src/backend/optimizer/plan/subselect.c | 348 |
1 files changed, 258 insertions, 90 deletions
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 72e951abd0..1e4e9fe565 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.132 2008/07/10 02:14:03 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.133 2008/08/14 18:47:59 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -21,6 +21,7 @@ #include "optimizer/cost.h" #include "optimizer/planmain.h" #include "optimizer/planner.h" +#include "optimizer/prep.h" #include "optimizer/subselect.h" #include "optimizer/var.h" #include "parser/parse_expr.h" @@ -62,6 +63,7 @@ static Node *convert_testexpr_mutator(Node *node, convert_testexpr_context *context); static bool subplan_is_hashable(SubLink *slink, SubPlan *node, Plan *plan); static bool hash_ok_operator(OpExpr *expr); +static bool simplify_EXISTS_query(Query *query); static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); static Node *process_sublinks_mutator(Node *node, process_sublinks_context *context); @@ -217,11 +219,16 @@ generate_new_param(PlannerInfo *root, Oid paramtype, int32 paramtypmod) static Oid get_first_col_type(Plan *plan) { - TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist); + /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */ + if (plan->targetlist) + { + TargetEntry *tent = (TargetEntry *) linitial(plan->targetlist); - Assert(IsA(tent, TargetEntry)); - Assert(!tent->resjunk); - return exprType((Node *) tent->expr); + Assert(IsA(tent, TargetEntry)); + if (!tent->resjunk) + return exprType((Node *) tent->expr); + } + return VOIDOID; } /* @@ -259,6 +266,12 @@ make_subplan(PlannerInfo *root, SubLink *slink, Node *testexpr, bool isTopQual) subquery = (Query *) copyObject(subquery); /* + * If it's an EXISTS subplan, we might be able to simplify it. + */ + if (slink->subLinkType == EXISTS_SUBLINK) + (void) simplify_EXISTS_query(subquery); + + /* * For an EXISTS subplan, tell lower-level planner to expect that only the * first tuple will be retrieved. For ALL and ANY subplans, we will be * able to stop evaluating if the test condition fails, so very often not @@ -710,80 +723,32 @@ hash_ok_operator(OpExpr *expr) } /* - * convert_IN_to_join: can we convert an IN SubLink to join style? + * convert_ANY_sublink_to_join: can we convert an ANY SubLink to a join? * - * The caller has found a SubLink at the top level of WHERE, but has not - * checked the properties of the SubLink at all. Decide whether it is + * The caller has found an ANY SubLink at the top level of WHERE, but has not + * checked the properties of the SubLink further. Decide whether it is * appropriate to process this SubLink in join style. If not, return NULL. * If so, build the qual clause(s) to replace the SubLink, and return them. + * The qual clauses are wrapped in a FlattenedSubLink node to help later + * processing place them properly. * * Side effects of a successful conversion include adding the SubLink's - * subselect to the query's rangetable and adding an InClauseInfo node to - * its in_info_list. + * subselect to the query's rangetable. */ Node * -convert_IN_to_join(PlannerInfo *root, SubLink *sublink) +convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink) { Query *parse = root->parse; Query *subselect = (Query *) sublink->subselect; - List *in_operators; - List *left_exprs; - List *right_exprs; Relids left_varnos; int rtindex; RangeTblEntry *rte; RangeTblRef *rtr; List *subquery_vars; - InClauseInfo *ininfo; - Node *result; - - /* - * The sublink type must be "= ANY" --- that is, an IN operator. We - * expect that the test expression will be either a single OpExpr, or an - * AND-clause containing OpExprs. (If it's anything else then the parser - * must have determined that the operators have non-equality-like - * semantics. In the OpExpr case we can't be sure what the operator's - * semantics are like, and must check for ourselves.) - */ - if (sublink->subLinkType != ANY_SUBLINK) - return NULL; - if (sublink->testexpr && IsA(sublink->testexpr, OpExpr)) - { - OpExpr *op = (OpExpr *) sublink->testexpr; - Oid opno = op->opno; - List *opfamilies; - List *opstrats; - - if (list_length(op->args) != 2) - return NULL; /* not binary operator? */ - get_op_btree_interpretation(opno, &opfamilies, &opstrats); - if (!list_member_int(opstrats, ROWCOMPARE_EQ)) - return NULL; - in_operators = list_make1_oid(opno); - left_exprs = list_make1(linitial(op->args)); - right_exprs = list_make1(lsecond(op->args)); - } - else if (and_clause(sublink->testexpr)) - { - ListCell *lc; + Expr *quals; + FlattenedSubLink *fslink; - /* OK, but we need to extract the per-column info */ - in_operators = left_exprs = right_exprs = NIL; - foreach(lc, ((BoolExpr *) sublink->testexpr)->args) - { - OpExpr *op = (OpExpr *) lfirst(lc); - - if (!IsA(op, OpExpr)) /* probably shouldn't happen */ - return NULL; - if (list_length(op->args) != 2) - return NULL; /* not binary operator? */ - in_operators = lappend_oid(in_operators, op->opno); - left_exprs = lappend(left_exprs, linitial(op->args)); - right_exprs = lappend(right_exprs, lsecond(op->args)); - } - } - else - return NULL; + Assert(sublink->subLinkType == ANY_SUBLINK); /* * The sub-select must not refer to any Vars of the parent query. (Vars of @@ -793,16 +758,14 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) return NULL; /* - * The left-hand expressions must contain some Vars of the current query, - * else it's not gonna be a join. + * The test expression must contain some Vars of the current query, + * else it's not gonna be a join. (Note that it won't have Vars + * referring to the subquery, rather Params.) */ - left_varnos = pull_varnos((Node *) left_exprs); + left_varnos = pull_varnos(sublink->testexpr); if (bms_is_empty(left_varnos)) return NULL; - /* ... and the right-hand expressions better not contain Vars at all */ - Assert(!contain_var_clause((Node *) right_exprs)); - /* * The combining operators and left-hand expressions mustn't be volatile. */ @@ -819,12 +782,19 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) */ rte = addRangeTableEntryForSubquery(NULL, subselect, - makeAlias("IN_subquery", NIL), + makeAlias("ANY_subquery", NIL), false); parse->rtable = lappend(parse->rtable, rte); rtindex = list_length(parse->rtable); rtr = makeNode(RangeTblRef); rtr->rtindex = rtindex; + + /* + * We assume it's okay to add the pulled-up subquery to the topmost FROM + * list. This should be all right for ANY clauses appearing in WHERE + * or in upper-level plain JOIN/ON clauses. ANYs appearing below any + * outer joins couldn't be placed there, however. + */ parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr); /* @@ -837,34 +807,232 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) /* * Build the result qual expression, replacing Params with these Vars. */ - result = convert_testexpr(root, - sublink->testexpr, - subquery_vars); + quals = (Expr *) convert_testexpr(root, + sublink->testexpr, + subquery_vars); + + /* + * Now build the FlattenedSubLink node. + */ + fslink = makeNode(FlattenedSubLink); + fslink->jointype = JOIN_SEMI; + fslink->lefthand = left_varnos; + fslink->righthand = bms_make_singleton(rtindex); + fslink->quals = quals; + + return (Node *) fslink; +} + +/* + * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery + * + * The only thing that matters about an EXISTS query is whether it returns + * zero or more than zero rows. Therefore, we can remove certain SQL features + * that won't affect that. The only part that is really likely to matter in + * typical usage is simplifying the targetlist: it's a common habit to write + * "SELECT * FROM" even though there is no need to evaluate any columns. + * + * Note: by suppressing the targetlist we could cause an observable behavioral + * change, namely that any errors that might occur in evaluating the tlist + * won't occur, nor will other side-effects of volatile functions. This seems + * unlikely to bother anyone in practice. + * + * Returns TRUE if was able to discard the targetlist, else FALSE. + */ +static bool +simplify_EXISTS_query(Query *query) +{ + /* + * We don't try to simplify at all if the query uses set operations, + * aggregates, HAVING, LIMIT/OFFSET, or FOR UPDATE/SHARE; none of these + * seem likely in normal usage and their possible effects are complex. + */ + if (query->commandType != CMD_SELECT || + query->intoClause || + query->setOperations || + query->hasAggs || + query->havingQual || + query->limitOffset || + query->limitCount || + query->rowMarks) + return false; /* - * Now build the InClauseInfo node. + * Mustn't throw away the targetlist if it contains set-returning + * functions; those could affect whether zero rows are returned! */ - ininfo = makeNode(InClauseInfo); - ininfo->lefthand = left_varnos; - ininfo->righthand = bms_make_singleton(rtindex); - ininfo->in_operators = in_operators; + if (expression_returns_set((Node *) query->targetList)) + return false; /* - * ininfo->sub_targetlist must be filled with a list of expressions that - * would need to be unique-ified if we try to implement the IN using a - * regular join to unique-ified subquery output. This is most easily done - * by applying convert_testexpr to just the RHS inputs of the testexpr - * operators. That handles cases like type coercions of the subquery - * outputs, clauses dropped due to const-simplification, etc. + * Otherwise, we can throw away the targetlist, as well as any GROUP, + * DISTINCT, and ORDER BY clauses; none of those clauses will change + * a nonzero-rows result to zero rows or vice versa. (Furthermore, + * since our parsetree representation of these clauses depends on the + * targetlist, we'd better throw them away if we drop the targetlist.) */ - ininfo->sub_targetlist = (List *) convert_testexpr(root, - (Node *) right_exprs, - subquery_vars); + query->targetList = NIL; + query->groupClause = NIL; + query->distinctClause = NIL; + query->sortClause = NIL; + query->hasDistinctOn = false; - /* Add the completed node to the query's list */ - root->in_info_list = lappend(root->in_info_list, ininfo); + return true; +} - return result; +/* + * convert_EXISTS_sublink_to_join: can we convert an EXISTS SubLink to a join? + * + * The caller has found an EXISTS SubLink at the top level of WHERE, or just + * underneath a NOT, but has not checked the properties of the SubLink + * further. Decide whether it is appropriate to process this SubLink in join + * style. If not, return NULL. If so, build the qual clause(s) to replace + * the SubLink, and return them. (In the NOT case, the returned clauses are + * intended to replace the NOT as well.) The qual clauses are wrapped in a + * FlattenedSubLink node to help later processing place them properly. + * + * Side effects of a successful conversion include adding the SubLink's + * subselect to the query's rangetable. + */ +Node * +convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, + bool under_not) +{ + Query *parse = root->parse; + Query *subselect = (Query *) sublink->subselect; + Node *whereClause; + int rtoffset; + int varno; + Relids clause_varnos; + Relids left_varnos; + Relids right_varnos; + Relids subselect_varnos; + FlattenedSubLink *fslink; + + Assert(sublink->subLinkType == EXISTS_SUBLINK); + + /* + * Copy the subquery so we can modify it safely (see comments in + * make_subplan). + */ + subselect = (Query *) copyObject(subselect); + + /* + * See if the subquery can be simplified based on the knowledge that + * it's being used in EXISTS(). If we aren't able to get rid of its + * targetlist, we have to fail, because the pullup operation leaves + * us with noplace to evaluate the targetlist. + */ + if (!simplify_EXISTS_query(subselect)) + return NULL; + + /* + * Separate out the WHERE clause. (We could theoretically also remove + * top-level plain JOIN/ON clauses, but it's probably not worth the + * trouble.) + */ + whereClause = subselect->jointree->quals; + subselect->jointree->quals = NULL; + + /* + * The rest of the sub-select must not refer to any Vars of the parent + * query. (Vars of higher levels should be okay, though.) + */ + if (contain_vars_of_level((Node *) subselect, 1)) + return NULL; + + /* + * On the other hand, the WHERE clause must contain some Vars of the + * parent query, else it's not gonna be a join. + */ + if (!contain_vars_of_level(whereClause, 1)) + return NULL; + + /* + * We don't risk optimizing if the WHERE clause is volatile, either. + */ + if (contain_volatile_functions(whereClause)) + return NULL; + + /* + * Also disallow SubLinks within the WHERE clause. (XXX this could + * probably be supported, but it would complicate the transformation + * below, and it doesn't seem worth worrying about in a first pass.) + */ + if (contain_subplans(whereClause)) + return NULL; + + /* + * Okay, pull up the sub-select into top range table and jointree. + * + * We rely here on the assumption that the outer query has no references + * to the inner (necessarily true). Therefore this is a lot easier than + * what pull_up_subqueries has to go through. + * + * In fact, it's even easier than what convert_ANY_sublink_to_join has + * to do. The machinations of simplify_EXISTS_query ensured that there + * is nothing interesting in the subquery except an rtable and jointree, + * and even the jointree FromExpr no longer has quals. So we can just + * append the rtable to our own and append the fromlist to our own. + * But first, adjust all level-zero varnos in the subquery to account + * for the rtable merger. + */ + rtoffset = list_length(parse->rtable); + OffsetVarNodes((Node *) subselect, rtoffset, 0); + OffsetVarNodes(whereClause, rtoffset, 0); + + /* + * Upper-level vars in subquery will now be one level closer to their + * parent than before; in particular, anything that had been level 1 + * becomes level zero. + */ + IncrementVarSublevelsUp((Node *) subselect, -1, 1); + IncrementVarSublevelsUp(whereClause, -1, 1); + + /* + * Now that the WHERE clause is adjusted to match the parent query + * environment, we can easily identify all the level-zero rels it uses. + * The ones <= rtoffset are "left rels" of the join we're forming, + * and the ones > rtoffset are "right rels". + */ + clause_varnos = pull_varnos(whereClause); + left_varnos = right_varnos = NULL; + while ((varno = bms_first_member(clause_varnos)) >= 0) + { + if (varno <= rtoffset) + left_varnos = bms_add_member(left_varnos, varno); + else + right_varnos = bms_add_member(right_varnos, varno); + } + bms_free(clause_varnos); + Assert(!bms_is_empty(left_varnos)); + + /* Also identify all the rels syntactically within the subselect */ + subselect_varnos = get_relids_in_jointree((Node *) subselect->jointree); + Assert(bms_is_subset(right_varnos, subselect_varnos)); + + /* Now we can attach the modified subquery rtable to the parent */ + parse->rtable = list_concat(parse->rtable, subselect->rtable); + + /* + * We assume it's okay to add the pulled-up subquery to the topmost FROM + * list. This should be all right for EXISTS clauses appearing in WHERE + * or in upper-level plain JOIN/ON clauses. EXISTS appearing below any + * outer joins couldn't be placed there, however. + */ + parse->jointree->fromlist = list_concat(parse->jointree->fromlist, + subselect->jointree->fromlist); + + /* + * Now build the FlattenedSubLink node. + */ + fslink = makeNode(FlattenedSubLink); + fslink->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; + fslink->lefthand = left_varnos; + fslink->righthand = subselect_varnos; + fslink->quals = (Expr *) whereClause; + + return (Node *) fslink; } /* |
