From: Richard Guo Date: Thu, 13 Mar 2025 07:36:03 +0000 (+0900) Subject: Fix incorrect handling of subquery pullup X-Git-Tag: REL_18_BETA1~594 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=cc5d98525d43c22b98f360ef0f2c8d7dc57f04dc;p=postgresql.git Fix incorrect handling of subquery pullup When pulling up a subquery, if the subquery's target list items are used in grouping set columns, we need to wrap them in PlaceHolderVars. This ensures that expressions retain their separate identity so that they will match grouping set columns when appropriate. In 90947674f, we decided to wrap subquery outputs that are non-var expressions in PlaceHolderVars. This prevents const-simplification from merging them into the surrounding expressions after subquery pullup, which could otherwise lead to failing to match those subexpressions to grouping set columns, with the effect that they'd not go to null when expected. However, that left some loose ends. If the subquery's target list contains two or more identical Var expressions, we can still fail to match the Var expression to the expected grouping set expression. This is not related to const-simplification, but rather to how we match expressions to lower target items in setrefs.c. For sort/group expressions, we use ressortgroupref matching, which works well. For other expressions, we primarily rely on comparing the expressions to determine if they are the same. Therefore, we need a way to prevent setrefs.c from matching the expression to some other identical ones. To fix, wrap all subquery outputs in PlaceHolderVars if the parent query uses grouping sets, ensuring that they preserve their separate identity throughout the whole planning process. Reported-by: Dean Rasheed Author: Richard Guo Reviewed-by: Dean Rasheed Discussion: https://postgr.es/m/CAMbWs4-meSahaanKskpBn0KKxdHAXC1_EJCVWHxEodqirrGJnw@mail.gmail.com --- diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 794b98e560d..d131a5bbc59 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -57,6 +57,15 @@ typedef struct nullingrel_info int rtlength; /* used only for assertion checks */ } nullingrel_info; +/* Options for wrapping an expression for identification purposes */ +typedef enum ReplaceWrapOption +{ + REPLACE_WRAP_NONE, /* no expressions need to be wrapped */ + REPLACE_WRAP_ALL, /* all expressions need to be wrapped */ + REPLACE_WRAP_VARFREE, /* variable-free expressions need to be + * wrapped */ +} ReplaceWrapOption; + typedef struct pullup_replace_vars_context { PlannerInfo *root; @@ -70,7 +79,7 @@ typedef struct pullup_replace_vars_context * target_rte->lateral) */ bool *outer_hasSubLinks; /* -> outer query's hasSubLinks */ int varno; /* varno of subquery */ - bool wrap_non_vars; /* do we need all non-Var outputs to be PHVs? */ + ReplaceWrapOption wrap_option; /* do we need certain outputs to be PHVs? */ Node **rv_cache; /* cache for results with PHVs */ } pullup_replace_vars_context; @@ -1025,23 +1034,18 @@ expand_virtual_generated_columns(PlannerInfo *root) rvcontext.outer_hasSubLinks = NULL; rvcontext.varno = rt_index; /* this flag will be set below, if needed */ - rvcontext.wrap_non_vars = false; + rvcontext.wrap_option = REPLACE_WRAP_NONE; /* initialize cache array with indexes 0 .. length(tlist) */ rvcontext.rv_cache = palloc0((list_length(tlist) + 1) * sizeof(Node *)); /* * If the query uses grouping sets, we need a PlaceHolderVar for - * anything that's not a simple Var. Again, this ensures that - * expressions retain their separate identity so that they will - * match grouping set columns when appropriate. (It'd be - * sufficient to wrap values used in grouping set columns, and do - * so only in non-aggregated portions of the tlist and havingQual, - * but that would require a lot of infrastructure that - * pullup_replace_vars hasn't currently got.) + * each expression of the relation's targetlist items. (See + * comments in pull_up_simple_subquery().) */ if (parse->groupingSets) - rvcontext.wrap_non_vars = true; + rvcontext.wrap_option = REPLACE_WRAP_ALL; /* * Apply pullup variable replacement throughout the query tree. @@ -1435,22 +1439,22 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte, rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = varno; /* this flag will be set below, if needed */ - rvcontext.wrap_non_vars = false; + rvcontext.wrap_option = REPLACE_WRAP_NONE; /* initialize cache array with indexes 0 .. length(tlist) */ rvcontext.rv_cache = palloc0((list_length(subquery->targetList) + 1) * sizeof(Node *)); /* * If the parent query uses grouping sets, we need a PlaceHolderVar for - * anything that's not a simple Var. This ensures that expressions retain - * their separate identity so that they will match grouping set columns - * when appropriate. (It'd be sufficient to wrap values used in grouping - * set columns, and do so only in non-aggregated portions of the tlist and - * havingQual, but that would require a lot of infrastructure that - * pullup_replace_vars hasn't currently got.) + * each expression of the subquery's targetlist items. This ensures that + * expressions retain their separate identity so that they will match + * grouping set columns when appropriate. (It'd be sufficient to wrap + * values used in grouping set columns, and do so only in non-aggregated + * portions of the tlist and havingQual, but that would require a lot of + * infrastructure that pullup_replace_vars hasn't currently got.) */ if (parse->groupingSets) - rvcontext.wrap_non_vars = true; + rvcontext.wrap_option = REPLACE_WRAP_ALL; /* * Replace all of the top query's references to the subquery's outputs @@ -1976,7 +1980,7 @@ pull_up_simple_values(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte) rvcontext.nullinfo = NULL; rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = varno; - rvcontext.wrap_non_vars = false; + rvcontext.wrap_option = REPLACE_WRAP_NONE; /* initialize cache array with indexes 0 .. length(tlist) */ rvcontext.rv_cache = palloc0((list_length(tlist) + 1) * sizeof(Node *)); @@ -2144,18 +2148,18 @@ pull_up_constant_function(PlannerInfo *root, Node *jtnode, rvcontext.outer_hasSubLinks = &parse->hasSubLinks; rvcontext.varno = ((RangeTblRef *) jtnode)->rtindex; /* this flag will be set below, if needed */ - rvcontext.wrap_non_vars = false; + rvcontext.wrap_option = REPLACE_WRAP_NONE; /* initialize cache array with indexes 0 .. length(tlist) */ rvcontext.rv_cache = palloc0((list_length(rvcontext.targetlist) + 1) * sizeof(Node *)); /* * If the parent query uses grouping sets, we need a PlaceHolderVar for - * anything that's not a simple Var. (See comments in + * each expression of the subquery's targetlist items. (See comments in * pull_up_simple_subquery().) */ if (parse->groupingSets) - rvcontext.wrap_non_vars = true; + rvcontext.wrap_option = REPLACE_WRAP_ALL; /* * Replace all of the top query's references to the RTE's output with @@ -2406,13 +2410,13 @@ perform_pullup_replace_vars(PlannerInfo *root, */ if (containing_appendrel) { - bool save_wrap_non_vars = rvcontext->wrap_non_vars; + ReplaceWrapOption save_wrap_option = rvcontext->wrap_option; - rvcontext->wrap_non_vars = false; + rvcontext->wrap_option = REPLACE_WRAP_NONE; containing_appendrel->translated_vars = (List *) pullup_replace_vars((Node *) containing_appendrel->translated_vars, rvcontext); - rvcontext->wrap_non_vars = save_wrap_non_vars; + rvcontext->wrap_option = save_wrap_option; return; } @@ -2573,24 +2577,24 @@ replace_vars_in_jointree(Node *jtnode, else if (IsA(jtnode, JoinExpr)) { JoinExpr *j = (JoinExpr *) jtnode; - bool save_wrap_non_vars = context->wrap_non_vars; + ReplaceWrapOption save_wrap_option = context->wrap_option; replace_vars_in_jointree(j->larg, context); replace_vars_in_jointree(j->rarg, context); /* - * Use PHVs within the join quals of a full join. Otherwise, we - * cannot identify which side of the join a pulled-up var-free - * expression came from, which can lead to failure to make a plan at - * all because none of the quals appear to be mergeable or hashable - * conditions. + * Use PHVs within the join quals of a full join for variable-free + * expressions. Otherwise, we cannot identify which side of the join + * a pulled-up variable-free expression came from, which can lead to + * failure to make a plan at all because none of the quals appear to + * be mergeable or hashable conditions. */ if (j->jointype == JOIN_FULL) - context->wrap_non_vars = true; + context->wrap_option = REPLACE_WRAP_VARFREE; j->quals = pullup_replace_vars(j->quals, context); - context->wrap_non_vars = save_wrap_non_vars; + context->wrap_option = save_wrap_option; } else elog(ERROR, "unrecognized node type: %d", @@ -2630,10 +2634,11 @@ pullup_replace_vars_callback(Var *var, * We need a PlaceHolderVar if the Var-to-be-replaced has nonempty * varnullingrels (unless we find below that the replacement expression is * a Var or PlaceHolderVar that we can just add the nullingrels to). We - * also need one if the caller has instructed us that all non-Var/PHV + * also need one if the caller has instructed us that certain expression * replacements need to be wrapped for identification purposes. */ - need_phv = (var->varnullingrels != NULL) || rcon->wrap_non_vars; + need_phv = (var->varnullingrels != NULL) || + (rcon->wrap_option != REPLACE_WRAP_NONE); /* * If PlaceHolderVars are needed, we cache the modified expressions in @@ -2673,7 +2678,12 @@ pullup_replace_vars_callback(Var *var, { bool wrap; - if (varattno == InvalidAttrNumber) + if (rcon->wrap_option == REPLACE_WRAP_ALL) + { + /* Caller told us to wrap all expressions in a PlaceHolderVar */ + wrap = true; + } + else if (varattno == InvalidAttrNumber) { /* * Insert PlaceHolderVar for whole-tuple reference. Notice @@ -2733,11 +2743,6 @@ pullup_replace_vars_callback(Var *var, } } } - else if (rcon->wrap_non_vars) - { - /* Caller told us to wrap all non-Vars in a PlaceHolderVar */ - wrap = true; - } else { /* @@ -2769,7 +2774,11 @@ pullup_replace_vars_callback(Var *var, * This analysis could be tighter: in particular, a non-strict * construct hidden within a lower-level PlaceHolderVar is not * reason to add another PHV. But for now it doesn't seem - * worth the code to be more exact. + * worth the code to be more exact. This is also why it's + * preferable to handle bare PHVs in the above branch, rather + * than this branch. We also prefer to handle bare Vars in a + * separate branch, as it's cheaper this way and parallels the + * handling of PHVs. * * For a LATERAL subquery, we have to check the actual var * membership of the node, but if it's non-lateral then any diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 98a265cd3d5..1c61085a0a7 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -1421,7 +1421,7 @@ remove_nulling_relids_mutator(Node *node, * Note: it might seem desirable to remove the PHV altogether if * phnullingrels goes to empty. Currently we dare not do that * because we use PHVs in some cases to enforce separate identity - * of subexpressions; see wrap_non_vars usages in prepjointree.c. + * of subexpressions; see wrap_option usages in prepjointree.c. */ /* Copy the PlaceHolderVar and mutate what's below ... */ phv = (PlaceHolderVar *) diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 449f0384225..35e4cb47ebe 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -434,6 +434,35 @@ select x, not x as not_x, q2 from | | 4567890123456789 (5 rows) +select x, y + from (select four as x, four as y from tenk1) as t + group by grouping sets (x, y) + having y is null + order by 1, 2; + x | y +---+--- + 0 | + 1 | + 2 | + 3 | +(4 rows) + +select x, y || 'y' + from (select four as x, four as y from tenk1) as t + group by grouping sets (x, y) + order by 1, 2; + x | ?column? +---+---------- + 0 | + 1 | + 2 | + 3 | + | 0y + | 1y + | 2y + | 3y +(8 rows) + -- check qual push-down rules for a subquery with grouping sets explain (verbose, costs off) select * from ( diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index 21cd3121940..38d3cdd0fd8 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -172,6 +172,17 @@ select x, not x as not_x, q2 from group by grouping sets(x, q2) order by x, q2; +select x, y + from (select four as x, four as y from tenk1) as t + group by grouping sets (x, y) + having y is null + order by 1, 2; + +select x, y || 'y' + from (select four as x, four as y from tenk1) as t + group by grouping sets (x, y) + order by 1, 2; + -- check qual push-down rules for a subquery with grouping sets explain (verbose, costs off) select * from ( diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index a2e592dbbbb..93339ef3c58 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2477,6 +2477,7 @@ RepOriginId ReparameterizeForeignPathByChild_function ReplaceVarsFromTargetList_context ReplaceVarsNoMatchOption +ReplaceWrapOption ReplicaIdentityStmt ReplicationKind ReplicationSlot