From da5800d5fa636c6e10c9c98402d872c76aa1c8d0 Mon Sep 17 00:00:00 2001 From: David Rowley Date: Tue, 17 Jan 2023 16:37:06 +1300 Subject: [PATCH] Don't presort ORDER BY/DISTINCT Aggrefs with volatile functions In 1349d2790, we gave the planner the ability to provide ORDER BY/DISTINCT Aggrefs with presorted input so that nodeAgg would not have to perform sorts during execution. That commit failed to properly consider the implications of if the Aggref had a volatile function in its ORDER BY/DISTINCT clause. As it happened, this resulted in an ERROR about the volatile function being missing from the targetlist. Here, instead of adding the volatile function to the targetlist, we just never consider an Aggref with a volatile function in its ORDER BY/DISTINCT clause when choosing which Aggrefs we should sort by. We do this as if we were to choose a plan which provided these aggregates with presorted input, then if there were many such aggregates which could all share the same sort order, then it may be surprising if they all shared the same sort sometimes and didn't at other times when some other set of aggregates were given presorted results. We can avoid this inconsistency by just never providing these volatile function aggregates with presorted input. Reported-by: Dean Rasheed Discussion: https://postgr.es/m/CAEZATCWETioXs5kY8vT6BVguY41_wD962VDk=u_Nvd7S1UXzuQ@mail.gmail.com --- src/backend/optimizer/plan/planner.c | 55 +++++++++++++++++++++--- src/test/regress/expected/aggregates.out | 19 ++++++++ src/test/regress/sql/aggregates.sql | 11 +++++ 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 044fb246665..093ace08644 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -3147,6 +3147,27 @@ reorder_grouping_sets(List *groupingSets, List *sortclause) return result; } +/* + * has_volatile_pathkey + * Returns true if any PathKey in 'keys' has an EquivalenceClass + * containing a volatile function. Otherwise returns false. + */ +static bool +has_volatile_pathkey(List *keys) +{ + ListCell *lc; + + foreach(lc, keys) + { + PathKey *pathkey = lfirst_node(PathKey, lc); + + if (pathkey->pk_eclass->ec_has_volatile) + return true; + } + + return false; +} + /* * make_pathkeys_for_groupagg * Determine the pathkeys for the GROUP BY clause and/or any ordered @@ -3168,6 +3189,19 @@ reorder_grouping_sets(List *groupingSets, List *sortclause) * * When the best pathkeys are found we also mark each Aggref that can use * those pathkeys as aggpresorted = true. + * + * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any + * volatile functions, we never make use of these pathkeys. We want to ensure + * that sorts using volatile functions are done independently in each Aggref + * rather than once at the query level. If we were to allow this then Aggrefs + * with compatible sort orders would all transition their rows in the same + * order if those pathkeys were deemed to be the best pathkeys to sort on. + * Whereas, if some other set of Aggref's pathkeys happened to be deemed + * better pathkeys to sort on, then the volatile function Aggrefs would be + * left to perform their sorts individually. To avoid this inconsistent + * behavior which could make Aggref results depend on what other Aggrefs the + * query contains, we always force Aggrefs with volatile functions to perform + * their own sorts. */ static List * make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, @@ -3254,20 +3288,33 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, AggInfo *agginfo = list_nth_node(AggInfo, root->agginfos, i); Aggref *aggref = linitial_node(Aggref, agginfo->aggrefs); List *sortlist; + List *pathkeys; if (aggref->aggdistinct != NIL) sortlist = aggref->aggdistinct; else sortlist = aggref->aggorder; + pathkeys = make_pathkeys_for_sortclauses(root, sortlist, + aggref->args); + + /* + * Ignore Aggrefs which have volatile functions in their ORDER BY + * or DISTINCT clause. + */ + if (has_volatile_pathkey(pathkeys)) + { + unprocessed_aggs = bms_del_member(unprocessed_aggs, i); + continue; + } + /* * When not set yet, take the pathkeys from the first unprocessed * aggregate. */ if (currpathkeys == NIL) { - currpathkeys = make_pathkeys_for_sortclauses(root, sortlist, - aggref->args); + currpathkeys = pathkeys; /* include the GROUP BY pathkeys, if they exist */ if (grouppathkeys != NIL) @@ -3279,11 +3326,7 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist, } else { - List *pathkeys; - /* now look for a stronger set of matching pathkeys */ - pathkeys = make_pathkeys_for_sortclauses(root, sortlist, - aggref->args); /* include the GROUP BY pathkeys, if they exist */ if (grouppathkeys != NIL) diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index 309c2dc8654..ae3b9053318 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1471,6 +1471,25 @@ group by ten; -> Seq Scan on tenk1 (5 rows) +-- Ensure that we never choose to provide presorted input to an Aggref with +-- a volatile function in the ORDER BY / DISTINCT clause. We want to ensure +-- these sorts are performed individually rather than at the query level. +explain (costs off) +select + sum(unique1 order by two), sum(unique1 order by four), + sum(unique1 order by four, two), sum(unique1 order by two, random()), + sum(unique1 order by two, random(), random() + 1) +from tenk1 +group by ten; + QUERY PLAN +---------------------------------- + GroupAggregate + Group Key: ten + -> Sort + Sort Key: ten, four, two + -> Seq Scan on tenk1 +(5 rows) + -- Ensure no ordering is requested when enable_presorted_aggregate is off set enable_presorted_aggregate to off; explain (costs off) diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 15f6be8e15a..514e3b2b397 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -546,6 +546,17 @@ select from tenk1 group by ten; +-- Ensure that we never choose to provide presorted input to an Aggref with +-- a volatile function in the ORDER BY / DISTINCT clause. We want to ensure +-- these sorts are performed individually rather than at the query level. +explain (costs off) +select + sum(unique1 order by two), sum(unique1 order by four), + sum(unique1 order by four, two), sum(unique1 order by two, random()), + sum(unique1 order by two, random(), random() + 1) +from tenk1 +group by ten; + -- Ensure no ordering is requested when enable_presorted_aggregate is off set enable_presorted_aggregate to off; explain (costs off) -- 2.30.2