Don't presort ORDER BY/DISTINCT Aggrefs with volatile functions

author David Rowley <[email protected]>

Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)

committer David Rowley <[email protected]>

Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)
author David Rowley <[email protected]>
Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)
committer David Rowley <[email protected]>
Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c

index 044fb246665482ee8520167a45f8c5e1c3416dcd..093ace08644989e041439ce8bb1fb915a59cc6b4 100644 (file)
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -3147,6 +3147,27 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
     return result;
  }
  
+/*
+ * has_volatile_pathkey
+ *     Returns true if any PathKey in 'keys' has an EquivalenceClass
+ *     containing a volatile function.  Otherwise returns false.
+ */
+static bool
+has_volatile_pathkey(List *keys)
+{
+   ListCell   *lc;
+
+   foreach(lc, keys)
+   {
+       PathKey    *pathkey = lfirst_node(PathKey, lc);
+
+       if (pathkey->pk_eclass->ec_has_volatile)
+           return true;
+   }
+
+   return false;
+}
+
  /*
   * make_pathkeys_for_groupagg
   *     Determine the pathkeys for the GROUP BY clause and/or any ordered
@@ -3168,6 +3189,19 @@ reorder_grouping_sets(List *groupingSets, List *sortclause)
   *
   * When the best pathkeys are found we also mark each Aggref that can use
   * those pathkeys as aggpresorted = true.
+ *
+ * Note: When an aggregate function's ORDER BY / DISTINCT clause contains any
+ * volatile functions, we never make use of these pathkeys.  We want to ensure
+ * that sorts using volatile functions are done independently in each Aggref
+ * rather than once at the query level.  If we were to allow this then Aggrefs
+ * with compatible sort orders would all transition their rows in the same
+ * order if those pathkeys were deemed to be the best pathkeys to sort on.
+ * Whereas, if some other set of Aggref's pathkeys happened to be deemed
+ * better pathkeys to sort on, then the volatile function Aggrefs would be
+ * left to perform their sorts individually.  To avoid this inconsistent
+ * behavior which could make Aggref results depend on what other Aggrefs the
+ * query contains, we always force Aggrefs with volatile functions to perform
+ * their own sorts.
   */
  static List *
  make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
@@ -3254,20 +3288,33 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
             AggInfo    *agginfo = list_nth_node(AggInfo, root->agginfos, i);
             Aggref     *aggref = linitial_node(Aggref, agginfo->aggrefs);
             List       *sortlist;
+           List       *pathkeys;
  
             if (aggref->aggdistinct != NIL)
                 sortlist = aggref->aggdistinct;
             else
                 sortlist = aggref->aggorder;
  
+           pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
+                                                    aggref->args);
+
+           /*
+            * Ignore Aggrefs which have volatile functions in their ORDER BY
+            * or DISTINCT clause.
+            */
+           if (has_volatile_pathkey(pathkeys))
+           {
+               unprocessed_aggs = bms_del_member(unprocessed_aggs, i);
+               continue;
+           }
+
             /*
              * When not set yet, take the pathkeys from the first unprocessed
              * aggregate.
              */
             if (currpathkeys == NIL)
             {
-               currpathkeys = make_pathkeys_for_sortclauses(root, sortlist,
-                                                            aggref->args);
+               currpathkeys = pathkeys;
  
                 /* include the GROUP BY pathkeys, if they exist */
                 if (grouppathkeys != NIL)
@@ -3279,11 +3326,7 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
             }
             else
             {
-               List       *pathkeys;
-
                 /* now look for a stronger set of matching pathkeys */
-               pathkeys = make_pathkeys_for_sortclauses(root, sortlist,
-                                                        aggref->args);
  
                 /* include the GROUP BY pathkeys, if they exist */
                 if (grouppathkeys != NIL)
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out

index 309c2dc8654f5130ae7dcfdb911f199a4463faa2..ae3b90533182b1c6a2bea3efdfefaa82dd6574a2 100644 (file)
--- a/src/test/regress/expected/aggregates.out
+++ b/src/test/regress/expected/aggregates.out
@@ -1471,6 +1471,25 @@ group by ten;
           ->  Seq Scan on tenk1
  (5 rows)
  
+-- Ensure that we never choose to provide presorted input to an Aggref with
+-- a volatile function in the ORDER BY / DISTINCT clause.  We want to ensure
+-- these sorts are performed individually rather than at the query level.
+explain (costs off)
+select
+  sum(unique1 order by two), sum(unique1 order by four),
+  sum(unique1 order by four, two), sum(unique1 order by two, random()),
+  sum(unique1 order by two, random(), random() + 1)
+from tenk1
+group by ten;
+            QUERY PLAN            
+----------------------------------
+ GroupAggregate
+   Group Key: ten
+   ->  Sort
+         Sort Key: ten, four, two
+         ->  Seq Scan on tenk1
+(5 rows)
+
  -- Ensure no ordering is requested when enable_presorted_aggregate is off
  set enable_presorted_aggregate to off;
  explain (costs off)
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql

index 15f6be8e15ac2730d515d4225c82d9105e21972e..514e3b2b39712764641a0716fcade1e9f4d8eccc 100644 (file)
--- a/src/test/regress/sql/aggregates.sql
+++ b/src/test/regress/sql/aggregates.sql
@@ -546,6 +546,17 @@ select
  from tenk1
  group by ten;
  
+-- Ensure that we never choose to provide presorted input to an Aggref with
+-- a volatile function in the ORDER BY / DISTINCT clause.  We want to ensure
+-- these sorts are performed individually rather than at the query level.
+explain (costs off)
+select
+  sum(unique1 order by two), sum(unique1 order by four),
+  sum(unique1 order by four, two), sum(unique1 order by two, random()),
+  sum(unique1 order by two, random(), random() + 1)
+from tenk1
+group by ten;
+
  -- Ensure no ordering is requested when enable_presorted_aggregate is off
  set enable_presorted_aggregate to off;
  explain (costs off)
author	David Rowley <[email protected]>
	Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)
committer	David Rowley <[email protected]>
	Tue, 17 Jan 2023 03:37:06 +0000 (16:37 +1300)
src/backend/optimizer/plan/planner.c		patch \| blob \| blame \| history
src/test/regress/expected/aggregates.out		patch \| blob \| blame \| history
src/test/regress/sql/aggregates.sql		patch \| blob \| blame \| history