Fix issue with ORDER BY / DISTINCT aggregates and FILTER
authorDavid Rowley <[email protected]>
Sun, 20 Apr 2025 10:12:59 +0000 (22:12 +1200)
committerDavid Rowley <[email protected]>
Sun, 20 Apr 2025 10:12:59 +0000 (22:12 +1200)
1349d2790 added support so that aggregate functions with an ORDER BY or
DISTINCT clause could make use of presorted inputs to avoid an implicit
sort within nodeAgg.c.  That commit failed to consider that a FILTER
clause may exist that filters rows before the aggregate function
arguments are evaluated.  That can be problematic if an aggregate
argument contains an expression which could error out during evaluation.
It's perfectly valid to want to have a FILTER clause which eliminates
such values, and with the pre-sorted path added in 1349d2790, it was
possible that the planner would produce a plan with a Sort node above
the Aggregate to perform the sort on the aggregate's arguments long before
the Aggregate node would filter out the non-matching values.

Here we fix this by inspecting ORDER BY / DISTINCT aggregate functions
which have a FILTER clause to see if the aggregate's arguments are
anything more complex than a Var or a Const.  Evaluating these isn't
going to cause an error.  If we find any non-Var, non-Const parameters
then the planner will now opt to perform the sort in the Aggregate node
for these aggregates, i.e. disable the presorted aggregate optimization.

An alternative fix would have been to completely disallow the presorted
optimization for Aggrefs with any FILTER clause, but that wasn't done as
that could cause large performance regressions for queries that see
significant gains from 1349d2790 due to presorted results coming in from
an Index Scan.

Backpatch to 16, where 1349d2790 was introduced

Author: David Rowley <[email protected]>
Reported-by: Kaimeh <[email protected]>
Diagnosed-by: Tom Lane <[email protected]>
Discussion: https://postgr.es/m/CAK-%2BJz9J%3DQ06-M7cDJoPNeYbz5EZDqkjQbJnmRyQyzkbRGsYkA%40mail.gmail.com
Backpatch-through: 16

src/backend/optimizer/plan/planner.c
src/test/regress/expected/aggregates.out
src/test/regress/expected/sqljson.out
src/test/regress/sql/aggregates.sql

index a1bceafa30f9698af5b73c03e473ea5c07c9ee2a..11c2f33e30221bd9dd306e0ef5cd31e9b47b3d72 100644 (file)
@@ -3240,10 +3240,53 @@ adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
        if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
            continue;
 
-       /* only add aggregates with a DISTINCT or ORDER BY */
-       if (aggref->aggdistinct != NIL || aggref->aggorder != NIL)
-           unprocessed_aggs = bms_add_member(unprocessed_aggs,
-                                             foreach_current_index(lc));
+       /* Skip unless there's a DISTINCT or ORDER BY clause */
+       if (aggref->aggdistinct == NIL && aggref->aggorder == NIL)
+           continue;
+
+       /* Additional safety checks are needed if there's a FILTER clause */
+       if (aggref->aggfilter != NULL)
+       {
+           ListCell   *lc2;
+           bool        allow_presort = true;
+
+           /*
+            * When the Aggref has a FILTER clause, it's possible that the
+            * filter removes rows that cannot be sorted because the
+            * expression to sort by results in an error during its
+            * evaluation.  This is a problem for presorting as that happens
+            * before the FILTER, whereas without presorting, the Aggregate
+            * node will apply the FILTER *before* sorting.  So that we never
+            * try to sort anything that might error, here we aim to skip over
+            * any Aggrefs with arguments with expressions which, when
+            * evaluated, could cause an ERROR.  Vars and Consts are ok. There
+            * may be more cases that should be allowed, but more thought
+            * needs to be given.  Err on the side of caution.
+            */
+           foreach(lc2, aggref->args)
+           {
+               TargetEntry *tle = (TargetEntry *) lfirst(lc2);
+               Expr       *expr = tle->expr;
+
+               while (IsA(expr, RelabelType))
+                   expr = (Expr *) (castNode(RelabelType, expr))->arg;
+
+               /* Common case, Vars and Consts are ok */
+               if (IsA(expr, Var) || IsA(expr, Const))
+                   continue;
+
+               /* Unsupported.  Don't try to presort for this Aggref */
+               allow_presort = false;
+               break;
+           }
+
+           /* Skip unsupported Aggrefs */
+           if (!allow_presort)
+               continue;
+       }
+
+       unprocessed_aggs = bms_add_member(unprocessed_aggs,
+                                         foreach_current_index(lc));
    }
 
    /*
index 5aedd6a33109a47955a22e596e03d9e513f480c2..12ad55f9592ff31b6bce1c96ceaf4177eeb45128 100644 (file)
@@ -1586,6 +1586,43 @@ select sum(two order by two) from tenk1;
 (2 rows)
 
 reset enable_presorted_aggregate;
+--
+-- Test cases with FILTER clause
+--
+-- Ensure we presort when the aggregate contains plain Vars
+explain (costs off)
+select sum(two order by two) filter (where two > 1) from tenk1;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: two
+         ->  Seq Scan on tenk1
+(4 rows)
+
+-- Ensure we presort for RelabelType'd Vars
+explain (costs off)
+select string_agg(distinct f1, ',') filter (where length(f1) > 1)
+from varchar_tbl;
+             QUERY PLAN              
+-------------------------------------
+ Aggregate
+   ->  Sort
+         Sort Key: f1
+         ->  Seq Scan on varchar_tbl
+(4 rows)
+
+-- Ensure we don't presort when the aggregate's argument contains an
+-- explicit cast.
+explain (costs off)
+select string_agg(distinct f1::varchar(2), ',') filter (where length(f1) > 1)
+from varchar_tbl;
+          QUERY PLAN           
+-------------------------------
+ Aggregate
+   ->  Seq Scan on varchar_tbl
+(2 rows)
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --
index 8bffd007bd65d25892ef2d31467ef89ee2ad5278..7a5a6325229607063688aa2eae8469ae0cd355f5 100644 (file)
@@ -533,22 +533,22 @@ SELECT
 FROM
    (VALUES (NULL), (3), (1), (NULL), (NULL), (5), (2), (4), (NULL)) foo(bar);
 -[ RECORD 1 ]--------------------+-------------------------------------------------------------------------------------------------------------------------
-no_options                       | [1, 2, 3, 4, 5]
-returning_jsonb                  | [1, 2, 3, 4, 5]
-absent_on_null                   | [1, 2, 3, 4, 5]
-absentonnull_returning_jsonb     | [1, 2, 3, 4, 5]
-null_on_null                     | [1, 2, 3, 4, 5, null, null, null, null]
-nullonnull_returning_jsonb       | [1, 2, 3, 4, 5, null, null, null, null]
-row_no_options                   | [{"bar":1},                                                                                                             +
-                                 |  {"bar":2},                                                                                                             +
+no_options                       | [3, 1, 5, 2, 4]
+returning_jsonb                  | [3, 1, 5, 2, 4]
+absent_on_null                   | [3, 1, 5, 2, 4]
+absentonnull_returning_jsonb     | [3, 1, 5, 2, 4]
+null_on_null                     | [null, 3, 1, null, null, 5, 2, 4, null]
+nullonnull_returning_jsonb       | [null, 3, 1, null, null, 5, 2, 4, null]
+row_no_options                   | [{"bar":null},                                                                                                          +
                                  |  {"bar":3},                                                                                                             +
-                                 |  {"bar":4},                                                                                                             +
-                                 |  {"bar":5},                                                                                                             +
-                                 |  {"bar":null},                                                                                                          +
+                                 |  {"bar":1},                                                                                                             +
                                  |  {"bar":null},                                                                                                          +
                                  |  {"bar":null},                                                                                                          +
+                                 |  {"bar":5},                                                                                                             +
+                                 |  {"bar":2},                                                                                                             +
+                                 |  {"bar":4},                                                                                                             +
                                  |  {"bar":null}]
-row_returning_jsonb              | [{"bar": 1}, {"bar": 2}, {"bar": 3}, {"bar": 4}, {"bar": 5}, {"bar": null}, {"bar": null}, {"bar": null}, {"bar": null}]
+row_returning_jsonb              | [{"bar": null}, {"bar": 3}, {"bar": 1}, {"bar": null}, {"bar": null}, {"bar": 5}, {"bar": 2}, {"bar": 4}, {"bar": null}]
 row_filtered_agg                 | [{"bar":3},                                                                                                             +
                                  |  {"bar":4},                                                                                                             +
                                  |  {"bar":5}]
index 4bb3cc6b5b56fced3e4e1f50fa8bc41e02d8e0a6..af6b9b315a4139c1c052b1811f9df22943f0f40e 100644 (file)
@@ -595,6 +595,25 @@ explain (costs off)
 select sum(two order by two) from tenk1;
 reset enable_presorted_aggregate;
 
+--
+-- Test cases with FILTER clause
+--
+
+-- Ensure we presort when the aggregate contains plain Vars
+explain (costs off)
+select sum(two order by two) filter (where two > 1) from tenk1;
+
+-- Ensure we presort for RelabelType'd Vars
+explain (costs off)
+select string_agg(distinct f1, ',') filter (where length(f1) > 1)
+from varchar_tbl;
+
+-- Ensure we don't presort when the aggregate's argument contains an
+-- explicit cast.
+explain (costs off)
+select string_agg(distinct f1::varchar(2), ',') filter (where length(f1) > 1)
+from varchar_tbl;
+
 --
 -- Test combinations of DISTINCT and/or ORDER BY
 --