From: Richard Guo Date: Mon, 9 Dec 2024 11:38:22 +0000 (+0900) Subject: Avoid unnecessary wrapping for Vars and PHVs X-Git-Tag: REL_18_BETA1~1330 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=f64ec81a810ebd4649beb6c153844fa9ae1ecffe;p=postgresql.git Avoid unnecessary wrapping for Vars and PHVs When pulling up a lateral subquery that is under an outer join, the current code always wraps a Var or PHV in the subquery's targetlist into a new PlaceHolderVar if it is a lateral reference to something outside the subquery. This is necessary when the Var/PHV references the non-nullable side of the outer join from the nullable side: we need to ensure that it is evaluated at the right place and hence is forced to null when the outer join should do so. However, if the referenced rel is under the same lowest nulling outer join, we can actually omit the wrapping. That's safe because if the subquery variable is forced to NULL by the outer join, the lateral reference variable will come out as NULL too. It could be beneficial to get rid of such PHVs because they imply lateral dependencies, which force us to resort to nestloop joins. This patch leverages the newly introduced nullingrel_info to check if the nullingrels of the subquery RTE are a subset of those of the laterally referenced rel, in order to determine if the referenced rel is under the same lowest nulling outer join. No backpatch as this could result in plan changes. Author: Richard Guo Reviewed-by: James Coleman, Dmitry Dolgov, Andrei Lepikhov Discussion: https://postgr.es/m/CAMbWs48uk6C7Z9m_FNT8_21CMCk68hrgAsz=z6zpP1PNZMkeoQ@mail.gmail.com --- diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c index 2ebd938f6bd..3fa4d78c3e0 100644 --- a/src/backend/optimizer/prep/prepjointree.c +++ b/src/backend/optimizer/prep/prepjointree.c @@ -2598,26 +2598,47 @@ pullup_replace_vars_callback(Var *var, /* * Simple Vars always escape being wrapped, unless they are * lateral references to something outside the subquery being - * pulled up. (Even then, we could omit the PlaceHolderVar if - * the referenced rel is under the same lowest outer join, but - * it doesn't seem worth the trouble to check that.) + * pulled up and the referenced rel is not under the same + * lowest nulling outer join. */ + wrap = false; if (rcon->target_rte->lateral && !bms_is_member(((Var *) newnode)->varno, rcon->relids)) - wrap = true; - else - wrap = false; + { + nullingrel_info *nullinfo = rcon->nullinfo; + int lvarno = ((Var *) newnode)->varno; + + Assert(lvarno > 0 && lvarno <= nullinfo->rtlength); + if (!bms_is_subset(nullinfo->nullingrels[rcon->varno], + nullinfo->nullingrels[lvarno])) + wrap = true; + } } else if (newnode && IsA(newnode, PlaceHolderVar) && ((PlaceHolderVar *) newnode)->phlevelsup == 0) { /* The same rules apply for a PlaceHolderVar */ + wrap = false; if (rcon->target_rte->lateral && !bms_is_subset(((PlaceHolderVar *) newnode)->phrels, rcon->relids)) - wrap = true; - else - wrap = false; + { + nullingrel_info *nullinfo = rcon->nullinfo; + Relids lvarnos = ((PlaceHolderVar *) newnode)->phrels; + int lvarno; + + lvarno = -1; + while ((lvarno = bms_next_member(lvarnos, lvarno)) >= 0) + { + Assert(lvarno > 0 && lvarno <= nullinfo->rtlength); + if (!bms_is_subset(nullinfo->nullingrels[rcon->varno], + nullinfo->nullingrels[lvarno])) + { + wrap = true; + break; + } + } + } } else if (rcon->wrap_non_vars) { diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index 6c1fb2bfdbb..5c5a769b552 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -1848,6 +1848,281 @@ order by 1, 2; 4567890123456789 | 9135780246913578 (11 rows) +-- lateral references for simple Vars can escape being wrapped if the +-- referenced rel is under the same lowest nulling outer join +explain (verbose, costs off) +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 inner join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + QUERY PLAN +-------------------------------------------------------- + Sort + Output: t1.q1, t2.q2 + Sort Key: t1.q1, t2.q2 + -> Hash Right Join + Output: t1.q1, t2.q2 + Hash Cond: (t2.q1 = t1.q1) + -> Hash Join + Output: t2.q2, t2.q1 + Hash Cond: (t2.q2 = t3.q1) + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Hash + Output: t3.q1 + -> Seq Scan on public.int8_tbl t3 + Output: t3.q1 + -> Hash + Output: t1.q1 + -> Seq Scan on public.int8_tbl t1 + Output: t1.q1 +(19 rows) + +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 inner join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + q1 | x +------------------+------------------ + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 +(21 rows) + +-- otherwise we need to wrap the Vars +explain (verbose, costs off) +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + QUERY PLAN +-------------------------------------------------- + Sort + Output: t1.q1, (t2.q2) + Sort Key: t1.q1, (t2.q2) + -> Hash Right Join + Output: t1.q1, (t2.q2) + Hash Cond: (t2.q1 = t1.q1) + -> Nested Loop Left Join + Output: t2.q1, (t2.q2) + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Seq Scan on public.int8_tbl t3 + Output: t3.q1, t2.q2 + Filter: (t2.q2 = t3.q1) + -> Hash + Output: t1.q1 + -> Seq Scan on public.int8_tbl t1 + Output: t1.q1 +(17 rows) + +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + q1 | x +------------------+------------------ + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | 4567890123456789 + 123 | + 123 | + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 + 4567890123456789 | + 4567890123456789 | + 4567890123456789 | +(26 rows) + +-- lateral references for PHVs can also escape being wrapped if the +-- referenced rel is under the same lowest nulling outer join +explain (verbose, costs off) +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------- + Sort + Output: (COALESCE(t3.q1)), t4.q1, t4.q2 + Sort Key: (COALESCE(t3.q1)), t4.q1, t4.q2 + -> Hash Right Join + Output: (COALESCE(t3.q1)), t4.q1, t4.q2 + Hash Cond: (t4.q1 = t1.q2) + -> Hash Join + Output: (COALESCE(t3.q1)), t4.q1, t4.q2 + Hash Cond: (t2.q2 = t4.q1) + -> Hash Left Join + Output: t2.q2, (COALESCE(t3.q1)) + Hash Cond: (t2.q1 = t3.q2) + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Hash + Output: t3.q2, (COALESCE(t3.q1)) + -> Seq Scan on public.int8_tbl t3 + Output: t3.q2, COALESCE(t3.q1) + -> Hash + Output: t4.q1, t4.q2 + -> Seq Scan on public.int8_tbl t4 + Output: t4.q1, t4.q2 + -> Hash + Output: t1.q2 + -> Seq Scan on public.int8_tbl t1 + Output: t1.q2 +(26 rows) + +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + y | q1 | q2 +------------------+------------------+------------------- + 123 | 123 | 456 + 123 | 123 | 4567890123456789 + 123 | 4567890123456789 | -4567890123456789 + 123 | 4567890123456789 | -4567890123456789 + 123 | 4567890123456789 | 123 + 123 | 4567890123456789 | 123 + 123 | 4567890123456789 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 456 + 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + | | + | | +(24 rows) + +-- otherwise we need to wrap the PHVs +explain (verbose, costs off) +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + QUERY PLAN +---------------------------------------------------------------- + Sort + Output: ((COALESCE(t3.q1))), t4.q1, t4.q2 + Sort Key: ((COALESCE(t3.q1))), t4.q1, t4.q2 + -> Hash Right Join + Output: ((COALESCE(t3.q1))), t4.q1, t4.q2 + Hash Cond: (t4.q1 = t1.q2) + -> Nested Loop + Output: t4.q1, t4.q2, ((COALESCE(t3.q1))) + Join Filter: (t2.q2 = t4.q1) + -> Hash Left Join + Output: t2.q2, (COALESCE(t3.q1)) + Hash Cond: (t2.q1 = t3.q2) + -> Seq Scan on public.int8_tbl t2 + Output: t2.q1, t2.q2 + -> Hash + Output: t3.q2, (COALESCE(t3.q1)) + -> Seq Scan on public.int8_tbl t3 + Output: t3.q2, COALESCE(t3.q1) + -> Seq Scan on public.int8_tbl t4 + Output: t4.q1, t4.q2, (COALESCE(t3.q1)) + -> Hash + Output: t1.q2 + -> Seq Scan on public.int8_tbl t1 + Output: t1.q2 +(24 rows) + +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + y | q1 | q2 +------------------+------------------+------------------- + 123 | 123 | 456 + 123 | 123 | 4567890123456789 + 123 | 4567890123456789 | -4567890123456789 + 123 | 4567890123456789 | -4567890123456789 + 123 | 4567890123456789 | 123 + 123 | 4567890123456789 | 123 + 123 | 4567890123456789 | 4567890123456789 + 123 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 123 | 456 + 4567890123456789 | 123 | 4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | -4567890123456789 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 123 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + 4567890123456789 | 4567890123456789 | 4567890123456789 + | | + | | +(24 rows) + -- -- Tests for CTE inlining behavior -- diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql index e5a562c3f5d..db1969256fc 100644 --- a/src/test/regress/sql/subselect.sql +++ b/src/test/regress/sql/subselect.sql @@ -939,6 +939,76 @@ select t1.q1, x from on t1.q2 = t2.q2 order by 1, 2; +-- lateral references for simple Vars can escape being wrapped if the +-- referenced rel is under the same lowest nulling outer join +explain (verbose, costs off) +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 inner join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 inner join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + +-- otherwise we need to wrap the Vars +explain (verbose, costs off) +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + +select t1.q1, x from + int8_tbl t1 left join + (int8_tbl t2 left join + lateral (select t2.q2 as x, * from int8_tbl t3) ss on t2.q2 = ss.q1) + on t1.q1 = t2.q1 +order by 1, 2; + +-- lateral references for PHVs can also escape being wrapped if the +-- referenced rel is under the same lowest nulling outer join +explain (verbose, costs off) +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 inner join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + +-- otherwise we need to wrap the PHVs +explain (verbose, costs off) +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + +select ss2.* from + int8_tbl t1 left join + (int8_tbl t2 left join + (select coalesce(q1) as x, * from int8_tbl t3) ss1 on t2.q1 = ss1.q2 left join + lateral (select ss1.x as y, * from int8_tbl t4) ss2 on t2.q2 = ss2.q1) + on t1.q2 = ss2.q1 +order by 1, 2, 3; + -- -- Tests for CTE inlining behavior --