Skip to content

Commit 348744b

Browse files
committed
Introduce the Asymmetric Join (AJ) feature.
Asymmetric Join is an addition to the partitionwise join strategy: when the optimiser sees a join of a plain table (or a jointree containing only plain tables) with a partitioned one, it may attempt to join the table with each of the partitions separately. Here, as usual, we have some pros and cons: * A smaller JOIN can survive memory allocations or data skews. * Different strategies for different pairs may be more effective. * It may allow pruning more partitions and, with the partition constraint, selecting a smaller volume of data. * But of course, it increases the complexity of planning. To implement this feature we add consider_asymmetric_join to the RelOptInfo structure. It must be true iff consider_partitionwise_join is false. The implementation follows the logic of partitionwise_join. We disallow any partitioned relations as an inner relation of AJ. The key point here is to resolve the problem that a RelOptInfo can be reached in different ways, which may need different numbers of part_rels. The new GUC enable_asymmetric_join allows disabling this feature, if necessary. A lot of regression tests were changed or newly created. TODO: We have not yet resolved Ashutosh Bapat's warning about scanning one RangeTblEntry in different parts of the plan. PostgreSQL used to implicitly assume that each leaf range table entry (and RelOptInfo) corresponds to only one scan node. For example, the partition pruning code is designed with this assumption. The qual evaluation code also references the index of the scan node. After analysis I didn't find any problems which can be caused by this inconsistency. Unfortunately, that doesn't guarantee the absence of issues in the future.
1 parent 4712177 commit 348744b

File tree

19 files changed

+2131
-239
lines changed

19 files changed

+2131
-239
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 155 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -10070,6 +10070,60 @@ SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a
1007010070
400 | 400
1007110071
(4 rows)
1007210072

10073+
-- Apply AJ to foreign tables
10074+
EXPLAIN (COSTS OFF)
10075+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2;
10076+
QUERY PLAN
10077+
---------------------------------------------------------------
10078+
Merge Append
10079+
Sort Key: t1.a
10080+
-> Foreign Scan
10081+
Relations: (ftprt1_p1 t1_1) INNER JOIN (ftprt2_p1 t2)
10082+
-> Foreign Scan
10083+
Relations: (ftprt1_p2 t1_2) INNER JOIN (ftprt2_p1 t2)
10084+
(6 rows)
10085+
10086+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2;
10087+
a | b
10088+
-----+-----
10089+
204 | 204
10090+
210 | 210
10091+
216 | 216
10092+
222 | 222
10093+
228 | 228
10094+
234 | 234
10095+
240 | 240
10096+
246 | 246
10097+
(8 rows)
10098+
10099+
-- FOR UPDATE requires whole-row reference, and so asymmetric join doesn't apply
10100+
EXPLAIN (COSTS OFF)
10101+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2;
10102+
QUERY PLAN
10103+
--------------------------------------------------
10104+
LockRows
10105+
-> Merge Join
10106+
Merge Cond: (t1.a = t2.b)
10107+
-> Append
10108+
-> Foreign Scan on ftprt1_p1 t1_1
10109+
-> Foreign Scan on ftprt1_p2 t1_2
10110+
-> Materialize
10111+
-> Foreign Scan on ftprt2_p1 t2
10112+
(8 rows)
10113+
10114+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2;
10115+
a | b
10116+
-----+-----
10117+
204 | 204
10118+
210 | 210
10119+
216 | 216
10120+
222 | 222
10121+
228 | 228
10122+
234 | 234
10123+
240 | 240
10124+
246 | 246
10125+
(8 rows)
10126+
1007310127
RESET enable_partitionwise_join;
1007410128
-- ===================================================================
1007510129
-- test partitionwise aggregates
@@ -11338,25 +11392,32 @@ RESET enable_partitionwise_join;
1133811392
SET enable_hashjoin TO false;
1133911393
EXPLAIN (VERBOSE, COSTS OFF)
1134011394
INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
11341-
QUERY PLAN
11342-
----------------------------------------------------------------------------------------
11395+
QUERY PLAN
11396+
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1134311397
Insert on public.join_tbl
11344-
-> Nested Loop
11345-
Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
11346-
Join Filter: ((t1.a = t2.a) AND (t1.b = t2.b))
11347-
-> Foreign Scan on public.async_p1 t1
11348-
Output: t1.a, t1.b, t1.c
11349-
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0))
11350-
-> Append
11351-
-> Async Foreign Scan on public.async_p1 t2_1
11352-
Output: t2_1.a, t2_1.b, t2_1.c
11353-
Remote SQL: SELECT a, b, c FROM public.base_tbl1
11354-
-> Async Foreign Scan on public.async_p2 t2_2
11398+
-> Append
11399+
-> Async Foreign Scan
11400+
Output: t1.a, t1.b, t1.c, t2_1.a, t2_1.b, t2_1.c
11401+
Relations: (public.async_p1 t2_1) INNER JOIN (public.async_p1 t1)
11402+
Remote SQL: SELECT r3.a, r3.b, r3.c, r5.a, r5.b, r5.c FROM (public.base_tbl1 r5 INNER JOIN public.base_tbl1 r3 ON (((r3.a = r5.a)) AND ((r3.b = r5.b)) AND (((r3.b % 100) = 0))))
11403+
-> Nested Loop
11404+
Output: t1.a, t1.b, t1.c, t2_2.a, t2_2.b, t2_2.c
11405+
Join Filter: ((t1.a = t2_2.a) AND (t1.b = t2_2.b))
11406+
-> Foreign Scan on public.async_p1 t1
11407+
Output: t1.a, t1.b, t1.c
11408+
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0))
11409+
-> Foreign Scan on public.async_p2 t2_2
1135511410
Output: t2_2.a, t2_2.b, t2_2.c
1135611411
Remote SQL: SELECT a, b, c FROM public.base_tbl2
11412+
-> Nested Loop
11413+
Output: t1.a, t1.b, t1.c, t2_3.a, t2_3.b, t2_3.c
11414+
Join Filter: ((t1.a = t2_3.a) AND (t1.b = t2_3.b))
11415+
-> Foreign Scan on public.async_p1 t1
11416+
Output: t1.a, t1.b, t1.c
11417+
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE (((b % 100) = 0))
1135711418
-> Seq Scan on public.async_p3 t2_3
1135811419
Output: t2_3.a, t2_3.b, t2_3.c
11359-
(16 rows)
11420+
(23 rows)
1136011421

1136111422
INSERT INTO join_tbl SELECT * FROM async_p1 t1, async_pt t2 WHERE t1.a = t2.a AND t1.b = t2.b AND t1.b % 100 = 0;
1136211423
SELECT * FROM join_tbl ORDER BY a1;
@@ -11464,39 +11525,61 @@ ALTER FOREIGN TABLE async_p1 OPTIONS (use_remote_estimate 'true');
1146411525
ALTER FOREIGN TABLE async_p2 OPTIONS (use_remote_estimate 'true');
1146511526
EXPLAIN (VERBOSE, COSTS OFF)
1146611527
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
11467-
QUERY PLAN
11468-
------------------------------------------------------------------------------------------
11469-
Nested Loop
11470-
Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt.a, async_pt.b, async_pt.c
11471-
-> Seq Scan on public.local_tbl
11472-
Output: local_tbl.a, local_tbl.b, local_tbl.c
11473-
Filter: (local_tbl.c = 'bar'::text)
11474-
-> Append
11475-
-> Async Foreign Scan on public.async_p1 async_pt_1
11528+
QUERY PLAN
11529+
-------------------------------------------------------------------------------------------------
11530+
Append
11531+
-> Nested Loop
11532+
Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_1.a, async_pt_1.b, async_pt_1.c
11533+
-> Seq Scan on public.local_tbl
11534+
Output: local_tbl.a, local_tbl.b, local_tbl.c
11535+
Filter: (local_tbl.c = 'bar'::text)
11536+
-> Foreign Scan on public.async_p1 async_pt_1
1147611537
Output: async_pt_1.a, async_pt_1.b, async_pt_1.c
1147711538
Remote SQL: SELECT a, b, c FROM public.base_tbl1 WHERE ((a = $1::integer))
11478-
-> Async Foreign Scan on public.async_p2 async_pt_2
11539+
-> Nested Loop
11540+
Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_2.a, async_pt_2.b, async_pt_2.c
11541+
-> Seq Scan on public.local_tbl
11542+
Output: local_tbl.a, local_tbl.b, local_tbl.c
11543+
Filter: (local_tbl.c = 'bar'::text)
11544+
-> Foreign Scan on public.async_p2 async_pt_2
1147911545
Output: async_pt_2.a, async_pt_2.b, async_pt_2.c
1148011546
Remote SQL: SELECT a, b, c FROM public.base_tbl2 WHERE ((a = $1::integer))
11547+
-> Hash Join
11548+
Output: local_tbl.a, local_tbl.b, local_tbl.c, async_pt_3.a, async_pt_3.b, async_pt_3.c
11549+
Hash Cond: (async_pt_3.a = local_tbl.a)
1148111550
-> Seq Scan on public.async_p3 async_pt_3
1148211551
Output: async_pt_3.a, async_pt_3.b, async_pt_3.c
11483-
Filter: (async_pt_3.a = local_tbl.a)
11484-
(15 rows)
11552+
-> Hash
11553+
Output: local_tbl.a, local_tbl.b, local_tbl.c
11554+
-> Seq Scan on public.local_tbl
11555+
Output: local_tbl.a, local_tbl.b, local_tbl.c
11556+
Filter: (local_tbl.c = 'bar'::text)
11557+
(27 rows)
1148511558

1148611559
EXPLAIN (ANALYZE, COSTS OFF, SUMMARY OFF, TIMING OFF)
1148711560
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
11488-
QUERY PLAN
11489-
-------------------------------------------------------------------------------
11490-
Nested Loop (actual rows=1 loops=1)
11491-
-> Seq Scan on local_tbl (actual rows=1 loops=1)
11492-
Filter: (c = 'bar'::text)
11493-
Rows Removed by Filter: 1
11494-
-> Append (actual rows=1 loops=1)
11495-
-> Async Foreign Scan on async_p1 async_pt_1 (never executed)
11496-
-> Async Foreign Scan on async_p2 async_pt_2 (actual rows=1 loops=1)
11497-
-> Seq Scan on async_p3 async_pt_3 (never executed)
11498-
Filter: (a = local_tbl.a)
11499-
(9 rows)
11561+
QUERY PLAN
11562+
-------------------------------------------------------------------------
11563+
Append (actual rows=1 loops=1)
11564+
-> Nested Loop (actual rows=0 loops=1)
11565+
-> Seq Scan on local_tbl (actual rows=1 loops=1)
11566+
Filter: (c = 'bar'::text)
11567+
Rows Removed by Filter: 1
11568+
-> Foreign Scan on async_p1 async_pt_1 (actual rows=0 loops=1)
11569+
-> Nested Loop (actual rows=1 loops=1)
11570+
-> Seq Scan on local_tbl (actual rows=1 loops=1)
11571+
Filter: (c = 'bar'::text)
11572+
Rows Removed by Filter: 1
11573+
-> Foreign Scan on async_p2 async_pt_2 (actual rows=1 loops=1)
11574+
-> Hash Join (actual rows=0 loops=1)
11575+
Hash Cond: (async_pt_3.a = local_tbl.a)
11576+
-> Seq Scan on async_p3 async_pt_3 (actual rows=200 loops=1)
11577+
-> Hash (actual rows=1 loops=1)
11578+
Buckets: 1024 Batches: 1 Memory Usage: 9kB
11579+
-> Seq Scan on local_tbl (actual rows=1 loops=1)
11580+
Filter: (c = 'bar'::text)
11581+
Rows Removed by Filter: 1
11582+
(19 rows)
1150011583

1150111584
SELECT * FROM local_tbl, async_pt WHERE local_tbl.a = async_pt.a AND local_tbl.c = 'bar';
1150211585
a | b | c | a | b | c
@@ -11670,29 +11753,45 @@ SET enable_mergejoin TO false;
1167011753
SET enable_hashjoin TO false;
1167111754
EXPLAIN (VERBOSE, COSTS OFF)
1167211755
SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505;
11673-
QUERY PLAN
11674-
----------------------------------------------------------------
11675-
Nested Loop
11676-
Output: t1.a, t1.b, t1.c, t2.a, t2.b, t2.c
11677-
Join Filter: (t1.a = t2.a)
11678-
-> Append
11679-
-> Async Foreign Scan on public.async_p1 t1_1
11756+
QUERY PLAN
11757+
----------------------------------------------------------------------
11758+
Append
11759+
-> Nested Loop
11760+
Output: t1_1.a, t1_1.b, t1_1.c, t2.a, t2.b, t2.c
11761+
Join Filter: (t1_1.a = t2.a)
11762+
-> Foreign Scan on public.async_p2 t2
11763+
Output: t2.a, t2.b, t2.c
11764+
Remote SQL: SELECT a, b, c FROM public.base_tbl2
11765+
-> Materialize
1168011766
Output: t1_1.a, t1_1.b, t1_1.c
11681-
Filter: (t1_1.b === 505)
11682-
Remote SQL: SELECT a, b, c FROM public.base_tbl1
11683-
-> Async Foreign Scan on public.async_p2 t1_2
11684-
Output: t1_2.a, t1_2.b, t1_2.c
11685-
Filter: (t1_2.b === 505)
11767+
-> Foreign Scan on public.async_p1 t1_1
11768+
Output: t1_1.a, t1_1.b, t1_1.c
11769+
Filter: (t1_1.b === 505)
11770+
Remote SQL: SELECT a, b, c FROM public.base_tbl1
11771+
-> Nested Loop
11772+
Output: t1_2.a, t1_2.b, t1_2.c, t2.a, t2.b, t2.c
11773+
Join Filter: (t1_2.a = t2.a)
11774+
-> Foreign Scan on public.async_p2 t2
11775+
Output: t2.a, t2.b, t2.c
1168611776
Remote SQL: SELECT a, b, c FROM public.base_tbl2
11687-
-> Seq Scan on public.async_p3 t1_3
11688-
Output: t1_3.a, t1_3.b, t1_3.c
11689-
Filter: (t1_3.b === 505)
11690-
-> Materialize
11691-
Output: t2.a, t2.b, t2.c
11777+
-> Materialize
11778+
Output: t1_2.a, t1_2.b, t1_2.c
11779+
-> Foreign Scan on public.async_p2 t1_2
11780+
Output: t1_2.a, t1_2.b, t1_2.c
11781+
Filter: (t1_2.b === 505)
11782+
Remote SQL: SELECT a, b, c FROM public.base_tbl2
11783+
-> Nested Loop
11784+
Output: t1_3.a, t1_3.b, t1_3.c, t2.a, t2.b, t2.c
11785+
Join Filter: (t1_3.a = t2.a)
1169211786
-> Foreign Scan on public.async_p2 t2
1169311787
Output: t2.a, t2.b, t2.c
1169411788
Remote SQL: SELECT a, b, c FROM public.base_tbl2
11695-
(20 rows)
11789+
-> Materialize
11790+
Output: t1_3.a, t1_3.b, t1_3.c
11791+
-> Seq Scan on public.async_p3 t1_3
11792+
Output: t1_3.a, t1_3.b, t1_3.c
11793+
Filter: (t1_3.b === 505)
11794+
(36 rows)
1169611795

1169711796
SELECT * FROM async_pt t1, async_p2 t2 WHERE t1.a = t2.a AND t1.b === 505;
1169811797
a | b | c | a | b | c

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3139,6 +3139,15 @@ EXPLAIN (COSTS OFF)
31393139
SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
31403140
SELECT t1.a, t2.b FROM fprt1 t1 INNER JOIN fprt2 t2 ON (t1.a = t2.b) WHERE t1.a % 25 = 0 ORDER BY 1,2 FOR UPDATE OF t1;
31413141

3142+
-- Apply AJ to foreign tables
3143+
EXPLAIN (COSTS OFF)
3144+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2;
3145+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2;
3146+
-- FOR UPDATE requires whole-row reference, and so asymmetric join doesn't apply
3147+
EXPLAIN (COSTS OFF)
3148+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2;
3149+
SELECT t1.a,t2.b FROM fprt1 t1 , ftprt2_p1 t2 WHERE (t1.a = t2.b) AND t2.c like '%0004' ORDER BY 1,2 FOR UPDATE OF t2;
3150+
31423151
RESET enable_partitionwise_join;
31433152

31443153

src/backend/optimizer/path/allpaths.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4303,8 +4303,11 @@ generate_partitionwise_join_paths(PlannerInfo *root, RelOptInfo *rel)
43034303
if (!IS_PARTITIONED_REL(rel))
43044304
return;
43054305

4306-
/* The relation should have consider_partitionwise_join set. */
4307-
Assert(rel->consider_partitionwise_join);
4306+
/*
4307+
* The relation should have exactly one of consider_partitionwise_join or
4308+
* consider_asymmetric_join set.
4309+
*/
4310+
Assert(rel->consider_partitionwise_join ^ rel->consider_asymmetric_join);
43084311

43094312
/* Guard against stack overflow due to overly deep partition hierarchy. */
43104313
check_stack_depth();

src/backend/optimizer/path/costsize.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ bool enable_mergejoin = true;
146146
bool enable_hashjoin = true;
147147
bool enable_gathermerge = true;
148148
bool enable_partitionwise_join = false;
149+
bool enable_asymmetric_join = true;
149150
bool enable_partitionwise_aggregate = false;
150151
bool enable_parallel_append = true;
151152
bool enable_parallel_hash = true;

0 commit comments

Comments
 (0)