@@ -161,6 +161,7 @@ static Selectivity get_foreign_key_join_selectivity(PlannerInfo *root,
 static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
 static double relation_byte_size(double tuples, int width);
 static double page_size(double tuples, int width);
+static double get_parallel_divisor(Path *path);
 
 
 /*
@@ -238,32 +239,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
 	/* Adjust costing for parallelism, if used. */
 	if (path->parallel_workers > 0)
 	{
-		double		parallel_divisor = path->parallel_workers;
-		double		leader_contribution;
-
-		/*
-		 * Early experience with parallel query suggests that when there is
-		 * only one worker, the leader often makes a very substantial
-		 * contribution to executing the parallel portion of the plan, but as
-		 * more workers are added, it does less and less, because it's busy
-		 * reading tuples from the workers and doing whatever non-parallel
-		 * post-processing is needed.  By the time we reach 4 workers, the
-		 * leader no longer makes a meaningful contribution.  Thus, for now,
-		 * estimate that the leader spends 30% of its time servicing each
-		 * worker, and the remainder executing the parallel plan.
-		 */
-		leader_contribution = 1.0 - (0.3 * path->parallel_workers);
-		if (leader_contribution > 0)
-			parallel_divisor += leader_contribution;
-
-		/*
-		 * In the case of a parallel plan, the row count needs to represent
-		 * the number of tuples processed per worker.  Otherwise, higher-level
-		 * plan nodes that appear below the gather will be costed incorrectly,
-		 * because they'll anticipate receiving more rows than any given copy
-		 * will actually get.
-		 */
-		path->rows = clamp_row_est(path->rows / parallel_divisor);
+		double		parallel_divisor = get_parallel_divisor(path);
 
 		/* The CPU cost is divided among all the workers. */
 		cpu_run_cost /= parallel_divisor;
@@ -274,6 +250,12 @@ cost_seqscan(Path *path, PlannerInfo *root,
 		 * prefetching.  For now, we assume that the disk run cost can't be
 		 * amortized at all.
 		 */
+
+		/*
+		 * In the case of a parallel plan, the row count needs to represent
+		 * the number of tuples processed per worker.
+		 */
+		path->rows = clamp_row_est(path->rows / parallel_divisor);
 	}
 
 	path->startup_cost = startup_cost;
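To make the per-worker adjustment concrete, here is a minimal standalone sketch. It is not PostgreSQL code: divisor_for() is a hypothetical reimplementation of get_parallel_divisor() without the Path argument, and the row and cost figures are assumed for illustration.

#include <math.h>
#include <stdio.h>

/* Hypothetical stand-in for get_parallel_divisor(); same 30% leader model. */
static double
divisor_for(int parallel_workers)
{
	double		parallel_divisor = parallel_workers;
	double		leader_contribution = 1.0 - (0.3 * parallel_workers);

	if (leader_contribution > 0)
		parallel_divisor += leader_contribution;
	return parallel_divisor;
}

int
main(void)
{
	double		rows = 100000.0;		/* assumed relation row estimate */
	double		cpu_run_cost = 1000.0;	/* assumed CPU run cost */
	double		d = divisor_for(2);		/* 2 workers: 2 + (1 - 0.6) = 2.4 */

	cpu_run_cost /= d;					/* ~416.67: CPU work split across workers */
	rows = rint(rows / d);				/* ~41667 rows seen by each worker */
	printf("divisor=%.1f cpu_run_cost=%.2f rows=%.0f\n", d, cpu_run_cost, rows);
	return 0;
}

In the real code the division goes through clamp_row_est(), which rounds the estimate and keeps it from dropping below one row.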
@@ -2014,6 +1996,10 @@ final_cost_nestloop(PlannerInfo *root, NestPath *path,
 	else
 		path->path.rows = path->path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->path.parallel_workers > 0)
+		path->path.rows /= get_parallel_divisor(&path->path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2432,6 +2418,10 @@ final_cost_mergejoin(PlannerInfo *root, MergePath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
@@ -2811,6 +2801,10 @@ final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 	else
 		path->jpath.path.rows = path->jpath.path.parent->rows;
 
+	/* For partial paths, scale row estimate. */
+	if (path->jpath.path.parallel_workers > 0)
+		path->jpath.path.rows /= get_parallel_divisor(&path->jpath.path);
+
 	/*
 	 * We could include disable_cost in the preliminary estimate, but that
 	 * would amount to optimizing for the case where the join method is
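All three join cost functions apply the same scaling, and unlike cost_seqscan they divide the row estimate directly rather than going through clamp_row_est(). As a worked example with assumed numbers: a join estimated at 90000 rows with 3 budgeted workers gets divisor 3 + (1 - 0.9) = 3.1, so each worker's copy of the plan is costed for roughly 29032 rows.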
@@ -4799,3 +4793,31 @@ page_size(double tuples, int width)
 {
 	return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }
+
+/*
+ * Estimate the fraction of the work that each worker will do given the
+ * number of workers budgeted for the path.
+ */
+static double
+get_parallel_divisor(Path *path)
+{
+	double		parallel_divisor = path->parallel_workers;
+	double		leader_contribution;
+
+	/*
+	 * Early experience with parallel query suggests that when there is only
+	 * one worker, the leader often makes a very substantial contribution to
+	 * executing the parallel portion of the plan, but as more workers are
+	 * added, it does less and less, because it's busy reading tuples from the
+	 * workers and doing whatever non-parallel post-processing is needed.  By
+	 * the time we reach 4 workers, the leader no longer makes a meaningful
+	 * contribution.  Thus, for now, estimate that the leader spends 30% of
+	 * its time servicing each worker, and the remainder executing the
+	 * parallel plan.
+	 */
+	leader_contribution = 1.0 - (0.3 * path->parallel_workers);
+	if (leader_contribution > 0)
+		parallel_divisor += leader_contribution;
+
+	return parallel_divisor;
+}
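Worked through, the model gives a divisor of 1.7 for 1 worker, 2.4 for 2, and 3.1 for 3; from 4 workers on, the leader term 1.0 - 0.3 * workers goes nonpositive and is dropped, so the divisor is simply the worker count (4.0, 5.0, and so on). The standalone sketch after the cost_seqscan hunks above shows the same formula as runnable code.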