Show Parallel Bitmap Heap Scan worker stats in EXPLAIN ANALYZE
author David Rowley <[email protected]>
Tue, 9 Jul 2024 00:15:47 +0000 (12:15 +1200)
committer David Rowley <[email protected]>
Tue, 9 Jul 2024 00:15:47 +0000 (12:15 +1200)
Nodes like Memoize report the cache stats for each parallel worker, so it
makes sense to show the exact and lossy pages in Parallel Bitmap Heap Scan
in a similar way.  Likewise, Sort shows the method and memory used for
each worker.

There was some discussion on whether the leader stats should include the
totals for each parallel worker or not.  I did some analysis on this to
see what other parallel node types do and it seems only Parallel Hash does
anything like this.  All the rest, per what's supported by
ExecParallelRetrieveInstrumentation(), are consistent with each other.

Author: David Geier <[email protected]>
Author: Heikki Linnakangas <[email protected]>
Author: Donghang Lin <[email protected]>
Author: Alena Rybakina <[email protected]>
Author: David Rowley <[email protected]>
Reviewed-by: Dmitry Dolgov <[email protected]>
Reviewed-by: Michael Christofides <[email protected]>
Reviewed-by: Robert Haas <[email protected]>
Reviewed-by: Dilip Kumar <[email protected]>
Reviewed-by: Tomas Vondra <[email protected]>
Reviewed-by: Melanie Plageman <[email protected]>
Reviewed-by: Donghang Lin <[email protected]>
Reviewed-by: Masahiro Ikeda <[email protected]>
Discussion: https://postgr.es/m/b3d80961-c2e5-38cc-6a32-61886cdf766d%40gmail.com

src/backend/commands/explain.c
src/backend/executor/execParallel.c
src/backend/executor/nodeBitmapHeapscan.c
src/include/executor/nodeBitmapHeapscan.h
src/include/nodes/execnodes.h
src/tools/pgindent/typedefs.list

index 6defd26df50b1dbf2c546e1ea8ba40fa207d746a..118db12903c5bd2579454ef37b12dd64b4427271 100644 (file)
@@ -2010,8 +2010,7 @@ ExplainNode(PlanState *planstate, List *ancestors,
            if (plan->qual)
                show_instrumentation_count("Rows Removed by Filter", 1,
                                           planstate, es);
-           if (es->analyze)
-               show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
+           show_tidbitmap_info((BitmapHeapScanState *) planstate, es);
            break;
        case T_SampleScan:
            show_tablesample(((SampleScan *) plan)->tablesample,
@@ -3628,31 +3627,70 @@ show_hashagg_info(AggState *aggstate, ExplainState *es)
 }
 
 /*
- * If it's EXPLAIN ANALYZE, show exact/lossy pages for a BitmapHeapScan node
+ * Show exact/lossy pages for a BitmapHeapScan node
  */
 static void
 show_tidbitmap_info(BitmapHeapScanState *planstate, ExplainState *es)
 {
+   if (!es->analyze)
+       return;
+
    if (es->format != EXPLAIN_FORMAT_TEXT)
    {
        ExplainPropertyUInteger("Exact Heap Blocks", NULL,
-                               planstate->exact_pages, es);
+                               planstate->stats.exact_pages, es);
        ExplainPropertyUInteger("Lossy Heap Blocks", NULL,
-                               planstate->lossy_pages, es);
+                               planstate->stats.lossy_pages, es);
    }
    else
    {
-       if (planstate->exact_pages > 0 || planstate->lossy_pages > 0)
+       if (planstate->stats.exact_pages > 0 || planstate->stats.lossy_pages > 0)
        {
            ExplainIndentText(es);
            appendStringInfoString(es->str, "Heap Blocks:");
-           if (planstate->exact_pages > 0)
-               appendStringInfo(es->str, " exact=" UINT64_FORMAT, planstate->exact_pages);
-           if (planstate->lossy_pages > 0)
-               appendStringInfo(es->str, " lossy=" UINT64_FORMAT, planstate->lossy_pages);
+           if (planstate->stats.exact_pages > 0)
+               appendStringInfo(es->str, " exact=" UINT64_FORMAT, planstate->stats.exact_pages);
+           if (planstate->stats.lossy_pages > 0)
+               appendStringInfo(es->str, " lossy=" UINT64_FORMAT, planstate->stats.lossy_pages);
            appendStringInfoChar(es->str, '\n');
        }
    }
+
+   /* Display stats for each parallel worker */
+   if (planstate->pstate != NULL)
+   {
+       for (int n = 0; n < planstate->sinstrument->num_workers; n++)
+       {
+           BitmapHeapScanInstrumentation *si = &planstate->sinstrument->sinstrument[n];
+
+           if (si->exact_pages == 0 && si->lossy_pages == 0)
+               continue;
+
+           if (es->workers_state)
+               ExplainOpenWorker(n, es);
+
+           if (es->format == EXPLAIN_FORMAT_TEXT)
+           {
+               ExplainIndentText(es);
+               appendStringInfoString(es->str, "Heap Blocks:");
+               if (si->exact_pages > 0)
+                   appendStringInfo(es->str, " exact=" UINT64_FORMAT, si->exact_pages);
+               if (si->lossy_pages > 0)
+                   appendStringInfo(es->str, " lossy=" UINT64_FORMAT, si->lossy_pages);
+               appendStringInfoChar(es->str, '\n');
+           }
+           else
+           {
+               ExplainPropertyUInteger("Exact Heap Blocks", NULL,
+                                       si->exact_pages, es);
+               ExplainPropertyUInteger("Lossy Heap Blocks", NULL,
+                                       si->lossy_pages, es);
+           }
+
+           if (es->workers_state)
+               ExplainCloseWorker(n, es);
+       }
+   }
 }
 
 /*
index 8c53d1834e9f89f975cb6f1fe3c40fdafbc9e658..bfb3419efb7b1280e855931a7e5228f3b3611d13 100644 (file)
@@ -1076,6 +1076,9 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
        case T_MemoizeState:
            ExecMemoizeRetrieveInstrumentation((MemoizeState *) planstate);
            break;
+       case T_BitmapHeapScanState:
+           ExecBitmapHeapRetrieveInstrumentation((BitmapHeapScanState *) planstate);
+           break;
        default:
            break;
    }
index 6b48a6d8350fb09e5ef70341ecbd6c8de9066e49..3c63bdd93dff394915abc97e581923c18896927e 100644 (file)
@@ -236,9 +236,9 @@ BitmapHeapNext(BitmapHeapScanState *node)
            valid_block = table_scan_bitmap_next_block(scan, tbmres);
 
            if (tbmres->ntuples >= 0)
-               node->exact_pages++;
+               node->stats.exact_pages++;
            else
-               node->lossy_pages++;
+               node->stats.lossy_pages++;
 
            if (!valid_block)
            {
@@ -627,6 +627,29 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
 {
    TableScanDesc scanDesc;
 
+   /*
+    * When ending a parallel worker, copy the statistics gathered by the
+    * worker back into shared memory so that it can be picked up by the main
+    * process to report in EXPLAIN ANALYZE.
+    */
+   if (node->sinstrument != NULL && IsParallelWorker())
+   {
+       BitmapHeapScanInstrumentation *si;
+
+       Assert(ParallelWorkerNumber <= node->sinstrument->num_workers);
+       si = &node->sinstrument->sinstrument[ParallelWorkerNumber];
+
+       /*
+        * Here we accumulate the stats rather than performing memcpy on
+        * node->stats into si.  When a Gather/GatherMerge node finishes it
+        * will perform executor shutdown on the workers.  On rescan it will
+        * spin up new workers which will have a new BitmapHeapScanState and
+        * zeroed stats.
+        */
+       si->exact_pages += node->stats.exact_pages;
+       si->lossy_pages += node->stats.lossy_pages;
+   }
+
    /*
     * extract information from the node
     */
@@ -694,8 +717,10 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->pvmbuffer = InvalidBuffer;
-   scanstate->exact_pages = 0;
-   scanstate->lossy_pages = 0;
+
+   /* Zero the statistics counters */
+   memset(&scanstate->stats, 0, sizeof(BitmapHeapScanInstrumentation));
+
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;
@@ -803,7 +828,18 @@ void
 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
                       ParallelContext *pcxt)
 {
-   shm_toc_estimate_chunk(&pcxt->estimator, sizeof(ParallelBitmapHeapState));
+   Size        size;
+
+   size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+   /* account for instrumentation, if required */
+   if (node->ss.ps.instrument && pcxt->nworkers > 0)
+   {
+       size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+       size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+   }
+
+   shm_toc_estimate_chunk(&pcxt->estimator, size);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
 }
 
@@ -818,13 +854,27 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
                            ParallelContext *pcxt)
 {
    ParallelBitmapHeapState *pstate;
+   SharedBitmapHeapInstrumentation *sinstrument = NULL;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;
+   char       *ptr;
+   Size        size;
 
    /* If there's no DSA, there are no workers; initialize nothing. */
    if (dsa == NULL)
        return;
 
-   pstate = shm_toc_allocate(pcxt->toc, sizeof(ParallelBitmapHeapState));
+   size = MAXALIGN(sizeof(ParallelBitmapHeapState));
+   if (node->ss.ps.instrument && pcxt->nworkers > 0)
+   {
+       size = add_size(size, offsetof(SharedBitmapHeapInstrumentation, sinstrument));
+       size = add_size(size, mul_size(pcxt->nworkers, sizeof(BitmapHeapScanInstrumentation)));
+   }
+
+   ptr = shm_toc_allocate(pcxt->toc, size);
+   pstate = (ParallelBitmapHeapState *) ptr;
+   ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+   if (node->ss.ps.instrument && pcxt->nworkers > 0)
+       sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
 
    pstate->tbmiterator = 0;
    pstate->prefetch_iterator = 0;
@@ -837,8 +887,18 @@ ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
 
    ConditionVariableInit(&pstate->cv);
 
+   if (sinstrument)
+   {
+       sinstrument->num_workers = pcxt->nworkers;
+
+       /* ensure any unfilled slots will contain zeroes */
+       memset(sinstrument->sinstrument, 0,
+              pcxt->nworkers * sizeof(BitmapHeapScanInstrumentation));
+   }
+
    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
    node->pstate = pstate;
+   node->sinstrument = sinstrument;
 }
 
 /* ----------------------------------------------------------------
@@ -880,10 +940,37 @@ void
 ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
                               ParallelWorkerContext *pwcxt)
 {
-   ParallelBitmapHeapState *pstate;
+   char       *ptr;
 
    Assert(node->ss.ps.state->es_query_dsa != NULL);
 
-   pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
-   node->pstate = pstate;
+   ptr = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
+
+   node->pstate = (ParallelBitmapHeapState *) ptr;
+   ptr += MAXALIGN(sizeof(ParallelBitmapHeapState));
+
+   if (node->ss.ps.instrument)
+       node->sinstrument = (SharedBitmapHeapInstrumentation *) ptr;
+}
+
+/* ----------------------------------------------------------------
+ *     ExecBitmapHeapRetrieveInstrumentation
+ *
+ *     Transfer bitmap heap scan statistics from DSM to private memory.
+ * ----------------------------------------------------------------
+ */
+void
+ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node)
+{
+   SharedBitmapHeapInstrumentation *sinstrument = node->sinstrument;
+   Size        size;
+
+   if (sinstrument == NULL)
+       return;
+
+   size = offsetof(SharedBitmapHeapInstrumentation, sinstrument)
+       + sinstrument->num_workers * sizeof(BitmapHeapScanInstrumentation);
+
+   node->sinstrument = palloc(size);
+   memcpy(node->sinstrument, sinstrument, size);
 }
index ea003a9caaef52277b93c2d0a835a873510faae6..446a664590a9756858e5d36a6dc0359cfaebf322 100644 (file)
@@ -28,5 +28,6 @@ extern void ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
                                          ParallelContext *pcxt);
 extern void ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
                                           ParallelWorkerContext *pwcxt);
+extern void ExecBitmapHeapRetrieveInstrumentation(BitmapHeapScanState *node);
 
 #endif                         /* NODEBITMAPHEAPSCAN_H */
index abfcd5f590586ebaa225d6d07b56f91716f5104f..cac684d9b3af778474e1fec6e23ffa8354fe13c1 100644 (file)
@@ -1746,6 +1746,19 @@ typedef struct BitmapIndexScanState
    struct IndexScanDescData *biss_ScanDesc;
 } BitmapIndexScanState;
 
+/* ----------------
+ *  BitmapHeapScanInstrumentation information
+ *
+ *     exact_pages        total number of exact pages retrieved
+ *     lossy_pages        total number of lossy pages retrieved
+ * ----------------
+ */
+typedef struct BitmapHeapScanInstrumentation
+{
+   uint64      exact_pages;
+   uint64      lossy_pages;
+} BitmapHeapScanInstrumentation;
+
 /* ----------------
  *  SharedBitmapState information
  *
@@ -1789,6 +1802,20 @@ typedef struct ParallelBitmapHeapState
    ConditionVariable cv;
 } ParallelBitmapHeapState;
 
+/* ----------------
+ *  Instrumentation data for a parallel bitmap heap scan.
+ *
+ * A shared memory struct that each parallel worker copies its
+ * BitmapHeapScanInstrumentation information into at executor shutdown to
+ * allow the leader to display the information in EXPLAIN ANALYZE.
+ * ----------------
+ */
+typedef struct SharedBitmapHeapInstrumentation
+{
+   int         num_workers;
+   BitmapHeapScanInstrumentation sinstrument[FLEXIBLE_ARRAY_MEMBER];
+} SharedBitmapHeapInstrumentation;
+
 /* ----------------
  *  BitmapHeapScanState information
  *
@@ -1797,8 +1824,7 @@ typedef struct ParallelBitmapHeapState
  *     tbmiterator        iterator for scanning current pages
  *     tbmres             current-page data
  *     pvmbuffer          buffer for visibility-map lookups of prefetched pages
- *     exact_pages        total number of exact pages retrieved
- *     lossy_pages        total number of lossy pages retrieved
+ *     stats              execution statistics
  *     prefetch_iterator  iterator for prefetching ahead of current page
  *     prefetch_pages     # pages prefetch iterator is ahead of current
  *     prefetch_target    current target prefetch distance
@@ -1807,6 +1833,7 @@ typedef struct ParallelBitmapHeapState
  *     shared_tbmiterator     shared iterator
  *     shared_prefetch_iterator shared iterator for prefetching
  *     pstate             shared state for parallel bitmap scan
+ *     sinstrument        statistics for parallel workers
  * ----------------
  */
 typedef struct BitmapHeapScanState
@@ -1817,8 +1844,7 @@ typedef struct BitmapHeapScanState
    TBMIterator *tbmiterator;
    TBMIterateResult *tbmres;
    Buffer      pvmbuffer;
-   uint64      exact_pages;
-   uint64      lossy_pages;
+   BitmapHeapScanInstrumentation stats;
    TBMIterator *prefetch_iterator;
    int         prefetch_pages;
    int         prefetch_target;
@@ -1827,6 +1853,7 @@ typedef struct BitmapHeapScanState
    TBMSharedIterator *shared_tbmiterator;
    TBMSharedIterator *shared_prefetch_iterator;
    ParallelBitmapHeapState *pstate;
+   SharedBitmapHeapInstrumentation *sinstrument;
 } BitmapHeapScanState;
 
 /* ----------------
index 9320e4d808009d39064be653c15bdbbfc4a409ef..635e6d6e21545954a9332fa55af26b0b0ab81e3d 100644 (file)
@@ -262,6 +262,7 @@ BitmapAndPath
 BitmapAndState
 BitmapHeapPath
 BitmapHeapScan
+BitmapHeapScanInstrumentation
 BitmapHeapScanState
 BitmapIndexScan
 BitmapIndexScanState
@@ -2603,6 +2604,7 @@ SetToDefault
 SetupWorkerPtrType
 ShDependObjectInfo
 SharedAggInfo
+SharedBitmapHeapInstrumentation
 SharedBitmapState
 SharedDependencyObjectType
 SharedDependencyType