BitmapHeapScan: Push skip_fetch optimization into table AM
author     Tomas Vondra <[email protected]>
           Sat, 6 Apr 2024 22:24:12 +0000 (00:24 +0200)
committer  Tomas Vondra <[email protected]>
           Sat, 6 Apr 2024 22:24:14 +0000 (00:24 +0200)
Commit 7c70996ebf0949b142 introduced an optimization to allow bitmap
scans to operate like index-only scans by not fetching a block from the
heap if none of the underlying data is needed and the block is marked
all visible in the visibility map.
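
In outline, the check combines three conditions before reading the page. The
following condensed C sketch (not standalone code; it reuses the internal
names that appear in the heapam_handler.c hunk below, with "rel",
"need_tuples", "vmbuffer", and "empty_tuples_pending" standing in for
scan-level state) shows the shape of the test:

    /*
     * Sketch: skip reading the heap block when no heap data is needed.
     * "need_tuples", "vmbuffer", and "empty_tuples_pending" are stand-ins;
     * the committed code keeps the equivalent fields in the scan descriptor.
     */
    if (!need_tuples &&          /* no quals or tlist touch heap columns */
        !tbmres->recheck &&      /* bitmap entries are exact, no recheck */
        VM_ALL_VISIBLE(rel, tbmres->blockno, &vmbuffer))
    {
        /* return tbmres->ntuples all-NULL tuples instead of fetching the page */
        empty_tuples_pending += tbmres->ntuples;
    }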

With the introduction of table AMs, a FIXME was added to this code
indicating that the skip_fetch logic should be pushed into the table
AM-specific code, as not all table AMs may use a visibility map in the
same way.

This commit resolves that FIXME for the handling of the current block. The
layering violation is still present in BitmapHeapScan's prefetching code,
which uses the visibility map to decide whether or not to prefetch a block.
However, this can be addressed independently.
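
The remaining violation is visible in the BitmapPrefetch() hunks below, where
the executor still consults the visibility map directly when deciding whether
to prefetch, e.g.:

    /* Still above the table AM: decide whether to prefetch a block. */
    skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
                  (node->tbmres ? !node->tbmres->recheck : false) &&
                  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                 tbmpre->blockno,
                                 &node->pvmbuffer));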

Author: Melanie Plageman
Reviewed-by: Andres Freund, Heikki Linnakangas, Tomas Vondra, Mark Dilger
Discussion: https://postgr.es/m/CAAKRu_ZwCwWFeL_H3ia26bP2e7HiKLWt0ZmGXPVwPO6uXq0vaA%40mail.gmail.com

src/backend/access/heap/heapam.c
src/backend/access/heap/heapam_handler.c
src/backend/executor/nodeBitmapHeapscan.c
src/include/access/heapam.h
src/include/access/tableam.h
src/include/nodes/execnodes.h

index dada2ecd1e32ab6a15a27b98ae70fb4788aa32aa..01bb2f4cc164efc1cb03d130b79ddd8486849df0 100644 (file)
@@ -967,6 +967,8 @@ heap_beginscan(Relation relation, Snapshot snapshot,
    scan->rs_base.rs_flags = flags;
    scan->rs_base.rs_parallel = parallel_scan;
    scan->rs_strategy = NULL;   /* set in initscan */
+   scan->rs_vmbuffer = InvalidBuffer;
+   scan->rs_empty_tuples_pending = 0;
 
    /*
     * Disable page-at-a-time mode if it's not a MVCC-safe snapshot.
@@ -1055,6 +1057,14 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
    if (BufferIsValid(scan->rs_cbuf))
        ReleaseBuffer(scan->rs_cbuf);
 
+   if (BufferIsValid(scan->rs_vmbuffer))
+   {
+       ReleaseBuffer(scan->rs_vmbuffer);
+       scan->rs_vmbuffer = InvalidBuffer;
+   }
+
+   Assert(scan->rs_empty_tuples_pending == 0);
+
    /*
     * reinitialize scan descriptor
     */
@@ -1074,6 +1084,11 @@ heap_endscan(TableScanDesc sscan)
    if (BufferIsValid(scan->rs_cbuf))
        ReleaseBuffer(scan->rs_cbuf);
 
+   if (BufferIsValid(scan->rs_vmbuffer))
+       ReleaseBuffer(scan->rs_vmbuffer);
+
+   Assert(scan->rs_empty_tuples_pending == 0);
+
    /*
     * decrement relation reference count and free scan descriptor storage
     */
index 3e7a6b5548b05882414192f4431fe30b3b267b0d..58de2c82a707ab99579fab844d89684d7c1e0b3c 100644 (file)
@@ -27,6 +27,7 @@
 #include "access/syncscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/index.h"
@@ -2198,6 +2199,24 @@ heapam_scan_bitmap_next_block(TableScanDesc scan,
    hscan->rs_cindex = 0;
    hscan->rs_ntuples = 0;
 
+   /*
+    * We can skip fetching the heap page if we don't need any fields from the
+    * heap, the bitmap entries don't need rechecking, and all tuples on the
+    * page are visible to our transaction.
+    */
+   if (!(scan->rs_flags & SO_NEED_TUPLES) &&
+       !tbmres->recheck &&
+       VM_ALL_VISIBLE(scan->rs_rd, tbmres->blockno, &hscan->rs_vmbuffer))
+   {
+       /* can't be lossy in the skip_fetch case */
+       Assert(tbmres->ntuples >= 0);
+       Assert(hscan->rs_empty_tuples_pending >= 0);
+
+       hscan->rs_empty_tuples_pending += tbmres->ntuples;
+
+       return true;
+   }
+
    /*
     * Ignore any claimed entries past what we think is the end of the
     * relation. It may have been extended after the start of our scan (we
@@ -2310,6 +2329,16 @@ heapam_scan_bitmap_next_tuple(TableScanDesc scan,
    Page        page;
    ItemId      lp;
 
+   if (hscan->rs_empty_tuples_pending > 0)
+   {
+       /*
+        * If we don't have to fetch the tuple, just return nulls.
+        */
+       ExecStoreAllNullTuple(slot);
+       hscan->rs_empty_tuples_pending--;
+       return true;
+   }
+
    /*
     * Out of range?  If so, nothing more to look at on this page
     */
index 2148a21531af4768e428b7f0be857ca465558106..5f768701a5ee981e52f3b9525b4ebe6e5b8ed8a7 100644 (file)
@@ -105,16 +105,6 @@ BitmapHeapNext(BitmapHeapScanState *node)
     */
    if (!node->initialized)
    {
-       /*
-        * We can potentially skip fetching heap pages if we do not need any
-        * columns of the table, either for checking non-indexable quals or
-        * for returning data.  This test is a bit simplistic, as it checks
-        * the stronger condition that there's no qual or return tlist at all.
-        * But in most cases it's probably not worth working harder than that.
-        */
-       node->can_skip_fetch = (node->ss.ps.plan->qual == NIL &&
-                               node->ss.ps.plan->targetlist == NIL);
-
        if (!pstate)
        {
            tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
@@ -195,10 +185,24 @@ BitmapHeapNext(BitmapHeapScanState *node)
         */
        if (!scan)
        {
+           bool        need_tuples = false;
+
+           /*
+            * We can potentially skip fetching heap pages if we do not need
+            * any columns of the table, either for checking non-indexable
+            * quals or for returning data.  This test is a bit simplistic, as
+            * it checks the stronger condition that there's no qual or return
+            * tlist at all. But in most cases it's probably not worth working
+            * harder than that.
+            */
+           need_tuples = (node->ss.ps.plan->qual != NIL ||
+                          node->ss.ps.plan->targetlist != NIL);
+
            scan = table_beginscan_bm(node->ss.ss_currentRelation,
                                      node->ss.ps.state->es_snapshot,
                                      0,
-                                     NULL);
+                                     NULL,
+                                     need_tuples);
 
            node->ss.ss_currentScanDesc = scan;
        }
@@ -208,7 +212,7 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
    for (;;)
    {
-       bool        skip_fetch;
+       bool        valid_block;
 
        CHECK_FOR_INTERRUPTS();
 
@@ -229,37 +233,14 @@ BitmapHeapNext(BitmapHeapScanState *node)
 
            BitmapAdjustPrefetchIterator(node, tbmres);
 
+           valid_block = table_scan_bitmap_next_block(scan, tbmres);
+
            if (tbmres->ntuples >= 0)
                node->exact_pages++;
            else
                node->lossy_pages++;
 
-           /*
-            * We can skip fetching the heap page if we don't need any fields
-            * from the heap, and the bitmap entries don't need rechecking,
-            * and all tuples on the page are visible to our transaction.
-            *
-            * XXX: It's a layering violation that we do these checks above
-            * tableam, they should probably moved below it at some point.
-            */
-           skip_fetch = (node->can_skip_fetch &&
-                         !tbmres->recheck &&
-                         VM_ALL_VISIBLE(node->ss.ss_currentRelation,
-                                        tbmres->blockno,
-                                        &node->vmbuffer));
-
-           if (skip_fetch)
-           {
-               /* can't be lossy in the skip_fetch case */
-               Assert(tbmres->ntuples >= 0);
-
-               /*
-                * The number of tuples on this page is put into
-                * node->return_empty_tuples.
-                */
-               node->return_empty_tuples = tbmres->ntuples;
-           }
-           else if (!table_scan_bitmap_next_block(scan, tbmres))
+           if (!valid_block)
            {
                /* AM doesn't think this block is valid, skip */
                continue;
@@ -302,52 +283,33 @@ BitmapHeapNext(BitmapHeapScanState *node)
         * should happen only when we have determined there is still something
         * to do on the current page, else we may uselessly prefetch the same
         * page we are just about to request for real.
-        *
-        * XXX: It's a layering violation that we do these checks above
-        * tableam, they should probably moved below it at some point.
         */
        BitmapPrefetch(node, scan);
 
-       if (node->return_empty_tuples > 0)
+       /*
+        * Attempt to fetch tuple from AM.
+        */
+       if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
        {
-           /*
-            * If we don't have to fetch the tuple, just return nulls.
-            */
-           ExecStoreAllNullTuple(slot);
-
-           if (--node->return_empty_tuples == 0)
-           {
-               /* no more tuples to return in the next round */
-               node->tbmres = tbmres = NULL;
-           }
+           /* nothing more to look at on this page */
+           node->tbmres = tbmres = NULL;
+           continue;
        }
-       else
+
+       /*
+        * If we are using lossy info, we have to recheck the qual conditions
+        * at every tuple.
+        */
+       if (tbmres->recheck)
        {
-           /*
-            * Attempt to fetch tuple from AM.
-            */
-           if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
+           econtext->ecxt_scantuple = slot;
+           if (!ExecQualAndReset(node->bitmapqualorig, econtext))
            {
-               /* nothing more to look at on this page */
-               node->tbmres = tbmres = NULL;
+               /* Fails recheck, so drop it and loop back for another */
+               InstrCountFiltered2(node, 1);
+               ExecClearTuple(slot);
                continue;
            }
-
-           /*
-            * If we are using lossy info, we have to recheck the qual
-            * conditions at every tuple.
-            */
-           if (tbmres->recheck)
-           {
-               econtext->ecxt_scantuple = slot;
-               if (!ExecQualAndReset(node->bitmapqualorig, econtext))
-               {
-                   /* Fails recheck, so drop it and loop back for another */
-                   InstrCountFiltered2(node, 1);
-                   ExecClearTuple(slot);
-                   continue;
-               }
-           }
        }
 
        /* OK to return this tuple */
@@ -519,7 +481,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                 * it did for the current heap page; which is not a certainty
                 * but is true in many cases.
                 */
-               skip_fetch = (node->can_skip_fetch &&
+               skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
                              (node->tbmres ? !node->tbmres->recheck : false) &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                             tbmpre->blockno,
@@ -570,7 +532,7 @@ BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
                }
 
                /* As above, skip prefetch if we expect not to need page */
-               skip_fetch = (node->can_skip_fetch &&
+               skip_fetch = (!(scan->rs_flags & SO_NEED_TUPLES) &&
                              (node->tbmres ? !node->tbmres->recheck : false) &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                             tbmpre->blockno,
@@ -640,8 +602,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);
    if (node->tbm)
        tbm_free(node->tbm);
-   if (node->vmbuffer != InvalidBuffer)
-       ReleaseBuffer(node->vmbuffer);
    if (node->pvmbuffer != InvalidBuffer)
        ReleaseBuffer(node->pvmbuffer);
    node->tbm = NULL;
@@ -651,7 +611,6 @@ ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
    node->initialized = false;
    node->shared_tbmiterator = NULL;
    node->shared_prefetch_iterator = NULL;
-   node->vmbuffer = InvalidBuffer;
    node->pvmbuffer = InvalidBuffer;
 
    ExecScanReScan(&node->ss);
@@ -696,8 +655,6 @@ ExecEndBitmapHeapScan(BitmapHeapScanState *node)
        tbm_end_shared_iterate(node->shared_tbmiterator);
    if (node->shared_prefetch_iterator)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);
-   if (node->vmbuffer != InvalidBuffer)
-       ReleaseBuffer(node->vmbuffer);
    if (node->pvmbuffer != InvalidBuffer)
        ReleaseBuffer(node->pvmbuffer);
 
@@ -741,8 +698,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
-   scanstate->return_empty_tuples = 0;
-   scanstate->vmbuffer = InvalidBuffer;
    scanstate->pvmbuffer = InvalidBuffer;
    scanstate->exact_pages = 0;
    scanstate->lossy_pages = 0;
@@ -753,7 +708,6 @@ ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
    scanstate->shared_tbmiterator = NULL;
    scanstate->shared_prefetch_iterator = NULL;
    scanstate->pstate = NULL;
-   scanstate->can_skip_fetch = false;
 
    /*
     * Miscellaneous initialization
index 2765efc4e5e9cd995985afd79352313bcda1d46f..750ea30852e3aae4b5278b967875ebd845fb00ac 100644 (file)
@@ -76,6 +76,16 @@ typedef struct HeapScanDescData
     */
    ParallelBlockTableScanWorkerData *rs_parallelworkerdata;
 
+   /*
+    * These fields are only used for bitmap scans for the "skip fetch"
+    * optimization. Bitmap scans needing no fields from the heap may skip
+    * fetching an all visible block, instead using the number of tuples per
+    * block reported by the bitmap to determine how many NULL-filled tuples
+    * to return.
+    */
+   Buffer      rs_vmbuffer;
+   int         rs_empty_tuples_pending;
+
    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int         rs_cindex;      /* current tuple's index in vistuples */
    int         rs_ntuples;     /* number of visible tuples on page */
index e7eeb7540984f146d2c0c1d79233a5397cdbd889..be198fa3158b2569e51f022815b99ca9196fd6aa 100644 (file)
@@ -63,6 +63,13 @@ typedef enum ScanOptions
 
    /* unregister snapshot at scan end? */
    SO_TEMP_SNAPSHOT = 1 << 9,
+
+   /*
+    * At the discretion of the table AM, bitmap table scans may be able to
+    * skip fetching a block from the table if none of the table data is
+    * needed. If table data may be needed, set SO_NEED_TUPLES.
+    */
+   SO_NEED_TUPLES = 1 << 10,
 }          ScanOptions;
 
 /*
@@ -937,10 +944,13 @@ table_beginscan_strat(Relation rel, Snapshot snapshot,
  */
 static inline TableScanDesc
 table_beginscan_bm(Relation rel, Snapshot snapshot,
-                  int nkeys, struct ScanKeyData *key)
+                  int nkeys, struct ScanKeyData *key, bool need_tuple)
 {
    uint32      flags = SO_TYPE_BITMAPSCAN | SO_ALLOW_PAGEMODE;
 
+   if (need_tuple)
+       flags |= SO_NEED_TUPLES;
+
    return rel->rd_tableam->scan_begin(rel, snapshot, nkeys, key, NULL, flags);
 }
 
index e57ebd7288701a240ac52788c4bf6ec49d934d1e..fa2f70b7a4894104ba5ec7dfccf207a43147bc93 100644 (file)
@@ -1794,10 +1794,7 @@ typedef struct ParallelBitmapHeapState
  *     tbm                bitmap obtained from child index scan(s)
  *     tbmiterator        iterator for scanning current pages
  *     tbmres             current-page data
- *     can_skip_fetch     can we potentially skip tuple fetches in this scan?
- *     return_empty_tuples number of empty tuples to return
- *     vmbuffer           buffer for visibility-map lookups
- *     pvmbuffer          ditto, for prefetched pages
+ *     pvmbuffer          buffer for visibility-map lookups of prefetched pages
  *     exact_pages        total number of exact pages retrieved
  *     lossy_pages        total number of lossy pages retrieved
  *     prefetch_iterator  iterator for prefetching ahead of current page
@@ -1817,9 +1814,6 @@ typedef struct BitmapHeapScanState
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator;
    TBMIterateResult *tbmres;
-   bool        can_skip_fetch;
-   int         return_empty_tuples;
-   Buffer      vmbuffer;
    Buffer      pvmbuffer;
    long        exact_pages;
    long        lossy_pages;