Use streaming read I/O in VACUUM's first phase
author Melanie Plageman <[email protected]>
Fri, 14 Feb 2025 17:56:57 +0000 (12:56 -0500)
committer Melanie Plageman <[email protected]>
Fri, 14 Feb 2025 17:57:43 +0000 (12:57 -0500)
Make vacuum's first phase, which prunes and freezes tuples and records
dead TIDs, use the read stream API by converting
heap_vac_scan_next_block() to a read stream callback.

Reviewed-by: Masahiko Sawada <[email protected]>
Reviewed-by: Thomas Munro <[email protected]>
Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com

src/backend/access/heap/vacuumlazy.c

index c4d0f77ee2fcb0a92bf79f476ab6e54f5f5778e8..08d89ab2bcd4a4276dce346095c414558f5211a6 100644 (file)
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
 #include "storage/lmgr.h"
+#include "storage/read_stream.h"
 #include "utils/lsyscache.h"
 #include "utils/pg_rusage.h"
 #include "utils/timestamp.h"
@@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo
 static void lazy_scan_heap(LVRelState *vacrel);
 static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
                                         VacuumParams *params);
-static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
-                                    uint8 *blk_info);
+static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
+                                           void *callback_private_data,
+                                           void *per_buffer_data);
 static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
 static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
                                   BlockNumber blkno, Page page,
@@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 static void
 lazy_scan_heap(LVRelState *vacrel)
 {
+   ReadStream *stream;
    BlockNumber rel_pages = vacrel->rel_pages,
-               blkno,
+               blkno = 0,
                next_fsm_block_to_vacuum = 0;
-   uint8       blk_info = 0;
+   void       *per_buffer_data = NULL;
    BlockNumber orig_eager_scan_success_limit =
        vacrel->eager_scan_remaining_successes; /* for logging */
    Buffer      vmbuffer = InvalidBuffer;
@@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel)
    vacrel->next_unskippable_eager_scanned = false;
    vacrel->next_unskippable_vmbuffer = InvalidBuffer;
 
-   while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info))
+   /* Set up the read stream for vacuum's first pass through the heap */
+   stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
+                                       vacrel->bstrategy,
+                                       vacrel->rel,
+                                       MAIN_FORKNUM,
+                                       heap_vac_scan_next_block,
+                                       vacrel,
+                                       sizeof(uint8));
+
+   while (true)
    {
        Buffer      buf;
        Page        page;
+       uint8       blk_info = 0;
        bool        has_lpdead_items;
        bool        vm_page_frozen = false;
        bool        got_cleanup_lock = false;
 
-       vacrel->scanned_pages++;
-       if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
-           vacrel->eager_scanned_pages++;
-
-       /* Report as block scanned, update error traceback information */
-       pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
-       update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
-                                blkno, InvalidOffsetNumber);
-
        vacuum_delay_point(false);
 
        /*
@@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel)
         * one-pass strategy, and the two-pass strategy with the index_cleanup
         * param set to 'off'.
         */
-       if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
+       if (vacrel->scanned_pages > 0 &&
+           vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
            lazy_check_wraparound_failsafe(vacrel);
 
        /*
@@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel)
 
            /*
             * Vacuum the Free Space Map to make newly-freed space visible on
-            * upper-level FSM pages.  Note we have not yet processed blkno.
+            * upper-level FSM pages. Note that blkno is the previously
+            * processed block.
             */
            FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
-                                   blkno);
+                                   blkno + 1);
            next_fsm_block_to_vacuum = blkno;
 
            /* Report that we are once again scanning the heap */
@@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel)
                                         PROGRESS_VACUUM_PHASE_SCAN_HEAP);
        }
 
+       buf = read_stream_next_buffer(stream, &per_buffer_data);
+
+       /* The relation is exhausted. */
+       if (!BufferIsValid(buf))
+           break;
+
+       blk_info = *((uint8 *) per_buffer_data);
+       CheckBufferIsPinnedOnce(buf);
+       page = BufferGetPage(buf);
+       blkno = BufferGetBlockNumber(buf);
+
+       vacrel->scanned_pages++;
+       if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
+           vacrel->eager_scanned_pages++;
+
+       /* Report as block scanned, update error traceback information */
+       pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
+       update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
+                                blkno, InvalidOffsetNumber);
+
        /*
         * Pin the visibility map page in case we need to mark the page
         * all-visible.  In most cases this will be very cheap, because we'll
@@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel)
         */
        visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
 
-       buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-                                vacrel->bstrategy);
-       page = BufferGetPage(buf);
-
        /*
         * We need a buffer cleanup lock to prune HOT chains and defragment
         * the page in lazy_scan_prune.  But when it's not possible to acquire
@@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel)
    if (BufferIsValid(vmbuffer))
        ReleaseBuffer(vmbuffer);
 
-   /* report that everything is now scanned */
-   pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
+   /*
+    * Report that everything is now scanned. We never skip scanning the last
+    * block in the relation, so we can pass rel_pages here.
+    */
+   pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
+                                rel_pages);
 
    /* now we can compute the new value for pg_class.reltuples */
    vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
@@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel)
        Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
        vacrel->missed_dead_tuples;
 
+   read_stream_end(stream);
+
    /*
     * Do index vacuuming (call each index's ambulkdelete routine), then do
     * related heap vacuuming
@@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel)
    /*
     * Vacuum the remainder of the Free Space Map.  We must do this whether or
     * not there were indexes, and whether or not we bypassed index vacuuming.
+    * We can pass rel_pages here because we never skip scanning the last
+    * block of the relation.
     */
-   if (blkno > next_fsm_block_to_vacuum)
-       FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
+   if (rel_pages > next_fsm_block_to_vacuum)
+       FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
 
    /* report all blocks vacuumed */
-   pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
+   pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
 
    /* Do final index cleanup (call each index's amvacuumcleanup routine) */
    if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
@@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel)
 }
 
 /*
- * heap_vac_scan_next_block() -- get next block for vacuum to process
- *
- * lazy_scan_heap() calls here every time it needs to get the next block to
- * prune and vacuum.  The function uses the visibility map, vacuum options,
- * and various thresholds to skip blocks which do not need to be processed and
- * sets blkno to the next block to process.
- *
- * The block number of the next block to process is set in *blkno and its
- * visibility status and whether or not it was eager scanned is set in
- * *blk_info.
- *
- * The return value is false if there are no further blocks to process.
- *
- * vacrel is an in/out parameter here.  Vacuum options and information about
- * the relation are read.  vacrel->skippedallvis is set if we skip a block
- * that's all-visible but not all-frozen, to ensure that we don't update
- * relfrozenxid in that case.  vacrel also holds information about the next
- * unskippable block, as bookkeeping for this function.
+ * heap_vac_scan_next_block() -- read stream callback to get the next block
+ * for vacuum to process
+ *
+ * Every time lazy_scan_heap() needs a new block to process during its first
+ * phase, it invokes read_stream_next_buffer() with a stream set up to call
+ * heap_vac_scan_next_block() to get the next block.
+ *
+ * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
+ * various thresholds to skip blocks which do not need to be processed and
+ * returns the next block to process or InvalidBlockNumber if there are no
+ * remaining blocks.
+ *
+ * The visibility status of the next block to process and whether or not it
+ * was eager scanned is set in the per_buffer_data.
+ *
+ * callback_private_data contains a reference to the LVRelState, passed to the
+ * read stream API during stream setup. The LVRelState is an in/out parameter
+ * here (locally named `vacrel`). Vacuum options and information about the
+ * relation are read from it. vacrel->skippedallvis is set if we skip a block
+ * that's all-visible but not all-frozen (to ensure that we don't update
+ * relfrozenxid in that case). vacrel also holds information about the next
+ * unskippable block -- as bookkeeping for this function.
  */
-static bool
-heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
-                        uint8 *blk_info)
+static BlockNumber
+heap_vac_scan_next_block(ReadStream *stream,
+                        void *callback_private_data,
+                        void *per_buffer_data)
 {
    BlockNumber next_block;
+   LVRelState *vacrel = callback_private_data;
+   uint8       blk_info = 0;
 
    /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
    next_block = vacrel->current_block + 1;
 
-   *blk_info = 0;
-
    /* Have we reached the end of the relation? */
    if (next_block >= vacrel->rel_pages)
    {
@@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
            ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
            vacrel->next_unskippable_vmbuffer = InvalidBuffer;
        }
-       *blkno = vacrel->rel_pages;
-       return false;
+       return InvalidBlockNumber;
    }
 
    /*
@@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
         * but chose not to.  We know that they are all-visible in the VM,
         * otherwise they would've been unskippable.
         */
-       *blkno = vacrel->current_block = next_block;
-       *blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
-       return true;
+       vacrel->current_block = next_block;
+       blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
+       *((uint8 *) per_buffer_data) = blk_info;
+       return vacrel->current_block;
    }
    else
    {
@@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
         */
        Assert(next_block == vacrel->next_unskippable_block);
 
-       *blkno = vacrel->current_block = next_block;
+       vacrel->current_block = next_block;
        if (vacrel->next_unskippable_allvis)
-           *blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
+           blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
        if (vacrel->next_unskippable_eager_scanned)
-           *blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
-       return true;
+           blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
+       *((uint8 *) per_buffer_data) = blk_info;
+       return vacrel->current_block;
    }
 }