From: Melanie Plageman Date: Fri, 14 Feb 2025 17:56:57 +0000 (-0500) Subject: Use streaming read I/O in VACUUM's first phase X-Git-Tag: REL_18_BETA1~876 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=9256822608f3b0636cfd88c3436cdceda7003cf3;p=postgresql.git Use streaming read I/O in VACUUM's first phase Make vacuum's first phase, which prunes and freezes tuples and records dead TIDs, use the read stream API by converting heap_vac_scan_next_block() to a read stream callback. Reviewed-by: Masahiko Sawada Reviewed-by: Thomas Munro Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com --- diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index c4d0f77ee2f..08d89ab2bcd 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -153,6 +153,7 @@ #include "storage/bufmgr.h" #include "storage/freespace.h" #include "storage/lmgr.h" +#include "storage/read_stream.h" #include "utils/lsyscache.h" #include "utils/pg_rusage.h" #include "utils/timestamp.h" @@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo static void lazy_scan_heap(LVRelState *vacrel); static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params); -static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - uint8 *blk_info); +static BlockNumber heap_vac_scan_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data); static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno, Page page, @@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, static void lazy_scan_heap(LVRelState *vacrel) { + ReadStream *stream; BlockNumber rel_pages = vacrel->rel_pages, - blkno, + blkno = 0, next_fsm_block_to_vacuum = 0; - uint8 blk_info = 0; + void 
*per_buffer_data = NULL; BlockNumber orig_eager_scan_success_limit = vacrel->eager_scan_remaining_successes; /* for logging */ Buffer vmbuffer = InvalidBuffer; @@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel) vacrel->next_unskippable_eager_scanned = false; vacrel->next_unskippable_vmbuffer = InvalidBuffer; - while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info)) + /* Set up the read stream for vacuum's first pass through the heap */ + stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, + vacrel->bstrategy, + vacrel->rel, + MAIN_FORKNUM, + heap_vac_scan_next_block, + vacrel, + sizeof(uint8)); + + while (true) { Buffer buf; Page page; + uint8 blk_info = 0; bool has_lpdead_items; bool vm_page_frozen = false; bool got_cleanup_lock = false; - vacrel->scanned_pages++; - if (blk_info & VAC_BLK_WAS_EAGER_SCANNED) - vacrel->eager_scanned_pages++; - - /* Report as block scanned, update error traceback information */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); - update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, - blkno, InvalidOffsetNumber); - vacuum_delay_point(false); /* @@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel) * one-pass strategy, and the two-pass strategy with the index_cleanup * param set to 'off'. */ - if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) + if (vacrel->scanned_pages > 0 && + vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) lazy_check_wraparound_failsafe(vacrel); /* @@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel) /* * Vacuum the Free Space Map to make newly-freed space visible on - * upper-level FSM pages. Note we have not yet processed blkno. + * upper-level FSM pages. Note that blkno is the previously + * processed block. 
*/ FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, - blkno); + blkno + 1); next_fsm_block_to_vacuum = blkno; /* Report that we are once again scanning the heap */ @@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel) PROGRESS_VACUUM_PHASE_SCAN_HEAP); } + buf = read_stream_next_buffer(stream, &per_buffer_data); + + /* The relation is exhausted. */ + if (!BufferIsValid(buf)) + break; + + blk_info = *((uint8 *) per_buffer_data); + CheckBufferIsPinnedOnce(buf); + page = BufferGetPage(buf); + blkno = BufferGetBlockNumber(buf); + + vacrel->scanned_pages++; + if (blk_info & VAC_BLK_WAS_EAGER_SCANNED) + vacrel->eager_scanned_pages++; + + /* Report as block scanned, update error traceback information */ + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); + update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, + blkno, InvalidOffsetNumber); + /* * Pin the visibility map page in case we need to mark the page * all-visible. In most cases this will be very cheap, because we'll @@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel) */ visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); - buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, - vacrel->bstrategy); - page = BufferGetPage(buf); - /* * We need a buffer cleanup lock to prune HOT chains and defragment * the page in lazy_scan_prune. But when it's not possible to acquire @@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel) if (BufferIsValid(vmbuffer)) ReleaseBuffer(vmbuffer); - /* report that everything is now scanned */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); + /* + * Report that everything is now scanned. We never skip scanning the last + * block in the relation, so we can pass rel_pages here. 
+ */ + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, + rel_pages); /* now we can compute the new value for pg_class.reltuples */ vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, @@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel) Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + vacrel->missed_dead_tuples; + read_stream_end(stream); + /* * Do index vacuuming (call each index's ambulkdelete routine), then do * related heap vacuuming @@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel) /* * Vacuum the remainder of the Free Space Map. We must do this whether or * not there were indexes, and whether or not we bypassed index vacuuming. + * We can pass rel_pages here because we never skip scanning the last + * block of the relation. */ - if (blkno > next_fsm_block_to_vacuum) - FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); + if (rel_pages > next_fsm_block_to_vacuum) + FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages); /* report all blocks vacuumed */ - pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); + pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages); /* Do final index cleanup (call each index's amvacuumcleanup routine) */ if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) @@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel) } /* - * heap_vac_scan_next_block() -- get next block for vacuum to process - * - * lazy_scan_heap() calls here every time it needs to get the next block to - * prune and vacuum. The function uses the visibility map, vacuum options, - * and various thresholds to skip blocks which do not need to be processed and - * sets blkno to the next block to process. - * - * The block number of the next block to process is set in *blkno and its - * visibility status and whether or not it was eager scanned is set in - * *blk_info. 
- * - * The return value is false if there are no further blocks to process. - * - * vacrel is an in/out parameter here. Vacuum options and information about - * the relation are read. vacrel->skippedallvis is set if we skip a block - * that's all-visible but not all-frozen, to ensure that we don't update - * relfrozenxid in that case. vacrel also holds information about the next - * unskippable block, as bookkeeping for this function. + * heap_vac_scan_next_block() -- read stream callback to get the next block + * for vacuum to process + * + * Every time lazy_scan_heap() needs a new block to process during its first + * phase, it invokes read_stream_next_buffer() with a stream set up to call + * heap_vac_scan_next_block() to get the next block. + * + * heap_vac_scan_next_block() uses the visibility map, vacuum options, and + * various thresholds to skip blocks which do not need to be processed and + * returns the next block to process or InvalidBlockNumber if there are no + * remaining blocks. + * + * The visibility status of the next block to process and whether or not it + * was eager scanned is set in the per_buffer_data. + * + * callback_private_data contains a reference to the LVRelState, passed to the + * read stream API during stream setup. The LVRelState is an in/out parameter + * here (locally named `vacrel`). Vacuum options and information about the + * relation are read from it. vacrel->skippedallvis is set if we skip a block + * that's all-visible but not all-frozen (to ensure that we don't update + * relfrozenxid in that case). vacrel also holds information about the next + * unskippable block -- as bookkeeping for this function. 
*/ -static bool -heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, - uint8 *blk_info) +static BlockNumber +heap_vac_scan_next_block(ReadStream *stream, + void *callback_private_data, + void *per_buffer_data) { BlockNumber next_block; + LVRelState *vacrel = callback_private_data; + uint8 blk_info = 0; /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ next_block = vacrel->current_block + 1; - *blk_info = 0; - /* Have we reached the end of the relation? */ if (next_block >= vacrel->rel_pages) { @@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, ReleaseBuffer(vacrel->next_unskippable_vmbuffer); vacrel->next_unskippable_vmbuffer = InvalidBuffer; } - *blkno = vacrel->rel_pages; - return false; + return InvalidBlockNumber; } /* @@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, * but chose not to. We know that they are all-visible in the VM, * otherwise they would've been unskippable. */ - *blkno = vacrel->current_block = next_block; - *blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; - return true; + vacrel->current_block = next_block; + blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; + *((uint8 *) per_buffer_data) = blk_info; + return vacrel->current_block; } else { @@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, */ Assert(next_block == vacrel->next_unskippable_block); - *blkno = vacrel->current_block = next_block; + vacrel->current_block = next_block; if (vacrel->next_unskippable_allvis) - *blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; + blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; if (vacrel->next_unskippable_eager_scanned) - *blk_info |= VAC_BLK_WAS_EAGER_SCANNED; - return true; + blk_info |= VAC_BLK_WAS_EAGER_SCANNED; + *((uint8 *) per_buffer_data) = blk_info; + return vacrel->current_block; } }