#include "access/heapam.h"
#include "access/heapam_xlog.h"
#include "access/htup_details.h"
+#include "access/multixact.h"
#include "access/transam.h"
#include "access/xlog.h"
#include "access/xloginsert.h"
+#include "commands/vacuum.h"
+#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
-/* Working data for heap_page_prune and subroutines */
+/* Working data for heap_page_prune_and_freeze() and subroutines */
typedef struct
{
+ /*-------------------------------------------------------
+ * Arguments passed to heap_page_prune_and_freeze()
+ *-------------------------------------------------------
+ */
+
/* tuple visibility test, initialized for the relation */
GlobalVisState *vistest;
/* whether or not dead items can be set LP_UNUSED during pruning */
bool mark_unused_now;
+ /* whether to attempt freezing tuples */
+ bool freeze;
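+ /* freeze cutoffs established by VACUUM; only used when 'freeze' is set */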
+ struct VacuumCutoffs *cutoffs;
- TransactionId new_prune_xid; /* new prune hint value for page */
- TransactionId snapshotConflictHorizon; /* latest xid removed */
+ /*-------------------------------------------------------
+ * Fields describing what to do to the page
+ *-------------------------------------------------------
+ */
+ TransactionId new_prune_xid; /* new prune hint value */
+ TransactionId latest_xid_removed; /* latest xid among removed tuples */
int nredirected; /* numbers of entries in arrays below */
int ndead;
int nunused;
+ int nfrozen;
/* arrays that accumulate indexes of items to be changed */
OffsetNumber redirected[MaxHeapTuplesPerPage * 2];
OffsetNumber nowdead[MaxHeapTuplesPerPage];
OffsetNumber nowunused[MaxHeapTuplesPerPage];
+ HeapTupleFreeze frozen[MaxHeapTuplesPerPage]; /* prepared freeze plans */
+
+ /*-------------------------------------------------------
+ * Working state for HOT chain processing
+ *-------------------------------------------------------
+ */
/*
* 'root_items' contains offsets of all LP_REDIRECT line pointers and
* non-heap-only tuples, which can be roots of HOT chains, while
* 'heaponly_items' contains offsets of heap-only tuples.
*/
bool processed[MaxHeapTuplesPerPage + 1];
+ /*
+ * Tuple visibility is only computed once for each tuple, for correctness
+ * and efficiency reasons; see comment in heap_page_prune_and_freeze() for
+ * details. This is of type int8[], instead of HTSV_Result[], so we can
+ * use -1 to indicate no visibility has been computed, e.g. for LP_DEAD
+ * items.
+ *
+ * This needs to be MaxHeapTuplesPerPage + 1 long as FirstOffsetNumber is
+ * 1. Otherwise every access would need to subtract 1.
+ */
+ int8 htsv[MaxHeapTuplesPerPage + 1];
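+
+ /*
+ * For example, the visibility status of the tuple at offset 5 is kept in
+ * htsv[5]; entries that were never computed remain -1 and must not be
+ * interpreted as an HTSV_Result (see htsv_get_valid_status()).
+ */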
+
+ /*
+ * Freezing-related state.
+ */
+ HeapPageFreeze pagefrz;
+
+ /*-------------------------------------------------------
+ * Information about what was done
+ *
+ * These fields are not used by pruning itself for the most part, but are
+ * used to collect information about what was pruned and what state the
+ * page is in after pruning, for the benefit of the caller. They are
+ * copied to the caller's PruneFreezeResult at the end.
+ * -------------------------------------------------------
+ */
+
int ndeleted; /* Number of tuples deleted from the page */
+
+ /* Number of live and recently dead tuples, after pruning */
+ int live_tuples;
+ int recently_dead_tuples;
+
+ /* Whether or not the page makes rel truncation unsafe */
+ bool hastup;
+
+ /*
+ * LP_DEAD items on the page after pruning. Includes existing LP_DEAD
+ * items.
+ */
+ int lpdead_items; /* number of items in the array */
+ OffsetNumber *deadoffsets; /* points directly to presult->deadoffsets */
+
+ /*
+ * all_visible and all_frozen indicate if the all-visible and all-frozen
+ * bits in the visibility map can be set for this page after pruning.
+ *
+ * visibility_cutoff_xid is the newest xmin of live tuples on the page.
+ * The caller can use it as the conflict horizon, when setting the VM
+ * bits. It is only valid if we froze some tuples, and all_frozen is
+ * true.
+ *
+ * NOTE: all_visible and all_frozen don't include LP_DEAD items. That's
+ * convenient for heap_page_prune_and_freeze(), to use them to decide
+ * whether to freeze the page or not. The all_visible and all_frozen
+ * values returned to the caller are adjusted to include LP_DEAD items at
+ * the end.
+ *
+ * all_frozen should only be considered valid if all_visible is also set;
+ * we don't bother to clear the all_frozen flag every time we clear the
+ * all_visible flag.
+ */
+ bool all_visible;
+ bool all_frozen;
+ TransactionId visibility_cutoff_xid;
} PruneState;
/* Local functions */
static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
HeapTuple tup,
Buffer buffer);
+static inline HTSV_Result htsv_get_valid_status(int status);
static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
- OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate);
+ OffsetNumber rootoffnum, PruneState *prstate);
static void heap_prune_record_prunable(PruneState *prstate, TransactionId xid);
static void heap_prune_record_redirect(PruneState *prstate,
- OffsetNumber offnum, OffsetNumber rdoffnum, bool was_normal);
-static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum, bool was_normal);
-static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
+ OffsetNumber offnum, OffsetNumber rdoffnum,
+ bool was_normal);
+static void heap_prune_record_dead(PruneState *prstate, OffsetNumber offnum,
+ bool was_normal);
+static void heap_prune_record_dead_or_unused(PruneState *prstate, OffsetNumber offnum,
+ bool was_normal);
static void heap_prune_record_unused(PruneState *prstate, OffsetNumber offnum, bool was_normal);
static void heap_prune_record_unchanged_lp_unused(Page page, PruneState *prstate, OffsetNumber offnum);
-static void heap_prune_record_unchanged_lp_normal(Page page, int8 *htsv, PruneState *prstate, OffsetNumber offnum);
+static void heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unchanged_lp_dead(Page page, PruneState *prstate, OffsetNumber offnum);
static void heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum);
if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
{
OffsetNumber dummy_off_loc;
- PruneResult presult;
+ PruneFreezeResult presult;
/*
* For now, pass mark_unused_now as false regardless of whether or
* not the relation has indexes, since we cannot safely determine
* that during on-access pruning with the current implementation.
*/
- heap_page_prune(relation, buffer, vistest, 0,
- &presult, PRUNE_ON_ACCESS, &dummy_off_loc);
+ heap_page_prune_and_freeze(relation, buffer, vistest, 0,
+ NULL, &presult, PRUNE_ON_ACCESS, &dummy_off_loc, NULL, NULL);
/*
* Report the number of tuples reclaimed to pgstats. This is
/*
- * Prune and repair fragmentation in the specified page.
+ * Prune and repair fragmentation and potentially freeze tuples on the
+ * specified page.
*
* Caller must have pin and buffer cleanup lock on the page. Note that we
* don't update the FSM information for page on caller's behalf. Caller might
* also need to account for a reduction in the length of the line pointer
* array following array truncation by us.
*
+ * If the HEAP_PAGE_PRUNE_FREEZE option is set, we will also freeze tuples if
+ * it's required in order to advance relfrozenxid / relminmxid, or if it's
+ * considered advantageous for overall system performance to do so now. The
+ * 'cutoffs', 'presult', 'new_relfrozen_xid' and 'new_relmin_mxid' arguments
+ * are required when freezing. When the HEAP_PAGE_PRUNE_FREEZE option is set,
+ * we also set presult->all_visible and presult->all_frozen on exit, to
+ * indicate if the VM bits can be set. They are always set to false when the
+ * HEAP_PAGE_PRUNE_FREEZE option is not set, because at the moment only
+ * callers that also freeze need that information.
+ *
* vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
* (see heap_prune_satisfies_vacuum).
*
* MARK_UNUSED_NOW indicates that dead items can be set LP_UNUSED during
* pruning.
*
- * presult contains output parameters needed by callers such as the number of
- * tuples removed and the number of line pointers newly marked LP_DEAD.
- * heap_page_prune() is responsible for initializing it.
+ * FREEZE indicates that we will also freeze tuples, and will return
+ * 'all_visible', 'all_frozen' flags to the caller.
+ *
+ * cutoffs contains the freeze cutoffs, established by VACUUM at the beginning
+ * of vacuuming the relation. Required if the HEAP_PAGE_PRUNE_FREEZE option
+ * is set.
+ *
+ * presult contains output parameters needed by callers, such as the number of
+ * tuples removed and the offsets of dead items on the page after pruning.
+ * heap_page_prune_and_freeze() is responsible for initializing it. Required
+ * by all callers.
*
* reason indicates why the pruning is performed. It is included in the WAL
* record for debugging and analysis purposes, but otherwise has no effect.
*
* off_loc is the offset location required by the caller to use in error
* callback.
+ *
+ * new_relfrozen_xid and new_relmin_mxid must be provided by the caller if
+ * the HEAP_PAGE_PRUNE_FREEZE option is set. On entry, they contain the
+ * oldest XID and multi-XID seen on the relation so far. They will be
+ * updated with the oldest values present on the page after pruning. After
+ * processing the whole relation, VACUUM can use these values as the new
+ * relfrozenxid/relminmxid for the relation.
*/
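+/*
+ * A sketch of the two call patterns (schematic, not verbatim from any
+ * caller): VACUUM's first heap pass freezes and tracks relfrozenxid:
+ *
+ *   heap_page_prune_and_freeze(rel, buf, vistest, HEAP_PAGE_PRUNE_FREEZE,
+ *                              &cutoffs, &presult, PRUNE_VACUUM_SCAN,
+ *                              &offnum, &new_relfrozen_xid, &new_relmin_mxid);
+ *
+ * while on-access pruning passes no options and NULL for all freeze-related
+ * arguments:
+ *
+ *   heap_page_prune_and_freeze(rel, buf, vistest, 0,
+ *                              NULL, &presult, PRUNE_ON_ACCESS,
+ *                              &off_loc, NULL, NULL);
+ */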
void
-heap_page_prune(Relation relation, Buffer buffer,
- GlobalVisState *vistest,
- int options,
- PruneResult *presult,
- PruneReason reason,
- OffsetNumber *off_loc)
+heap_page_prune_and_freeze(Relation relation, Buffer buffer,
+ GlobalVisState *vistest,
+ int options,
+ struct VacuumCutoffs *cutoffs,
+ PruneFreezeResult *presult,
+ PruneReason reason,
+ OffsetNumber *off_loc,
+ TransactionId *new_relfrozen_xid,
+ MultiXactId *new_relmin_mxid)
{
Page page = BufferGetPage(buffer);
BlockNumber blockno = BufferGetBlockNumber(buffer);
maxoff;
PruneState prstate;
HeapTupleData tup;
+ bool do_freeze;
+ bool do_prune;
+ bool do_hint;
+ bool hint_bit_fpi; /* did setting hint bits cause an FPI? */
+ int64 fpi_before = pgWalUsage.wal_fpi; /* FPI count at entry */
+
+ /* Copy parameters to prstate */
+ prstate.vistest = vistest;
+ prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
+ prstate.freeze = (options & HEAP_PAGE_PRUNE_FREEZE) != 0;
+ prstate.cutoffs = cutoffs;
/*
* Our strategy is to scan the page and make lists of items to change,
* initialize the rest of our working state.
*/
prstate.new_prune_xid = InvalidTransactionId;
- prstate.vistest = vistest;
- prstate.mark_unused_now = (options & HEAP_PAGE_PRUNE_MARK_UNUSED_NOW) != 0;
- prstate.snapshotConflictHorizon = InvalidTransactionId;
- prstate.nredirected = prstate.ndead = prstate.nunused = 0;
- prstate.ndeleted = 0;
+ prstate.latest_xid_removed = InvalidTransactionId;
+ prstate.nredirected = prstate.ndead = prstate.nunused = prstate.nfrozen = 0;
prstate.nroot_items = 0;
prstate.nheaponly_items = 0;
+ /* initialize page freezing working state */
+ prstate.pagefrz.freeze_required = false;
+ if (prstate.freeze)
+ {
+ Assert(new_relfrozen_xid && new_relmin_mxid);
+ prstate.pagefrz.FreezePageRelfrozenXid = *new_relfrozen_xid;
+ prstate.pagefrz.NoFreezePageRelfrozenXid = *new_relfrozen_xid;
+ prstate.pagefrz.FreezePageRelminMxid = *new_relmin_mxid;
+ prstate.pagefrz.NoFreezePageRelminMxid = *new_relmin_mxid;
+ }
+ else
+ {
+ Assert(new_relfrozen_xid == NULL && new_relmin_mxid == NULL);
+ prstate.pagefrz.FreezePageRelminMxid = InvalidMultiXactId;
+ prstate.pagefrz.NoFreezePageRelminMxid = InvalidMultiXactId;
+ prstate.pagefrz.FreezePageRelfrozenXid = InvalidTransactionId;
+ prstate.pagefrz.NoFreezePageRelfrozenXid = InvalidTransactionId;
+ }
+
+ prstate.ndeleted = 0;
+ prstate.live_tuples = 0;
+ prstate.recently_dead_tuples = 0;
+ prstate.hastup = false;
+ prstate.lpdead_items = 0;
+ prstate.deadoffsets = presult->deadoffsets;
+
/*
- * presult->htsv is not initialized here because all ntuple spots in the
- * array will be set either to a valid HTSV_Result value or -1.
+ * Caller may update the VM after we're done. We can keep track of
+ * whether the page will be all-visible and all-frozen after pruning and
+ * freezing to help the caller to do that.
+ *
+ * Currently, only VACUUM sets the VM bits. To avoid unnecessary work, we
+ * only do the bookkeeping if the caller needs it. For now, that's tied to
+ * HEAP_PAGE_PRUNE_FREEZE, but it could be a separate flag if you wanted
+ * to update the VM bits without also freezing or freeze without also
+ * setting the VM bits.
+ *
+ * In addition to telling the caller whether it can set the VM bit, we
+ * also use 'all_visible' and 'all_frozen' for our own decision-making. If
+ * the whole page would become frozen, we consider opportunistically
+ * freezing tuples. We will not be able to freeze the whole page if there
+ * are tuples present that are not visible to everyone or if there are
+ * dead tuples which are not yet removable. However, dead tuples which
+ * will be removed by the end of vacuuming should not preclude us from
+ * opportunistically freezing. Because of that, we do not clear
+ * all_visible when we see LP_DEAD items. We fix that at the end of the
+ * function, when we return the value to the caller, so that the caller
+ * doesn't set the VM bit incorrectly.
*/
- presult->ndeleted = 0;
- presult->nnewlpdead = 0;
+ if (prstate.freeze)
+ {
+ prstate.all_visible = true;
+ prstate.all_frozen = true;
+ }
+ else
+ {
+ /*
+ * Initializing to false allows skipping the work to update them in
+ * heap_prune_record_unchanged_lp_normal().
+ */
+ prstate.all_visible = false;
+ prstate.all_frozen = false;
+ }
+
+ /*
+ * The visibility cutoff xid is the newest xmin of live tuples on the
+ * page. In the common case, this will be set as the conflict horizon the
+ * caller can use for updating the VM. If, at the end of freezing and
+ * pruning, the page is all-frozen, every running transaction on the
+ * standby must already see all tuples on the page as visible, so the
+ * conflict horizon remains InvalidTransactionId.
+ */
+ prstate.visibility_cutoff_xid = InvalidTransactionId;
maxoff = PageGetMaxOffsetNumber(page);
tup.t_tableOid = RelationGetRelid(relation);
/*
* Determine HTSV for all tuples, and queue them up for processing as HOT
- * chain roots or as a heap-only items.
+ * chain roots or as heap-only items.
*
* Determining HTSV only once for each tuple is required for correctness,
* to deal with cases where running HTSV twice could result in different
* results. For example, RECENTLY_DEAD can turn to DEAD if another
* checked item causes GlobalVisTestIsRemovableFullXid() to update the
* horizon, or INSERT_IN_PROGRESS can change to DEAD if the inserting
- * transaction aborts. VACUUM assumes that there are no normal DEAD
- * tuples left on the page after pruning, so it needs to have the same
- * understanding of what is DEAD and what is not.
+ * transaction aborts.
*
* It's also good for performance. Most commonly tuples within a page are
* stored at decreasing offsets (while the items are stored at increasing
*off_loc = offnum;
prstate.processed[offnum] = false;
- presult->htsv[offnum] = -1;
+ prstate.htsv[offnum] = -1;
/* Nothing to do if slot doesn't contain a tuple */
if (!ItemIdIsUsed(itemid))
tup.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&tup.t_self, blockno, offnum);
- presult->htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
- buffer);
+ prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
+ buffer);
if (!HeapTupleHeaderIsHeapOnly(htup))
prstate.root_items[prstate.nroot_items++] = offnum;
prstate.heaponly_items[prstate.nheaponly_items++] = offnum;
}
+ /*
+ * If checksums are enabled, heap_prune_satisfies_vacuum() may have caused
+ * an FPI to be emitted while setting hint bits; remember that for the
+ * opportunistic freezing decision below.
+ */
+ hint_bit_fpi = fpi_before != pgWalUsage.wal_fpi;
+
/*
* Process HOT chains.
*
*off_loc = offnum;
/* Process this item or chain of items */
- heap_prune_chain(page, blockno, maxoff,
- offnum, presult->htsv, &prstate);
+ heap_prune_chain(page, blockno, maxoff, offnum, &prstate);
}
/*
* return true for an XMIN_INVALID tuple, so this code will work even
* when there were sequential updates within the aborted transaction.)
*/
- if (presult->htsv[offnum] == HEAPTUPLE_DEAD)
+ if (prstate.htsv[offnum] == HEAPTUPLE_DEAD)
{
ItemId itemid = PageGetItemId(page, offnum);
HeapTupleHeader htup = (HeapTupleHeader) PageGetItem(page, itemid);
if (likely(!HeapTupleHeaderIsHotUpdated(htup)))
{
HeapTupleHeaderAdvanceConflictHorizon(htup,
- &prstate.snapshotConflictHorizon);
+ &prstate.latest_xid_removed);
heap_prune_record_unused(&prstate, offnum, true);
}
else
}
}
else
- heap_prune_record_unchanged_lp_normal(page, presult->htsv, &prstate, offnum);
+ heap_prune_record_unchanged_lp_normal(page, &prstate, offnum);
}
/* We should now have processed every tuple exactly once */
/* Clear the offset information once we have processed the given page. */
*off_loc = InvalidOffsetNumber;
- /* Any error while applying the changes is critical */
- START_CRIT_SECTION();
+ do_prune = prstate.nredirected > 0 ||
+ prstate.ndead > 0 ||
+ prstate.nunused > 0;
- /* Have we found any prunable items? */
- if (prstate.nredirected > 0 || prstate.ndead > 0 || prstate.nunused > 0)
+ /*
+ * Even if we don't prune anything, if we found a new value for the
+ * pd_prune_xid field or the page was marked full, we will update these
+ * hint fields.
+ */
+ do_hint = ((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
+ PageIsFull(page);
+
+ /*
+ * Decide if we want to go ahead with freezing according to the freeze
+ * plans we prepared, or not.
+ */
+ do_freeze = false;
+ if (prstate.freeze)
+ {
+ if (prstate.pagefrz.freeze_required)
+ {
+ /*
+ * heap_prepare_freeze_tuple indicated that at least one XID/MXID
+ * from before FreezeLimit/MultiXactCutoff is present. Must
+ * freeze to advance relfrozenxid/relminmxid.
+ */
+ do_freeze = true;
+ }
+ else
+ {
+ /*
+ * Opportunistically freeze the page if we are generating an FPI
+ * anyway and if doing so means that we can set the page
+ * all-frozen afterwards (might not happen until VACUUM's final
+ * heap pass).
+ *
+ * XXX: Previously, we knew if pruning emitted an FPI by checking
+ * pgWalUsage.wal_fpi before and after pruning. Once the freeze
+ * and prune records were combined, this heuristic couldn't be
+ * used anymore. The opportunistic freeze heuristic must be
+ * improved; however, for now, try to approximate the old logic.
+ */
+ if (prstate.all_visible && prstate.all_frozen && prstate.nfrozen > 0)
+ {
+ /*
+ * Freezing would make the page all-frozen. Have we already
+ * emitted an FPI, or will we do so anyway?
+ */
+ if (RelationNeedsWAL(relation))
+ {
+ if (hint_bit_fpi)
+ do_freeze = true;
+ else if (do_prune)
+ {
+ if (XLogCheckBufferNeedsBackup(buffer))
+ do_freeze = true;
+ }
+ else if (do_hint)
+ {
+ if (XLogHintBitIsNeeded() && XLogCheckBufferNeedsBackup(buffer))
+ do_freeze = true;
+ }
+ }
+ }
+ }
+ }
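+
+ /*
+ * In short: we freeze either because freezing is required to advance
+ * relfrozenxid/relminmxid, or because the page would become all-frozen
+ * and a full-page image is being emitted anyway.
+ */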
+
+ if (do_freeze)
{
/*
- * Apply the planned item changes, then repair page fragmentation, and
- * update the page's hint bit about whether it has free line pointers.
+ * Validate the tuples we will be freezing before entering the
+ * critical section.
*/
- heap_page_prune_execute(buffer, false,
- prstate.redirected, prstate.nredirected,
- prstate.nowdead, prstate.ndead,
- prstate.nowunused, prstate.nunused);
+ heap_pre_freeze_checks(buffer, prstate.frozen, prstate.nfrozen);
+ }
+ else if (prstate.nfrozen > 0)
+ {
+ /*
+ * The page contains some tuples that are not already frozen, and we
+ * chose not to freeze them now, so the page won't be all-frozen.
+ */
+ Assert(!prstate.pagefrz.freeze_required);
+ prstate.all_frozen = false;
+ prstate.nfrozen = 0; /* avoid miscounts in instrumentation */
+ }
+ else
+ {
+ /*
+ * We have no freeze plans to execute. The page might already be
+ * all-frozen (perhaps only following pruning), though. Such pages
+ * can be marked all-frozen in the VM by our caller, even though none
+ * of its tuples were newly frozen here.
+ */
+ }
+
+ /* Any error while applying the changes is critical */
+ START_CRIT_SECTION();
+
+ if (do_hint)
+ {
/*
* Update the page's pd_prune_xid field to either zero, or the lowest
* XID of any soon-prunable tuple.
*/
PageClearFull(page);
+ /*
+ * If that's all we had to do to the page, this is a non-WAL-logged
+ * hint. If we are going to freeze or prune the page, we will mark
+ * the buffer dirty below.
+ */
+ if (!do_freeze && !do_prune)
+ MarkBufferDirtyHint(buffer, true);
+ }
+
+ if (do_prune || do_freeze)
+ {
+ /* Apply the planned item changes and repair page fragmentation. */
+ if (do_prune)
+ {
+ heap_page_prune_execute(buffer, false,
+ prstate.redirected, prstate.nredirected,
+ prstate.nowdead, prstate.ndead,
+ prstate.nowunused, prstate.nunused);
+ }
+
+ if (do_freeze)
+ heap_freeze_prepared_tuples(buffer, prstate.frozen, prstate.nfrozen);
+
MarkBufferDirty(buffer);
/*
*/
if (RelationNeedsWAL(relation))
{
+ /*
+ * The snapshotConflictHorizon for the whole record should be the
+ * most conservative of all the horizons calculated for any of the
+ * possible modifications. If this record will prune tuples, any
+ * standby transactions older than the youngest xmax among the
+ * tuples being removed will conflict. If this record will freeze
+ * tuples, any standby transactions with xids older than the
+ * youngest tuple being frozen will conflict.
+ */
+ TransactionId frz_conflict_horizon = InvalidTransactionId;
+ TransactionId conflict_xid;
+
+ /*
+ * We can use the visibility_cutoff_xid as our cutoff for
+ * conflicts when the whole page is eligible to become all-frozen
+ * in the VM once we're done with it. Otherwise we generate a
+ * conservative cutoff by stepping back from OldestXmin.
+ */
+ if (do_freeze)
+ {
+ if (prstate.all_visible && prstate.all_frozen)
+ frz_conflict_horizon = prstate.visibility_cutoff_xid;
+ else
+ {
+ /* Avoids false conflicts when hot_standby_feedback is in use */
+ frz_conflict_horizon = prstate.cutoffs->OldestXmin;
+ TransactionIdRetreat(frz_conflict_horizon);
+ }
+ }
+
+ if (TransactionIdFollows(frz_conflict_horizon, prstate.latest_xid_removed))
+ conflict_xid = frz_conflict_horizon;
+ else
+ conflict_xid = prstate.latest_xid_removed;
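+
+ /*
+ * For example (illustrative values only): if the youngest xmax among
+ * the pruned tuples is 1000 and the freeze conflict horizon computed
+ * above is 990, the record uses 1000, the newer and therefore more
+ * conservative of the two.
+ */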
+
log_heap_prune_and_freeze(relation, buffer,
- prstate.snapshotConflictHorizon,
+ conflict_xid,
true, reason,
- NULL, 0,
+ prstate.frozen, prstate.nfrozen,
prstate.redirected, prstate.nredirected,
prstate.nowdead, prstate.ndead,
prstate.nowunused, prstate.nunused);
}
}
- else
- {
- /*
- * If we didn't prune anything, but have found a new value for the
- * pd_prune_xid field, update it and mark the buffer dirty. This is
- * treated as a non-WAL-logged hint.
- *
- * Also clear the "page is full" flag if it is set, since there's no
- * point in repeating the prune/defrag process until something else
- * happens to the page.
- */
- if (((PageHeader) page)->pd_prune_xid != prstate.new_prune_xid ||
- PageIsFull(page))
- {
- ((PageHeader) page)->pd_prune_xid = prstate.new_prune_xid;
- PageClearFull(page);
- MarkBufferDirtyHint(buffer, true);
- }
- }
END_CRIT_SECTION();
/* Copy information back for caller */
- presult->nnewlpdead = prstate.ndead;
presult->ndeleted = prstate.ndeleted;
+ presult->nnewlpdead = prstate.ndead;
+ presult->nfrozen = prstate.nfrozen;
+ presult->live_tuples = prstate.live_tuples;
+ presult->recently_dead_tuples = prstate.recently_dead_tuples;
+
+ /*
+ * It was convenient to ignore LP_DEAD items in all_visible earlier on to
+ * make the choice of whether or not to freeze the page unaffected by the
+ * short-term presence of LP_DEAD items. These LP_DEAD items were
+ * effectively assumed to be LP_UNUSED items in the making. It doesn't
+ * matter which vacuum heap pass (initial pass or final pass) ends up
+ * setting the page all-frozen, as long as the ongoing VACUUM does it.
+ *
+ * Now that freezing has been finalized, unset all_visible if there are
+ * any LP_DEAD items on the page. It needs to reflect the present state
+ * of the page, as expected by our caller.
+ */
+ if (prstate.all_visible && prstate.lpdead_items == 0)
+ {
+ presult->all_visible = prstate.all_visible;
+ presult->all_frozen = prstate.all_frozen;
+ }
+ else
+ {
+ presult->all_visible = false;
+ presult->all_frozen = false;
+ }
+
+ presult->hastup = prstate.hastup;
+
+ /*
+ * For callers planning to update the visibility map, the conflict horizon
+ * for that record must be the newest xmin on the page. However, if the
+ * page is completely frozen, there can be no conflict and the
+ * vm_conflict_horizon should remain InvalidTransactionId. This includes
+ * the case where we just froze all the tuples; the prune-freeze record
+ * already included the conflict XID, so the caller doesn't need it.
+ */
+ if (presult->all_frozen)
+ presult->vm_conflict_horizon = InvalidTransactionId;
+ else
+ presult->vm_conflict_horizon = prstate.visibility_cutoff_xid;
+
+ presult->lpdead_items = prstate.lpdead_items;
+ /* the presult->deadoffsets array was already filled in */
+
+ if (prstate.freeze)
+ {
+ if (presult->nfrozen > 0)
+ {
+ *new_relfrozen_xid = prstate.pagefrz.FreezePageRelfrozenXid;
+ *new_relmin_mxid = prstate.pagefrz.FreezePageRelminMxid;
+ }
+ else
+ {
+ *new_relfrozen_xid = prstate.pagefrz.NoFreezePageRelfrozenXid;
+ *new_relmin_mxid = prstate.pagefrz.NoFreezePageRelminMxid;
+ }
+ }
}
}
+/*
+ * Pruning calculates tuple visibility once and saves the results in an array
+ * of int8. See PruneState.htsv for details. This helper function is meant
+ * to guard against examining visibility status array members which have not
+ * yet been computed.
+ */
+static inline HTSV_Result
+htsv_get_valid_status(int status)
+{
+ Assert(status >= HEAPTUPLE_DEAD &&
+ status <= HEAPTUPLE_DELETE_IN_PROGRESS);
+ return (HTSV_Result) status;
+}
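+
+/*
+ * For example, heap_prune_chain() reads the cached status via
+ * htsv_get_valid_status(prstate->htsv[offnum]) rather than casting the
+ * array entry directly, so that consuming an uncomputed (-1) entry trips
+ * the assertion instead of being silently misinterpreted.
+ */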
+
/*
* Prune specified line pointer or a HOT chain originating at line pointer.
*
- * Tuple visibility information is provided in htsv.
+ * Tuple visibility information is provided in prstate->htsv.
*
* If the item is an index-referenced tuple (i.e. not a heap-only tuple),
* the HOT chain is pruned by removing all DEAD tuples at the start of the HOT
* prstate showing the changes to be made. Items to be redirected are added
* to the redirected[] array (two entries per redirection); items to be set to
* LP_DEAD state are added to nowdead[]; and items to be set to LP_UNUSED
- * state are added to nowunused[].
+ * state are added to nowunused[]. We perform bookkeeping of live tuples,
+ * visibility, etc. based on what the page will look like after the changes
+ * are applied. All that bookkeeping is performed in the heap_prune_record_*()
+ * subroutines. The division of labor is that heap_prune_chain() decides the
+ * fate of each tuple, i.e. whether it's going to be removed, redirected or
+ * left unchanged, and the heap_prune_record_*() subroutines update PruneState
+ * based on that outcome.
*/
static void
heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
- OffsetNumber rootoffnum, int8 *htsv, PruneState *prstate)
+ OffsetNumber rootoffnum, PruneState *prstate)
{
TransactionId priorXmax = InvalidTransactionId;
ItemId rootlp;
*/
chainitems[nchain++] = offnum;
- switch (htsv_get_valid_status(htsv[offnum]))
+ switch (htsv_get_valid_status(prstate->htsv[offnum]))
{
case HEAPTUPLE_DEAD:
/* Remember the last DEAD tuple seen */
ndeadchain = nchain;
HeapTupleHeaderAdvanceConflictHorizon(htup,
- &prstate->snapshotConflictHorizon);
-
+ &prstate->latest_xid_removed);
/* Advance to next chain member */
break;
{
/*
* We found a redirect item that doesn't point to a valid follow-on
- * item. This can happen if the loop in heap_page_prune caused us to
- * visit the dead successor of a redirect item before visiting the
- * redirect item. We can clean up by setting the redirect item to
- * LP_DEAD state or LP_UNUSED if the caller indicated.
+ * item. This can happen if the loop in heap_page_prune_and_freeze()
+ * caused us to visit the dead successor of a redirect item before
+ * visiting the redirect item. We can clean up by setting the
+ * redirect item to LP_DEAD state or LP_UNUSED if the caller
+ * indicated.
*/
heap_prune_record_dead_or_unused(prstate, rootoffnum, false);
return;
i++;
}
for (; i < nchain; i++)
- heap_prune_record_unchanged_lp_normal(page, htsv, prstate, chainitems[i]);
+ heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
}
else if (ndeadchain == nchain)
{
/* the rest of tuples in the chain are normal, unchanged tuples */
for (int i = ndeadchain; i < nchain; i++)
- heap_prune_record_unchanged_lp_normal(page, htsv, prstate, chainitems[i]);
+ heap_prune_record_unchanged_lp_normal(page, prstate, chainitems[i]);
}
}
*/
if (was_normal)
prstate->ndeleted++;
+
+ prstate->hastup = true;
}
/* Record line pointer to be marked dead */
prstate->nowdead[prstate->ndead] = offnum;
prstate->ndead++;
+ /*
+ * Deliberately delay unsetting all_visible until later during pruning.
+ * Removable dead tuples shouldn't preclude freezing the page.
+ */
+
+ /* Record the dead offset for vacuum */
+ prstate->deadoffsets[prstate->lpdead_items++] = offnum;
+
/*
* If the root entry had been a normal tuple, we are deleting it, so count
* it in the result. But changing a redirect (even to DEAD state) doesn't
}
/*
- * Record LP_NORMAL line pointer that is left unchanged.
+ * Record an LP_NORMAL line pointer that is left unchanged. We consider
+ * freezing the tuple, and update the bookkeeping of tuple counts and page
+ * visibility.
*/
static void
-heap_prune_record_unchanged_lp_normal(Page page, int8 *htsv, PruneState *prstate, OffsetNumber offnum)
+heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumber offnum)
{
HeapTupleHeader htup;
Assert(!prstate->processed[offnum]);
prstate->processed[offnum] = true;
- switch (htsv[offnum])
+ prstate->hastup = true; /* the page is not empty */
+
+ /*
+ * The criteria for counting a tuple as live in this block need to match
+ * what analyze.c's acquire_sample_rows() does, otherwise VACUUM and
+ * ANALYZE may produce wildly different reltuples values, e.g. when there
+ * are many recently-dead tuples.
+ *
+ * The logic here is a bit simpler than acquire_sample_rows(), as VACUUM
+ * can't run inside a transaction block, which makes some cases impossible
+ * (e.g. in-progress insert from the same transaction).
+ *
+ * HEAPTUPLE_DEAD tuples are handled by the other heap_prune_record_*()
+ * subroutines. Unlike acquire_sample_rows(), they don't count dead
+ * items, because we assume that all dead items will become LP_UNUSED
+ * before VACUUM finishes. This difference is only superficial. VACUUM
+ * effectively agrees with ANALYZE about DEAD items, in the end. VACUUM
+ * won't remember LP_DEAD items, but only because they're not supposed to
+ * be left behind when it is done. (Cases where we bypass index vacuuming
+ * will violate this optimistic assumption, but the overall impact of that
+ * should be negligible.)
+ */
+ htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
+
+ switch (prstate->htsv[offnum])
{
case HEAPTUPLE_LIVE:
+
+ /*
+ * Count it as live. Not only is this natural, but it's also what
+ * acquire_sample_rows() does.
+ */
+ prstate->live_tuples++;
+
+ /*
+ * Is the tuple definitely visible to all transactions?
+ *
+ * NB: Like with per-tuple hint bits, we can't set the
+ * PD_ALL_VISIBLE flag if the inserter committed asynchronously.
+ * See SetHintBits for more info. Check that the tuple is hinted
+ * xmin-committed because of that.
+ */
+ if (prstate->all_visible)
+ {
+ TransactionId xmin;
+
+ if (!HeapTupleHeaderXminCommitted(htup))
+ {
+ prstate->all_visible = false;
+ break;
+ }
+
+ /*
+ * The inserter definitely committed. But is it old enough
+ * that everyone sees it as committed? A FrozenTransactionId
+ * is seen as committed to everyone. Otherwise, we check if
+ * there is a snapshot that considers this xid to still be
+ * running, and if so, we don't consider the page all-visible.
+ */
+ xmin = HeapTupleHeaderGetXmin(htup);
+
+ /*
+ * For now always use prstate->cutoffs for this test, because
+ * we only update 'all_visible' when freezing is requested. We
+ * could use GlobalVisTestIsRemovableXid instead, if a
+ * non-freezing caller wanted to set the VM bit.
+ */
+ Assert(prstate->cutoffs);
+ if (!TransactionIdPrecedes(xmin, prstate->cutoffs->OldestXmin))
+ {
+ prstate->all_visible = false;
+ break;
+ }
+
+ /* Track newest xmin on page. */
+ if (TransactionIdFollows(xmin, prstate->visibility_cutoff_xid) &&
+ TransactionIdIsNormal(xmin))
+ prstate->visibility_cutoff_xid = xmin;
+ }
+ break;
+
+ case HEAPTUPLE_RECENTLY_DEAD:
+ prstate->recently_dead_tuples++;
+ prstate->all_visible = false;
+
+ /*
+ * This tuple will soon become DEAD. Update the hint field so
+ * that the page is reconsidered for pruning in future.
+ */
+ heap_prune_record_prunable(prstate,
+ HeapTupleHeaderGetUpdateXid(htup));
+ break;
+
case HEAPTUPLE_INSERT_IN_PROGRESS:
+ /*
+ * We do not count these rows as live, because we expect the
+ * inserting transaction to update the counters at commit, and we
+ * assume that will happen only after we report our results. This
+ * assumption is a bit shaky, but it is what acquire_sample_rows()
+ * does, so be consistent.
+ */
+ prstate->all_visible = false;
+
/*
* If we wanted to optimize for aborts, we might consider marking
* the page prunable when we see INSERT_IN_PROGRESS. But we
*/
break;
- case HEAPTUPLE_RECENTLY_DEAD:
case HEAPTUPLE_DELETE_IN_PROGRESS:
- htup = (HeapTupleHeader) PageGetItem(page, PageGetItemId(page, offnum));
+ /*
+ * This is an expected case during concurrent vacuum. Count such
+ * rows as live. As above, we assume the deleting transaction
+ * will commit and update the counters after we report.
+ */
+ prstate->live_tuples++;
+ prstate->all_visible = false;
/*
* This tuple may soon become DEAD. Update the hint field so that
HeapTupleHeaderGetUpdateXid(htup));
break;
-
default:
/*
* DEAD tuples should've been passed to heap_prune_record_dead()
* or heap_prune_record_unused() instead.
*/
- elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d", htsv[offnum]);
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result %d",
+ prstate->htsv[offnum]);
break;
}
+
+ /* Consider freezing any normal tuples which will not be removed */
+ if (prstate->freeze)
+ {
+ bool totally_frozen;
+
+ if (heap_prepare_freeze_tuple(htup,
+ prstate->cutoffs,
+ &prstate->pagefrz,
+ &prstate->frozen[prstate->nfrozen],
+ &totally_frozen))
+ {
+ /* Save prepared freeze plan for later */
+ prstate->frozen[prstate->nfrozen++].offset = offnum;
+ }
+
+ /*
+ * If any tuple isn't either totally frozen already or eligible to
+ * become totally frozen (according to its freeze plan), then the page
+ * definitely cannot be set all-frozen in the visibility map later on.
+ */
+ if (!totally_frozen)
+ prstate->all_frozen = false;
+ }
}
{
Assert(!prstate->processed[offnum]);
prstate->processed[offnum] = true;
+
+ /*
+ * Deliberately don't set hastup for LP_DEAD items. We make the soft
+ * assumption that any LP_DEAD items encountered here will become
+ * LP_UNUSED later on, before count_nondeletable_pages is reached. If we
+ * don't make this assumption then rel truncation will only happen every
+ * other VACUUM, at most. Besides, VACUUM must treat
+ * hastup/nonempty_pages as provisional no matter how LP_DEAD items are
+ * handled (handled here, or handled later on).
+ *
+ * Similarly, don't unset all_visible until later, at the end of
+ * heap_page_prune_and_freeze(). This will allow us to attempt to freeze
+ * the page after pruning. As long as we unset it before updating the
+ * visibility map, this will be correct.
+ */
+
+ /* Record the dead offset for vacuum */
+ prstate->deadoffsets[prstate->lpdead_items++] = offnum;
}
/*
static void
heap_prune_record_unchanged_lp_redirect(PruneState *prstate, OffsetNumber offnum)
{
+ /*
+ * A redirect line pointer doesn't count as a live tuple.
+ *
+ * If we leave a redirect line pointer in place, there will be another
+ * tuple on the page that it points to. We will do the bookkeeping for
+ * that separately. So we have nothing to do here, except remember that
+ * we processed this item.
+ */
Assert(!prstate->processed[offnum]);
prstate->processed[offnum] = true;
}
/*
- * Perform the actual page changes needed by heap_page_prune.
+ * Perform the actual page changes needed by heap_page_prune_and_freeze().
*
* If 'lp_truncate_only' is set, we are merely marking LP_DEAD line pointers
* as unused, not redirecting or removing anything else. The
else
{
/*
- * When heap_page_prune() was called, mark_unused_now may have
- * been passed as true, which allows would-be LP_DEAD items to be
- * made LP_UNUSED instead. This is only possible if the relation
- * has no indexes. If there are any dead items, then
+ * When heap_page_prune_and_freeze() was called, mark_unused_now
+ * may have been passed as true, which allows would-be LP_DEAD
+ * items to be made LP_UNUSED instead. This is only possible if
+ * the relation has no indexes. If there are any dead items, then
* mark_unused_now was not true and every item being marked
* LP_UNUSED must refer to a heap-only tuple.
*/
#include "commands/dbcommands.h"
#include "commands/progress.h"
#include "commands/vacuum.h"
+#include "common/int.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
* as an upper bound on the XIDs stored in the pages we'll actually scan
* (NewRelfrozenXid tracking must never be allowed to miss unfrozen XIDs).
*
- * Next acquire vistest, a related cutoff that's used in heap_page_prune.
- * We expect vistest will always make heap_page_prune remove any deleted
- * tuple whose xmax is < OldestXmin. lazy_scan_prune must never become
- * confused about whether a tuple should be frozen or removed. (In the
- * future we might want to teach lazy_scan_prune to recompute vistest from
- * time to time, to increase the number of dead tuples it can prune away.)
+ * Next acquire vistest, a related cutoff that's used in pruning. We
+ * expect vistest will always make heap_page_prune_and_freeze() remove any
+ * deleted tuple whose xmax is < OldestXmin. lazy_scan_prune must never
+ * become confused about whether a tuple should be frozen or removed. (In
+ * the future we might want to teach lazy_scan_prune to recompute vistest
+ * from time to time, to increase the number of dead tuples it can prune
+ * away.)
*/
vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
return false;
}
+/*
+ * qsort comparator for sorting OffsetNumbers. Used to sort the deadoffsets
+ * collected by heap_page_prune_and_freeze(), since dead_items_add() requires
+ * its input to be sorted.
+ */
+static int
+cmpOffsetNumbers(const void *a, const void *b)
+{
+ return pg_cmp_u16(*(const OffsetNumber *) a, *(const OffsetNumber *) b);
+}
+
/*
* lazy_scan_prune() -- lazy_scan_heap() pruning and freezing.
*
* Caller must hold pin and buffer cleanup lock on the buffer.
*
- * Prior to PostgreSQL 14 there were very rare cases where heap_page_prune()
- * was allowed to disagree with our HeapTupleSatisfiesVacuum() call about
- * whether or not a tuple should be considered DEAD. This happened when an
- * inserting transaction concurrently aborted (after our heap_page_prune()
- * call, before our HeapTupleSatisfiesVacuum() call). There was rather a lot
- * of complexity just so we could deal with tuples that were DEAD to VACUUM,
- * but nevertheless were left with storage after pruning.
- *
- * As of Postgres 17, we circumvent this problem altogether by reusing the
- * result of heap_page_prune()'s visibility check. Without the second call to
- * HeapTupleSatisfiesVacuum(), there is no new HTSV_Result and there can be no
- * disagreement. We'll just handle such tuples as if they had become fully dead
- * right after this operation completes instead of in the middle of it. Note that
- * any tuple that becomes dead after the call to heap_page_prune() can't need to
- * be frozen, because it was visible to another session when vacuum started.
- *
* vmbuffer is the buffer containing the VM block with visibility information
* for the heap block, blkno. all_visible_according_to_vm is the saved
* visibility status of the heap block looked up earlier by the caller. We
bool *has_lpdead_items)
{
Relation rel = vacrel->rel;
- OffsetNumber offnum,
- maxoff;
- ItemId itemid;
- PruneResult presult;
- int tuples_frozen,
- lpdead_items,
- live_tuples,
- recently_dead_tuples;
- HeapPageFreeze pagefrz;
- bool hastup = false;
- bool all_visible,
- all_frozen;
- TransactionId visibility_cutoff_xid;
+ PruneFreezeResult presult;
int prune_options = 0;
- int64 fpi_before = pgWalUsage.wal_fpi;
- OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
- HeapTupleFreeze frozen[MaxHeapTuplesPerPage];
Assert(BufferGetBlockNumber(buf) == blkno);
/*
- * maxoff might be reduced following line pointer array truncation in
- * heap_page_prune. That's safe for us to ignore, since the reclaimed
- * space will continue to look like LP_UNUSED items below.
- */
- maxoff = PageGetMaxOffsetNumber(page);
-
- /* Initialize (or reset) page-level state */
- pagefrz.freeze_required = false;
- pagefrz.FreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
- pagefrz.FreezePageRelminMxid = vacrel->NewRelminMxid;
- pagefrz.NoFreezePageRelfrozenXid = vacrel->NewRelfrozenXid;
- pagefrz.NoFreezePageRelminMxid = vacrel->NewRelminMxid;
- tuples_frozen = 0;
- lpdead_items = 0;
- live_tuples = 0;
- recently_dead_tuples = 0;
-
- /*
- * Prune all HOT-update chains in this page.
- *
- * We count the number of tuples removed from the page by the pruning step
- * in presult.ndeleted. It should not be confused with lpdead_items;
- * lpdead_items's final value can be thought of as the number of tuples
- * that were deleted from indexes.
+ * Prune all HOT-update chains and potentially freeze tuples on this page.
*
* If the relation has no indexes, we can immediately mark would-be dead
* items LP_UNUSED.
- */
- prune_options = 0;
- if (vacrel->nindexes == 0)
- prune_options = HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
- heap_page_prune(rel, buf, vacrel->vistest, prune_options,
- &presult, PRUNE_VACUUM_SCAN, &vacrel->offnum);
-
- /*
- * We will update the VM after collecting LP_DEAD items and freezing
- * tuples. Keep track of whether or not the page is all_visible and
- * all_frozen and use this information to update the VM. all_visible
- * implies 0 lpdead_items, but don't trust all_frozen result unless
- * all_visible is also set to true.
*
- * Also keep track of the visibility cutoff xid for recovery conflicts.
- */
- all_visible = true;
- all_frozen = true;
- visibility_cutoff_xid = InvalidTransactionId;
-
- /*
- * Now scan the page to collect LP_DEAD items and update the variables set
- * just above.
- */
- for (offnum = FirstOffsetNumber;
- offnum <= maxoff;
- offnum = OffsetNumberNext(offnum))
- {
- HeapTupleHeader htup;
- bool totally_frozen;
-
- /*
- * Set the offset number so that we can display it along with any
- * error that occurred while processing this tuple.
- */
- vacrel->offnum = offnum;
- itemid = PageGetItemId(page, offnum);
-
- if (!ItemIdIsUsed(itemid))
- continue;
-
- /* Redirect items mustn't be touched */
- if (ItemIdIsRedirected(itemid))
- {
- /* page makes rel truncation unsafe */
- hastup = true;
- continue;
- }
-
- if (ItemIdIsDead(itemid))
- {
- /*
- * Deliberately don't set hastup for LP_DEAD items. We make the
- * soft assumption that any LP_DEAD items encountered here will
- * become LP_UNUSED later on, before count_nondeletable_pages is
- * reached. If we don't make this assumption then rel truncation
- * will only happen every other VACUUM, at most. Besides, VACUUM
- * must treat hastup/nonempty_pages as provisional no matter how
- * LP_DEAD items are handled (handled here, or handled later on).
- *
- * Also deliberately delay unsetting all_visible until just before
- * we return to lazy_scan_heap caller, as explained in full below.
- * (This is another case where it's useful to anticipate that any
- * LP_DEAD items will become LP_UNUSED during the ongoing VACUUM.)
- */
- deadoffsets[lpdead_items++] = offnum;
- continue;
- }
-
- Assert(ItemIdIsNormal(itemid));
-
- htup = (HeapTupleHeader) PageGetItem(page, itemid);
-
- /*
- * The criteria for counting a tuple as live in this block need to
- * match what analyze.c's acquire_sample_rows() does, otherwise VACUUM
- * and ANALYZE may produce wildly different reltuples values, e.g.
- * when there are many recently-dead tuples.
- *
- * The logic here is a bit simpler than acquire_sample_rows(), as
- * VACUUM can't run inside a transaction block, which makes some cases
- * impossible (e.g. in-progress insert from the same transaction).
- *
- * We treat LP_DEAD items (which are the closest thing to DEAD tuples
- * that might be seen here) differently, too: we assume that they'll
- * become LP_UNUSED before VACUUM finishes. This difference is only
- * superficial. VACUUM effectively agrees with ANALYZE about DEAD
- * items, in the end. VACUUM won't remember LP_DEAD items, but only
- * because they're not supposed to be left behind when it is done.
- * (Cases where we bypass index vacuuming will violate this optimistic
- * assumption, but the overall impact of that should be negligible.)
- */
- switch (htsv_get_valid_status(presult.htsv[offnum]))
- {
- case HEAPTUPLE_LIVE:
-
- /*
- * Count it as live. Not only is this natural, but it's also
- * what acquire_sample_rows() does.
- */
- live_tuples++;
-
- /*
- * Is the tuple definitely visible to all transactions?
- *
- * NB: Like with per-tuple hint bits, we can't set the
- * PD_ALL_VISIBLE flag if the inserter committed
- * asynchronously. See SetHintBits for more info. Check that
- * the tuple is hinted xmin-committed because of that.
- */
- if (all_visible)
- {
- TransactionId xmin;
-
- if (!HeapTupleHeaderXminCommitted(htup))
- {
- all_visible = false;
- break;
- }
-
- /*
- * The inserter definitely committed. But is it old enough
- * that everyone sees it as committed?
- */
- xmin = HeapTupleHeaderGetXmin(htup);
- if (!TransactionIdPrecedes(xmin,
- vacrel->cutoffs.OldestXmin))
- {
- all_visible = false;
- break;
- }
-
- /* Track newest xmin on page. */
- if (TransactionIdFollows(xmin, visibility_cutoff_xid) &&
- TransactionIdIsNormal(xmin))
- visibility_cutoff_xid = xmin;
- }
- break;
- case HEAPTUPLE_RECENTLY_DEAD:
-
- /*
- * If tuple is recently dead then we must not remove it from
- * the relation. (We only remove items that are LP_DEAD from
- * pruning.)
- */
- recently_dead_tuples++;
- all_visible = false;
- break;
- case HEAPTUPLE_INSERT_IN_PROGRESS:
-
- /*
- * We do not count these rows as live, because we expect the
- * inserting transaction to update the counters at commit, and
- * we assume that will happen only after we report our
- * results. This assumption is a bit shaky, but it is what
- * acquire_sample_rows() does, so be consistent.
- */
- all_visible = false;
- break;
- case HEAPTUPLE_DELETE_IN_PROGRESS:
- /* This is an expected case during concurrent vacuum */
- all_visible = false;
-
- /*
- * Count such rows as live. As above, we assume the deleting
- * transaction will commit and update the counters after we
- * report.
- */
- live_tuples++;
- break;
- default:
- elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
- break;
- }
-
- hastup = true; /* page makes rel truncation unsafe */
-
- /* Tuple with storage -- consider need to freeze */
- if (heap_prepare_freeze_tuple(htup, &vacrel->cutoffs, &pagefrz,
- &frozen[tuples_frozen], &totally_frozen))
- {
- /* Save prepared freeze plan for later */
- frozen[tuples_frozen++].offset = offnum;
- }
-
- /*
- * If any tuple isn't either totally frozen already or eligible to
- * become totally frozen (according to its freeze plan), then the page
- * definitely cannot be set all-frozen in the visibility map later on
- */
- if (!totally_frozen)
- all_frozen = false;
- }
-
- /*
- * We have now divided every item on the page into either an LP_DEAD item
- * that will need to be vacuumed in indexes later, or a LP_NORMAL tuple
- * that remains and needs to be considered for freezing now (LP_UNUSED and
- * LP_REDIRECT items also remain, but are of no further interest to us).
- */
- vacrel->offnum = InvalidOffsetNumber;
-
- /*
- * Freeze the page when heap_prepare_freeze_tuple indicates that at least
- * one XID/MXID from before FreezeLimit/MultiXactCutoff is present. Also
- * freeze when pruning generated an FPI, if doing so means that we set the
- * page all-frozen afterwards (might not happen until final heap pass).
+ * The number of tuples removed from the page is returned in
+ * presult.ndeleted. It should not be confused with presult.lpdead_items;
+ * the final value of presult.lpdead_items can be thought of as the number
+ * of tuples that will be deleted from indexes.
+ *
+ * We will update the VM after collecting LP_DEAD items and freezing
+ * tuples. Pruning will have determined whether or not the page is
+ * all-visible.
*/
- if (pagefrz.freeze_required || tuples_frozen == 0 ||
- (all_visible && all_frozen &&
- fpi_before != pgWalUsage.wal_fpi))
- {
- /*
- * We're freezing the page. Our final NewRelfrozenXid doesn't need to
- * be affected by the XIDs that are just about to be frozen anyway.
- */
- vacrel->NewRelfrozenXid = pagefrz.FreezePageRelfrozenXid;
- vacrel->NewRelminMxid = pagefrz.FreezePageRelminMxid;
-
- if (tuples_frozen == 0)
- {
- /*
- * We have no freeze plans to execute, so there's no added cost
- * from following the freeze path. That's why it was chosen. This
- * is important in the case where the page only contains totally
- * frozen tuples at this point (perhaps only following pruning).
- * Such pages can be marked all-frozen in the VM by our caller,
- * even though none of its tuples were newly frozen here (note
- * that the "no freeze" path never sets pages all-frozen).
- *
- * We never increment the frozen_pages instrumentation counter
- * here, since it only counts pages with newly frozen tuples
- * (don't confuse that with pages newly set all-frozen in VM).
- */
- }
- else
- {
- TransactionId snapshotConflictHorizon;
+ prune_options = HEAP_PAGE_PRUNE_FREEZE;
+ if (vacrel->nindexes == 0)
+ prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
- vacrel->frozen_pages++;
+ heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
+ &vacrel->cutoffs, &presult, PRUNE_VACUUM_SCAN,
+ &vacrel->offnum,
+ &vacrel->NewRelfrozenXid, &vacrel->NewRelminMxid);
- /*
- * We can use visibility_cutoff_xid as our cutoff for conflicts
- * when the whole page is eligible to become all-frozen in the VM
- * once we're done with it. Otherwise we generate a conservative
- * cutoff by stepping back from OldestXmin.
- */
- if (all_visible && all_frozen)
- {
- /* Using same cutoff when setting VM is now unnecessary */
- snapshotConflictHorizon = visibility_cutoff_xid;
- visibility_cutoff_xid = InvalidTransactionId;
- }
- else
- {
- /* Avoids false conflicts when hot_standby_feedback in use */
- snapshotConflictHorizon = vacrel->cutoffs.OldestXmin;
- TransactionIdRetreat(snapshotConflictHorizon);
- }
+ Assert(MultiXactIdIsValid(vacrel->NewRelminMxid));
+ Assert(TransactionIdIsValid(vacrel->NewRelfrozenXid));
- /* Execute all freeze plans for page as a single atomic action */
- heap_freeze_execute_prepared(vacrel->rel, buf,
- snapshotConflictHorizon,
- frozen, tuples_frozen);
- }
- }
- else
+ if (presult.nfrozen > 0)
{
/*
- * Page requires "no freeze" processing. It might be set all-visible
- * in the visibility map, but it can never be set all-frozen.
+ * We don't increment the frozen_pages instrumentation counter when
+ * nfrozen == 0, since it only counts pages with newly frozen tuples
+ * (don't confuse that with pages newly set all-frozen in VM).
*/
- vacrel->NewRelfrozenXid = pagefrz.NoFreezePageRelfrozenXid;
- vacrel->NewRelminMxid = pagefrz.NoFreezePageRelminMxid;
- all_frozen = false;
- tuples_frozen = 0; /* avoid miscounts in instrumentation */
+ vacrel->frozen_pages++;
}
/*
*/
#ifdef USE_ASSERT_CHECKING
/* Note that all_frozen value does not matter when !all_visible */
- if (all_visible && lpdead_items == 0)
+ if (presult.all_visible)
{
TransactionId debug_cutoff;
bool debug_all_frozen;
+ Assert(presult.lpdead_items == 0);
+
if (!heap_page_is_all_visible(vacrel, buf,
&debug_cutoff, &debug_all_frozen))
Assert(false);
+ Assert(presult.all_frozen == debug_all_frozen);
+
Assert(!TransactionIdIsValid(debug_cutoff) ||
- debug_cutoff == visibility_cutoff_xid);
+ debug_cutoff == presult.vm_conflict_horizon);
}
#endif
/*
* Now save details of the LP_DEAD items from the page in vacrel
*/
- if (lpdead_items > 0)
+ if (presult.lpdead_items > 0)
{
vacrel->lpdead_item_pages++;
- dead_items_add(vacrel, blkno, deadoffsets, lpdead_items);
-
/*
- * It was convenient to ignore LP_DEAD items in all_visible earlier on
- * to make the choice of whether or not to freeze the page unaffected
- * by the short-term presence of LP_DEAD items. These LP_DEAD items
- * were effectively assumed to be LP_UNUSED items in the making. It
- * doesn't matter which heap pass (initial pass or final pass) ends up
- * setting the page all-frozen, as long as the ongoing VACUUM does it.
- *
- * Now that freezing has been finalized, unset all_visible. It needs
- * to reflect the present state of things, as expected by our caller.
+ * deadoffsets are collected incrementally in
+ * heap_page_prune_and_freeze() as each dead line pointer is recorded,
+ * in no particular order, but dead_items_add() requires them to be
+ * sorted.
*/
- all_visible = false;
+ qsort(presult.deadoffsets, presult.lpdead_items, sizeof(OffsetNumber),
+ cmpOffsetNumbers);
+
+ dead_items_add(vacrel, blkno, presult.deadoffsets, presult.lpdead_items);
}
/* Finally, add page-local counts to whole-VACUUM counts */
vacrel->tuples_deleted += presult.ndeleted;
- vacrel->tuples_frozen += tuples_frozen;
- vacrel->lpdead_items += lpdead_items;
- vacrel->live_tuples += live_tuples;
- vacrel->recently_dead_tuples += recently_dead_tuples;
+ vacrel->tuples_frozen += presult.nfrozen;
+ vacrel->lpdead_items += presult.lpdead_items;
+ vacrel->live_tuples += presult.live_tuples;
+ vacrel->recently_dead_tuples += presult.recently_dead_tuples;
/* Can't truncate this page */
- if (hastup)
+ if (presult.hastup)
vacrel->nonempty_pages = blkno + 1;
/* Did we find LP_DEAD items? */
- *has_lpdead_items = (lpdead_items > 0);
+ *has_lpdead_items = (presult.lpdead_items > 0);
- Assert(!all_visible || !(*has_lpdead_items));
+ Assert(!presult.all_visible || !(*has_lpdead_items));
/*
* Handle setting visibility map bit based on information from the VM (as
* of last heap_vac_scan_next_block() call), and from all_visible and
* all_frozen variables
*/
- if (!all_visible_according_to_vm && all_visible)
+ if (!all_visible_according_to_vm && presult.all_visible)
{
uint8 flags = VISIBILITYMAP_ALL_VISIBLE;
- if (all_frozen)
+ if (presult.all_frozen)
{
- Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+ Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
flags |= VISIBILITYMAP_ALL_FROZEN;
}
PageSetAllVisible(page);
MarkBufferDirty(buf);
visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid,
+ vmbuffer, presult.vm_conflict_horizon,
flags);
}
* There should never be LP_DEAD items on a page with PD_ALL_VISIBLE set,
* however.
*/
- else if (lpdead_items > 0 && PageIsAllVisible(page))
+ else if (presult.lpdead_items > 0 && PageIsAllVisible(page))
{
elog(WARNING, "page containing LP_DEAD items is marked as all-visible in relation \"%s\" page %u",
vacrel->relname, blkno);
* it as all-frozen. Note that all_frozen is only valid if all_visible is
* true, so we must check both all_visible and all_frozen.
*/
- else if (all_visible_according_to_vm && all_visible &&
- all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
+ else if (all_visible_according_to_vm && presult.all_visible &&
+ presult.all_frozen && !VM_ALL_FROZEN(vacrel->rel, blkno, &vmbuffer))
{
/*
* Avoid relying on all_visible_according_to_vm as a proxy for the
/*
* Set the page all-frozen (and all-visible) in the VM.
*
- * We can pass InvalidTransactionId as our visibility_cutoff_xid,
- * since a snapshotConflictHorizon sufficient to make everything safe
- * for REDO was logged when the page's tuples were frozen.
+ * We can pass InvalidTransactionId as our cutoff_xid, since a
+ * snapshotConflictHorizon sufficient to make everything safe for REDO
+ * was logged when the page's tuples were frozen.
*/
- Assert(!TransactionIdIsValid(visibility_cutoff_xid));
+ Assert(!TransactionIdIsValid(presult.vm_conflict_horizon));
visibilitymap_set(vacrel->rel, blkno, buf, InvalidXLogRecPtr,
vmbuffer, InvalidTransactionId,
VISIBILITYMAP_ALL_VISIBLE |