lo \
ltree \
oid2name \
- old_snapshot \
pageinspect \
passwordcheck \
pg_buffercache \
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
if (!PageIsNew(page) && !BloomPageIsDeleted(page))
{
subdir('ltree')
subdir('ltree_plpython')
subdir('oid2name')
-subdir('old_snapshot')
subdir('pageinspect')
subdir('passwordcheck')
subdir('pg_buffercache')
+++ /dev/null
-# contrib/old_snapshot/Makefile
-
-MODULE_big = old_snapshot
-OBJS = \
- $(WIN32RES) \
- time_mapping.o
-
-EXTENSION = old_snapshot
-DATA = old_snapshot--1.0.sql
-PGFILEDESC = "old_snapshot - utilities in support of old_snapshot_threshold"
-
-ifdef USE_PGXS
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-else
-subdir = contrib/old_snapshot
-top_builddir = ../..
-include $(top_builddir)/src/Makefile.global
-include $(top_srcdir)/contrib/contrib-global.mk
-endif
+++ /dev/null
-# Copyright (c) 2022-2023, PostgreSQL Global Development Group
-
-old_snapshot_sources = files(
- 'time_mapping.c',
-)
-
-if host_system == 'windows'
- old_snapshot_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
- '--NAME', 'old_snapshot',
- '--FILEDESC', 'old_snapshot - utilities in support of old_snapshot_threshold',])
-endif
-
-old_snapshot = shared_module('old_snapshot',
- old_snapshot_sources,
- kwargs: contrib_mod_args,
-)
-contrib_targets += old_snapshot
-
-install_data(
- 'old_snapshot.control',
- 'old_snapshot--1.0.sql',
- kwargs: contrib_data_args,
-)
+++ /dev/null
-/* contrib/old_snapshot/old_snapshot--1.0.sql */
-
--- complain if script is sourced in psql, rather than via CREATE EXTENSION
-\echo Use "CREATE EXTENSION old_snapshot" to load this file. \quit
-
--- Show visibility map and page-level visibility information for each block.
-CREATE FUNCTION pg_old_snapshot_time_mapping(array_offset OUT int4,
- end_timestamp OUT timestamptz,
- newest_xmin OUT xid)
-RETURNS SETOF record
-AS 'MODULE_PATHNAME', 'pg_old_snapshot_time_mapping'
-LANGUAGE C STRICT;
-
--- XXX. Do we want REVOKE commands here?
+++ /dev/null
-# old_snapshot extension
-comment = 'utilities in support of old_snapshot_threshold'
-default_version = '1.0'
-module_pathname = '$libdir/old_snapshot'
-relocatable = true
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * time_mapping.c
- * time to XID mapping information
- *
- * Copyright (c) 2020-2023, PostgreSQL Global Development Group
- *
- * contrib/old_snapshot/time_mapping.c
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "funcapi.h"
-#include "storage/lwlock.h"
-#include "utils/old_snapshot.h"
-#include "utils/snapmgr.h"
-#include "utils/timestamp.h"
-
-/*
- * Backend-private copy of the information from oldSnapshotControl which relates
- * to the time to XID mapping, plus an index so that we can iterate.
- *
- * Note that the length of the xid_by_minute array is given by
- * OLD_SNAPSHOT_TIME_MAP_ENTRIES (which is not a compile-time constant).
- */
-typedef struct
-{
- int current_index;
- int head_offset;
- TimestampTz head_timestamp;
- int count_used;
- TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotTimeMapping;
-
-#define NUM_TIME_MAPPING_COLUMNS 3
-
-PG_MODULE_MAGIC;
-PG_FUNCTION_INFO_V1(pg_old_snapshot_time_mapping);
-
-static OldSnapshotTimeMapping *GetOldSnapshotTimeMapping(void);
-static HeapTuple MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc,
- OldSnapshotTimeMapping *mapping);
-
-/*
- * SQL-callable set-returning function.
- */
-Datum
-pg_old_snapshot_time_mapping(PG_FUNCTION_ARGS)
-{
- FuncCallContext *funcctx;
- OldSnapshotTimeMapping *mapping;
-
- if (SRF_IS_FIRSTCALL())
- {
- MemoryContext oldcontext;
- TupleDesc tupdesc;
-
- funcctx = SRF_FIRSTCALL_INIT();
- oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
- mapping = GetOldSnapshotTimeMapping();
- funcctx->user_fctx = mapping;
- if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
- elog(ERROR, "return type must be a row type");
- funcctx->tuple_desc = tupdesc;
- MemoryContextSwitchTo(oldcontext);
- }
-
- funcctx = SRF_PERCALL_SETUP();
- mapping = (OldSnapshotTimeMapping *) funcctx->user_fctx;
-
- while (mapping->current_index < mapping->count_used)
- {
- HeapTuple tuple;
-
- tuple = MakeOldSnapshotTimeMappingTuple(funcctx->tuple_desc, mapping);
- ++mapping->current_index;
- SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
- }
-
- SRF_RETURN_DONE(funcctx);
-}
-
-/*
- * Get the old snapshot time mapping data from shared memory.
- */
-static OldSnapshotTimeMapping *
-GetOldSnapshotTimeMapping(void)
-{
- OldSnapshotTimeMapping *mapping;
-
- mapping = palloc(offsetof(OldSnapshotTimeMapping, xid_by_minute)
- + sizeof(TransactionId) * OLD_SNAPSHOT_TIME_MAP_ENTRIES);
- mapping->current_index = 0;
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
- mapping->head_offset = oldSnapshotControl->head_offset;
- mapping->head_timestamp = oldSnapshotControl->head_timestamp;
- mapping->count_used = oldSnapshotControl->count_used;
- for (int i = 0; i < OLD_SNAPSHOT_TIME_MAP_ENTRIES; ++i)
- mapping->xid_by_minute[i] = oldSnapshotControl->xid_by_minute[i];
- LWLockRelease(OldSnapshotTimeMapLock);
-
- return mapping;
-}
-
-/*
- * Convert one entry from the old snapshot time mapping to a HeapTuple.
- */
-static HeapTuple
-MakeOldSnapshotTimeMappingTuple(TupleDesc tupdesc, OldSnapshotTimeMapping *mapping)
-{
- Datum values[NUM_TIME_MAPPING_COLUMNS];
- bool nulls[NUM_TIME_MAPPING_COLUMNS];
- int array_position;
- TimestampTz timestamp;
-
- /*
- * Figure out the array position corresponding to the current index.
- *
- * Index 0 means the oldest entry in the mapping, which is stored at
- * mapping->head_offset. Index 1 means the next-oldest entry, which is a
- * the following index, and so on. We wrap around when we reach the end of
- * the array.
- */
- array_position = (mapping->head_offset + mapping->current_index)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- /*
- * No explicit timestamp is stored for any entry other than the oldest
- * one, but each entry corresponds to 1-minute period, so we can just add.
- */
- timestamp = TimestampTzPlusMilliseconds(mapping->head_timestamp,
- mapping->current_index * 60000);
-
- /* Initialize nulls and values arrays. */
- memset(nulls, 0, sizeof(nulls));
- values[0] = Int32GetDatum(array_position);
- values[1] = TimestampTzGetDatum(timestamp);
- values[2] = TransactionIdGetDatum(mapping->xid_by_minute[array_position]);
-
- return heap_form_tuple(tupdesc, values, nulls);
-}
</para>
</listitem>
</varlistentry>
-
- <varlistentry id="guc-old-snapshot-threshold" xreflabel="old_snapshot_threshold">
- <term><varname>old_snapshot_threshold</varname> (<type>integer</type>)
- <indexterm>
- <primary><varname>old_snapshot_threshold</varname> configuration parameter</primary>
- </indexterm>
- </term>
- <listitem>
- <para>
- Sets the minimum amount of time that a query snapshot can be used
- without risk of a <quote>snapshot too old</quote> error occurring
- when using the snapshot. Data that has been dead for longer than
- this threshold is allowed to be vacuumed away. This can help
- prevent bloat in the face of snapshots which remain in use for a
- long time. To prevent incorrect results due to cleanup of data which
- would otherwise be visible to the snapshot, an error is generated
- when the snapshot is older than this threshold and the snapshot is
- used to read a page which has been modified since the snapshot was
- built.
- </para>
-
- <para>
- If this value is specified without units, it is taken as minutes.
- A value of <literal>-1</literal> (the default) disables this feature,
- effectively setting the snapshot age limit to infinity.
- This parameter can only be set at server start.
- </para>
-
- <para>
- Useful values for production work probably range from a small number
- of hours to a few days. Small values (such as <literal>0</literal> or
- <literal>1min</literal>) are only allowed because they may sometimes be
- useful for testing. While a setting as high as <literal>60d</literal> is
- allowed, please note that in many workloads extreme bloat or
- transaction ID wraparound may occur in much shorter time frames.
- </para>
-
- <para>
- When this feature is enabled, freed space at the end of a relation
- cannot be released to the operating system, since that could remove
- information needed to detect the <quote>snapshot too old</quote>
- condition. All space allocated to a relation remains associated with
- that relation for reuse only within that relation unless explicitly
- freed (for example, with <command>VACUUM FULL</command>).
- </para>
-
- <para>
- This setting does not attempt to guarantee that an error will be
- generated under any particular circumstances. In fact, if the
- correct results can be generated from (for example) a cursor which
- has materialized a result set, no error will be generated even if the
- underlying rows in the referenced table have been vacuumed away.
- Some tables cannot safely be vacuumed early, and so will not be
- affected by this setting, such as system catalogs. For such tables
- this setting will neither reduce bloat nor create a possibility
- of a <quote>snapshot too old</quote> error on scanning.
- </para>
- </listitem>
- </varlistentry>
</variablelist>
</sect2>
</sect1>
until it eventually reaches the primary. Standbys make no other use
of feedback they receive other than to pass upstream.
</para>
- <para>
- This setting does not override the behavior of
- <varname>old_snapshot_threshold</varname> on the primary; a snapshot on the
- standby which exceeds the primary's age threshold can become invalid,
- resulting in cancellation of transactions on the standby. This is
- because <varname>old_snapshot_threshold</varname> is intended to provide an
- absolute limit on the time which dead rows can contribute to bloat,
- which would otherwise be violated because of the configuration of a
- standby.
- </para>
</listitem>
</varlistentry>
&isn;
&lo;
<ree;
- &oldsnapshot;
&pageinspect;
&passwordcheck;
&pgbuffercache;
<!ENTITY lo SYSTEM "lo.sgml">
<!ENTITY ltree SYSTEM "ltree.sgml">
<!ENTITY oid2name SYSTEM "oid2name.sgml">
-<!ENTITY oldsnapshot SYSTEM "oldsnapshot.sgml">
<!ENTITY pageinspect SYSTEM "pageinspect.sgml">
<!ENTITY passwordcheck SYSTEM "passwordcheck.sgml">
<!ENTITY pgbuffercache SYSTEM "pgbuffercache.sgml">
+++ /dev/null
-<!-- doc/src/sgml/oldsnapshot.sgml -->
-
-<sect1 id="oldsnapshot" xreflabel="old_snapshot">
- <title>old_snapshot — inspect <literal>old_snapshot_threshold</literal> state</title>
-
- <indexterm zone="oldsnapshot">
- <primary>old_snapshot</primary>
- </indexterm>
-
- <para>
- The <filename>old_snapshot</filename> module allows inspection
- of the server state that is used to implement
- <xref linkend="guc-old-snapshot-threshold" />.
- </para>
-
- <sect2 id="oldsnapshot-functions">
- <title>Functions</title>
-
- <variablelist>
- <varlistentry>
- <term><function>pg_old_snapshot_time_mapping(array_offset OUT int4, end_timestamp OUT timestamptz, newest_xmin OUT xid) returns setof record</function></term>
- <listitem>
- <para>
- Returns all of the entries in the server's timestamp to XID mapping.
- Each entry represents the newest xmin of any snapshot taken in the
- corresponding minute.
- </para>
- </listitem>
- </varlistentry>
- </variablelist>
- </sect2>
-
-</sect1>
meta = ReadBuffer(idxrel, BRIN_METAPAGE_BLKNO);
LockBuffer(meta, BUFFER_LOCK_SHARE);
page = BufferGetPage(meta);
- TestForOldSnapshot(snapshot, idxrel, page);
metadata = (BrinMetaPageData *) PageGetContents(page);
revmap = palloc(sizeof(BrinRevmap));
}
LockBuffer(*buf, mode);
page = BufferGetPage(*buf);
- TestForOldSnapshot(snapshot, idxRel, page);
/* If we land on a revmap page, start over */
if (BRIN_IS_REGULAR_PAGE(page))
LockBuffer(regBuf, BUFFER_LOCK_EXCLUSIVE);
regPg = BufferGetPage(regBuf);
- /*
- * We're only removing data, not reading it, so there's no need to
- * TestForOldSnapshot here.
- */
-
/* if this is no longer a regular page, tell caller to start over */
if (!BRIN_IS_REGULAR_PAGE(regPg))
{
stack->off = InvalidOffsetNumber;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
access = ginTraverseLock(stack->buffer, searchMode);
stack->buffer = ginStepRight(stack->buffer, btree->index, access);
stack->blkno = rightlink;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
if (!searchMode && GinPageIsIncompleteSplit(page))
ginFinishSplit(btree, stack, false, NULL);
return true;
page = BufferGetPage(stack->buffer);
- TestForOldSnapshot(snapshot, btree->index, page);
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
/*
for (;;)
{
page = BufferGetPage(pos->pendingBuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
maxoff = PageGetMaxOffsetNumber(page);
if (pos->firstOffset > maxoff)
sizeof(bool) * (pos->lastOffset - pos->firstOffset));
page = BufferGetPage(pos->pendingBuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
for (i = 0; i < so->nkeys; i++)
{
LockBuffer(metabuffer, GIN_SHARE);
page = BufferGetPage(metabuffer);
- TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);
blkno = GinPageGetMeta(page)->head;
/*
PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
gistcheckpage(scan->indexRelation, buffer);
page = BufferGetPage(buffer);
- TestForOldSnapshot(scan->xs_snapshot, r, page);
opaque = GistPageGetOpaque(page);
/*
if (BlockNumberIsValid(blkno))
{
buf = _hash_getbuf(rel, blkno, HASH_READ, LH_OVERFLOW_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
if (!_hash_readpage(scan, &buf, dir))
end_of_scan = true;
}
{
buf = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(buf));
/*
* We always maintain the pin on bucket page for whole scan
if (block_found)
{
*pagep = BufferGetPage(*bufp);
- TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
}
}
*bufp = _hash_getbuf(rel, blkno, HASH_READ,
LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
*pagep = BufferGetPage(*bufp);
- TestForOldSnapshot(scan->xs_snapshot, rel, *pagep);
*opaquep = HashPageGetOpaque(*pagep);
/*
buf = _hash_getbucketbuf_from_hashkey(rel, hashkey, HASH_READ, NULL);
PredicateLockPage(rel, BufferGetBlockNumber(buf), scan->xs_snapshot);
page = BufferGetPage(buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = HashPageGetOpaque(page);
bucket = opaque->hasho_bucket;
LockBuffer(buf, BUFFER_LOCK_UNLOCK);
old_buf = _hash_getbuf(rel, old_blkno, HASH_READ, LH_BUCKET_PAGE);
- TestForOldSnapshot(scan->xs_snapshot, rel, BufferGetPage(old_buf));
/*
* remember the split bucket buffer so as to use it later for
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, scan->rs_base.rs_rd, page);
lines = PageGetMaxOffsetNumber(page);
ntup = 0;
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
-
*linesleft = PageGetMaxOffsetNumber(page) - FirstOffsetNumber + 1;
if (ScanDirectionIsForward(dir))
/* Caller is responsible for ensuring buffer is locked if needed */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
-
if (ScanDirectionIsForward(dir))
{
*lineoff = OffsetNumberNext(scan->rs_coffset);
/* continue from previously returned page/tuple */
block = scan->rs_cblock; /* current page */
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
lineindex = scan->rs_cindex + dir;
if (ScanDirectionIsForward(dir))
{
heapgetpage((TableScanDesc) scan, block);
page = BufferGetPage(scan->rs_cbuf);
- TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, page);
linesleft = scan->rs_ntuples;
lineindex = ScanDirectionIsForward(dir) ? 0 : linesleft - 1;
*/
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, relation, page);
/*
* We'd better check for out-of-range offnum in case of VACUUM since the
buffer = ReadBuffer(relation, ItemPointerGetBlockNumber(&ctid));
LockBuffer(buffer, BUFFER_LOCK_SHARE);
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, relation, page);
/*
* Check for bogus item number. This is not treated as an error
/* tuple visibility test, initialized for the relation */
GlobalVisState *vistest;
- /*
- * Thresholds set by TransactionIdLimitedForOldSnapshots() if they have
- * been computed (done on demand, and only if
- * OldSnapshotThresholdActive()). The first time a tuple is about to be
- * removed based on the limited horizon, old_snap_used is set to true, and
- * SetOldSnapshotThresholdTimestamp() is called. See
- * heap_prune_satisfies_vacuum().
- */
- TimestampTz old_snap_ts;
- TransactionId old_snap_xmin;
- bool old_snap_used;
-
TransactionId new_prune_xid; /* new prune hint value for page */
TransactionId snapshotConflictHorizon; /* latest xid removed */
int nredirected; /* numbers of entries in arrays below */
Page page = BufferGetPage(buffer);
TransactionId prune_xid;
GlobalVisState *vistest;
- TransactionId limited_xmin = InvalidTransactionId;
- TimestampTz limited_ts = 0;
Size minfree;
/*
if (RecoveryInProgress())
return;
- /*
- * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
- * currently are broken, and discussion of what to do about them is
- * ongoing. See
- * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
- */
- if (old_snapshot_threshold == 0)
- SnapshotTooOldMagicForTest();
-
/*
* First check whether there's any chance there's something to prune,
* determining the appropriate horizon is a waste if there's no prune_xid
/*
* Check whether prune_xid indicates that there may be dead rows that can
* be cleaned up.
- *
- * It is OK to check the old snapshot limit before acquiring the cleanup
- * lock because the worst that can happen is that we are not quite as
- * aggressive about the cleanup (by however many transaction IDs are
- * consumed between this point and acquiring the lock). This allows us to
- * save significant overhead in the case where the page is found not to be
- * prunable.
- *
- * Even if old_snapshot_threshold is set, we first check whether the page
- * can be pruned without. Both because
- * TransactionIdLimitedForOldSnapshots() is not cheap, and because not
- * unnecessarily relying on old_snapshot_threshold avoids causing
- * conflicts.
*/
vistest = GlobalVisTestFor(relation);
if (!GlobalVisTestIsRemovableXid(vistest, prune_xid))
- {
- if (!OldSnapshotThresholdActive())
- return;
-
- if (!TransactionIdLimitedForOldSnapshots(GlobalVisTestNonRemovableHorizon(vistest),
- relation,
- &limited_xmin, &limited_ts))
- return;
-
- if (!TransactionIdPrecedes(prune_xid, limited_xmin))
- return;
- }
+ return;
/*
* We prune when a previous UPDATE failed to find enough space on the page
int ndeleted,
nnewlpdead;
- ndeleted = heap_page_prune(relation, buffer, vistest, limited_xmin,
- limited_ts, &nnewlpdead, NULL);
+ ndeleted = heap_page_prune(relation, buffer, vistest,
+ &nnewlpdead, NULL);
/*
* Report the number of tuples reclaimed to pgstats. This is
*
* vistest is used to distinguish whether tuples are DEAD or RECENTLY_DEAD
* (see heap_prune_satisfies_vacuum and
- * HeapTupleSatisfiesVacuum). old_snap_xmin / old_snap_ts need to
- * either have been set by TransactionIdLimitedForOldSnapshots, or
- * InvalidTransactionId/0 respectively.
+ * HeapTupleSatisfiesVacuum).
*
* Sets *nnewlpdead for caller, indicating the number of items that were
* newly set LP_DEAD during prune operation.
int
heap_page_prune(Relation relation, Buffer buffer,
GlobalVisState *vistest,
- TransactionId old_snap_xmin,
- TimestampTz old_snap_ts,
int *nnewlpdead,
OffsetNumber *off_loc)
{
prstate.new_prune_xid = InvalidTransactionId;
prstate.rel = relation;
prstate.vistest = vistest;
- prstate.old_snap_xmin = old_snap_xmin;
- prstate.old_snap_ts = old_snap_ts;
- prstate.old_snap_used = false;
prstate.snapshotConflictHorizon = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
/*
* Perform visibility checks for heap pruning.
- *
- * This is more complicated than just using GlobalVisTestIsRemovableXid()
- * because of old_snapshot_threshold. We only want to increase the threshold
- * that triggers errors for old snapshots when we actually decide to remove a
- * row based on the limited horizon.
- *
- * Due to its cost we also only want to call
- * TransactionIdLimitedForOldSnapshots() if necessary, i.e. we might not have
- * done so in heap_page_prune_opt() if pd_prune_xid was old enough. But we
- * still want to be able to remove rows that are too new to be removed
- * according to prstate->vistest, but that can be removed based on
- * old_snapshot_threshold. So we call TransactionIdLimitedForOldSnapshots() on
- * demand in here, if appropriate.
*/
static HTSV_Result
heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
if (res != HEAPTUPLE_RECENTLY_DEAD)
return res;
- /*
- * If we are already relying on the limited xmin, there is no need to
- * delay doing so anymore.
- */
- if (prstate->old_snap_used)
- {
- Assert(TransactionIdIsValid(prstate->old_snap_xmin));
-
- if (TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
- res = HEAPTUPLE_DEAD;
- return res;
- }
-
- /*
- * First check if GlobalVisTestIsRemovableXid() is sufficient to find the
- * row dead. If not, and old_snapshot_threshold is enabled, try to use the
- * lowered horizon.
- */
if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
res = HEAPTUPLE_DEAD;
- else if (OldSnapshotThresholdActive())
- {
- /* haven't determined limited horizon yet, requests */
- if (!TransactionIdIsValid(prstate->old_snap_xmin))
- {
- TransactionId horizon =
- GlobalVisTestNonRemovableHorizon(prstate->vistest);
-
- TransactionIdLimitedForOldSnapshots(horizon, prstate->rel,
- &prstate->old_snap_xmin,
- &prstate->old_snap_ts);
- }
-
- if (TransactionIdIsValid(prstate->old_snap_xmin) &&
- TransactionIdPrecedes(dead_after, prstate->old_snap_xmin))
- {
- /*
- * About to remove row based on snapshot_too_old. Need to raise
- * the threshold so problematic accesses would error.
- */
- Assert(!prstate->old_snap_used);
- SetOldSnapshotThresholdTimestamp(prstate->old_snap_ts,
- prstate->old_snap_xmin);
- prstate->old_snap_used = true;
- res = HEAPTUPLE_DEAD;
- }
- }
return res;
}
* that were deleted from indexes.
*/
tuples_deleted = heap_page_prune(rel, buf, vacrel->vistest,
- InvalidTransactionId, 0, &nnewlpdead,
+ &nnewlpdead,
&vacrel->offnum);
/*
{
BlockNumber possibly_freeable;
- if (!vacrel->do_rel_truncate || VacuumFailsafeActive ||
- old_snapshot_threshold >= 0)
+ if (!vacrel->do_rel_truncate || VacuumFailsafeActive)
return false;
possibly_freeable = vacrel->rel_pages - vacrel->nonempty_pages;
for (;;)
{
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_RIGHTMOST(opaque))
/* step right one page */
so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(so->currPos.buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/* check for deleted page */
if (!P_IGNORE(opaque))
* and do it all again.
*/
page = BufferGetPage(so->currPos.buf);
- TestForOldSnapshot(scan->xs_snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_IGNORE(opaque))
{
CHECK_FOR_INTERRUPTS();
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
/*
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}
/* Return to the original page to see what's up */
buf = _bt_relandgetbuf(rel, buf, obknum, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (P_ISDELETED(opaque))
{
blkno = opaque->btpo_next;
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
if (!P_ISDELETED(opaque))
break;
return InvalidBuffer;
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
for (;;)
RelationGetRelationName(rel));
buf = _bt_relandgetbuf(rel, buf, blkno, BT_READ);
page = BufferGetPage(buf);
- TestForOldSnapshot(snapshot, rel, page);
opaque = BTPageGetOpaque(page);
}
/* else new pointer points to the same page, no work needed */
page = BufferGetPage(buffer);
- TestForOldSnapshot(snapshot, index, page);
isnull = SpGistPageStoresNulls(page) ? true : false;
/*
* If we found any potentially broken HOT chains, mark the index as not
* being usable until the current transaction is below the event horizon.
- * See src/backend/access/heap/README.HOT for discussion. Also set this
- * if early pruning/vacuuming is enabled for the heap relation. While it
- * might become safe to use the index earlier based on actual cleanup
- * activity and other active transactions, the test for that would be much
- * more complex and would require some form of blocking, so keep it simple
- * and fast by just using the current transaction.
+ * See src/backend/access/heap/README.HOT for discussion. While it might
+ * become safe to use the index earlier based on actual cleanup activity
+ * and other active transactions, the test for that would be much more
+ * complex and would require some form of blocking, so keep it simple and
+ * fast by just using the current transaction.
*
* However, when reindexing an existing index, we should do nothing here.
* Any HOT chains that are broken with respect to the index must predate
*
* We also need not set indcheckxmin during a concurrent index build,
* because we won't set indisvalid true until all transactions that care
- * about the broken HOT chains or early pruning/vacuuming are gone.
+ * about the broken HOT chains are gone.
*
* Therefore, this code path can only be taken during non-concurrent
* CREATE INDEX. Thus the fact that heap_update will set the pg_index
* about any concurrent readers of the tuple; no other transaction can see
* it yet.
*/
- if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
+ if (indexInfo->ii_BrokenHotChain &&
!isreindex &&
!indexInfo->ii_Concurrent)
{
* reindexing pg_index itself, we must not try to update tuples in it.
* pg_index's indexes should always have these flags in their clean state,
* so that won't happen.
- *
- * If early pruning/vacuuming is enabled for the heap relation, the
- * usability horizon must be advanced to the current transaction on every
- * build or rebuild. pg_index is OK in this regard because catalog tables
- * are not subject to early cleanup.
*/
if (!skipped_constraint)
{
HeapTuple indexTuple;
Form_pg_index indexForm;
bool index_bad;
- bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
pg_index = table_open(IndexRelationId, RowExclusiveLock);
!indexForm->indisready ||
!indexForm->indislive);
if (index_bad ||
- (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
- early_pruning_enabled)
+ (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain))
{
- if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
+ if (!indexInfo->ii_BrokenHotChain)
indexForm->indcheckxmin = false;
- else if (index_bad || early_pruning_enabled)
+ else if (index_bad)
indexForm->indcheckxmin = true;
indexForm->indisvalid = true;
indexForm->indisready = true;
*/
cutoffs->OldestXmin = GetOldestNonRemovableTransactionId(rel);
- if (OldSnapshotThresholdActive())
- {
- TransactionId limit_xmin;
- TimestampTz limit_ts;
-
- if (TransactionIdLimitedForOldSnapshots(cutoffs->OldestXmin, rel,
- &limit_xmin, &limit_ts))
- {
- /*
- * TODO: We should only set the threshold if we are pruning on the
- * basis of the increased limits. Not as crucial here as it is
- * for opportunistic pruning (which often happens at a much higher
- * frequency), but would still be a significant improvement.
- */
- SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
- cutoffs->OldestXmin = limit_xmin;
- }
- }
-
Assert(TransactionIdIsNormal(cutoffs->OldestXmin));
/* Acquire OldestMxact */
wb_context->nr_pending = 0;
}
-
-
-/*
- * Implement slower/larger portions of TestForOldSnapshot
- *
- * Smaller/faster portions are put inline, but the entire set of logic is too
- * big for that.
- */
-void
-TestForOldSnapshot_impl(Snapshot snapshot, Relation relation)
-{
- if (RelationAllowsEarlyPruning(relation)
- && (snapshot)->whenTaken < GetOldSnapshotThresholdTimestamp())
- ereport(ERROR,
- (errcode(ERRCODE_SNAPSHOT_TOO_OLD),
- errmsg("snapshot too old")));
-}
size = add_size(size, WalRcvShmemSize());
size = add_size(size, PgArchShmemSize());
size = add_size(size, ApplyLauncherShmemSize());
- size = add_size(size, SnapMgrShmemSize());
size = add_size(size, BTreeShmemSize());
size = add_size(size, SyncScanShmemSize());
size = add_size(size, AsyncShmemSize());
/*
* Set up other modules that need some shared memory space
*/
- SnapMgrInit();
BTreeShmemInit();
SyncScanShmemInit();
AsyncShmemInit();
return TOTAL_MAX_CACHED_SUBXIDS;
}
-/*
- * Initialize old_snapshot_threshold specific parts of a newly build snapshot.
- */
-static void
-GetSnapshotDataInitOldSnapshot(Snapshot snapshot)
-{
- if (!OldSnapshotThresholdActive())
- {
- /*
- * If not using "snapshot too old" feature, fill related fields with
- * dummy values that don't require any locking.
- */
- snapshot->lsn = InvalidXLogRecPtr;
- snapshot->whenTaken = 0;
- }
- else
- {
- /*
- * Capture the current time and WAL stream location in case this
- * snapshot becomes old enough to need to fall back on the special
- * "old snapshot" logic.
- */
- snapshot->lsn = GetXLogInsertRecPtr();
- snapshot->whenTaken = GetSnapshotCurrentTimestamp();
- MaintainOldSnapshotTimeMapping(snapshot->whenTaken, snapshot->xmin);
- }
-}
-
/*
* Helper function for GetSnapshotData() that checks if the bulk of the
* visibility information in the snapshot is still valid. If so, it updates
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
-
- GetSnapshotDataInitOldSnapshot(snapshot);
+ snapshot->lsn = InvalidXLogRecPtr;
+ snapshot->whenTaken = 0;
return true;
}
snapshot->active_count = 0;
snapshot->regd_count = 0;
snapshot->copied = false;
-
- GetSnapshotDataInitOldSnapshot(snapshot);
+ snapshot->lsn = InvalidXLogRecPtr;
+ snapshot->whenTaken = 0;
return snapshot;
}
CommitTsLock 39
ReplicationOriginLock 40
MultiXactTruncationLock 41
-OldSnapshotTimeMapLock 42
+# 42 was OldSnapshotTimeMapLock
LogicalRepWorkerLock 43
XactTruncationLock 44
# 45 was XactTruncationLock until removal of BackendRandomLock
WAIT_EVENT_DOCONLY CommitTs "Waiting to read or update the last value set for a transaction commit timestamp."
WAIT_EVENT_DOCONLY ReplicationOrigin "Waiting to create, drop or use a replication origin."
WAIT_EVENT_DOCONLY MultiXactTruncation "Waiting to read or truncate multixact information."
-WAIT_EVENT_DOCONLY OldSnapshotTimeMap "Waiting to read or update old snapshot control information."
WAIT_EVENT_DOCONLY LogicalRepWorker "Waiting to read or update the state of logical replication workers."
WAIT_EVENT_DOCONLY XactTruncation "Waiting to execute <function>pg_xact_status</function> or update the oldest transaction ID available to it."
WAIT_EVENT_DOCONLY WrapLimitsVacuum "Waiting to update limits on transaction id and multixact consumption."
58P01 E ERRCODE_UNDEFINED_FILE undefined_file
58P02 E ERRCODE_DUPLICATE_FILE duplicate_file
-Section: Class 72 - Snapshot Failure
-# (class borrowed from Oracle)
-72000 E ERRCODE_SNAPSHOT_TOO_OLD snapshot_too_old
-
Section: Class F0 - Configuration File Error
# (PostgreSQL-specific error class)
check_autovacuum_work_mem, NULL, NULL
},
- {
- {"old_snapshot_threshold", PGC_POSTMASTER, RESOURCES_ASYNCHRONOUS,
- gettext_noop("Time before a snapshot is too old to read pages changed after the snapshot was taken."),
- gettext_noop("A value of -1 disables this feature."),
- GUC_UNIT_MIN
- },
- &old_snapshot_threshold,
- -1, -1, MINS_PER_HOUR * HOURS_PER_DAY * 60,
- NULL, NULL, NULL
- },
-
{
{"tcp_keepalives_idle", PGC_USERSET, CONN_AUTH_TCP,
gettext_noop("Time between issuing TCP keepalives."),
#max_parallel_workers = 8 # maximum number of max_worker_processes that
# can be used in parallel operations
#parallel_leader_participation = on
-#old_snapshot_threshold = -1 # 1min-60d; -1 disables; 0 is immediate
- # (change requires restart)
#------------------------------------------------------------------------------
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
-#include "utils/old_snapshot.h"
#include "utils/rel.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
#include "utils/timestamp.h"
-/*
- * GUC parameters
- */
-int old_snapshot_threshold; /* number of minutes, -1 disables */
-
-volatile OldSnapshotControlData *oldSnapshotControl;
-
-
/*
* CurrentSnapshot points to the only snapshot taken in transaction-snapshot
* mode, and to the latest one taken in a read-committed transaction.
static List *exportedSnapshots = NIL;
/* Prototypes for local functions */
-static TimestampTz AlignTimestampToMinuteBoundary(TimestampTz ts);
static Snapshot CopySnapshot(Snapshot snapshot);
static void FreeSnapshot(Snapshot snapshot);
static void SnapshotResetXmin(void);
XLogRecPtr lsn;
} SerializedSnapshotData;
-Size
-SnapMgrShmemSize(void)
-{
- Size size;
-
- size = offsetof(OldSnapshotControlData, xid_by_minute);
- if (old_snapshot_threshold > 0)
- size = add_size(size, mul_size(sizeof(TransactionId),
- OLD_SNAPSHOT_TIME_MAP_ENTRIES));
-
- return size;
-}
-
-/*
- * Initialize for managing old snapshot detection.
- */
-void
-SnapMgrInit(void)
-{
- bool found;
-
- /*
- * Create or attach to the OldSnapshotControlData structure.
- */
- oldSnapshotControl = (volatile OldSnapshotControlData *)
- ShmemInitStruct("OldSnapshotControlData",
- SnapMgrShmemSize(), &found);
-
- if (!found)
- {
- SpinLockInit(&oldSnapshotControl->mutex_current);
- oldSnapshotControl->current_timestamp = 0;
- SpinLockInit(&oldSnapshotControl->mutex_latest_xmin);
- oldSnapshotControl->latest_xmin = InvalidTransactionId;
- oldSnapshotControl->next_map_update = 0;
- SpinLockInit(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = 0;
- oldSnapshotControl->threshold_xid = InvalidTransactionId;
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->head_timestamp = 0;
- oldSnapshotControl->count_used = 0;
- }
-}
-
/*
* GetTransactionSnapshot
* Get the appropriate snapshot for a new query in a transaction.
}
-/*
- * Return a timestamp that is exactly on a minute boundary.
- *
- * If the argument is already aligned, return that value, otherwise move to
- * the next minute boundary following the given time.
- */
-static TimestampTz
-AlignTimestampToMinuteBoundary(TimestampTz ts)
-{
- TimestampTz retval = ts + (USECS_PER_MINUTE - 1);
-
- return retval - (retval % USECS_PER_MINUTE);
-}
-
-/*
- * Get current timestamp for snapshots
- *
- * This is basically GetCurrentTimestamp(), but with a guarantee that
- * the result never moves backward.
- */
-TimestampTz
-GetSnapshotCurrentTimestamp(void)
-{
- TimestampTz now = GetCurrentTimestamp();
-
- /*
- * Don't let time move backward; if it hasn't advanced, use the old value.
- */
- SpinLockAcquire(&oldSnapshotControl->mutex_current);
- if (now <= oldSnapshotControl->current_timestamp)
- now = oldSnapshotControl->current_timestamp;
- else
- oldSnapshotControl->current_timestamp = now;
- SpinLockRelease(&oldSnapshotControl->mutex_current);
-
- return now;
-}
-
-/*
- * Get timestamp through which vacuum may have processed based on last stored
- * value for threshold_timestamp.
- *
- * XXX: So far, we never trust that a 64-bit value can be read atomically; if
- * that ever changes, we could get rid of the spinlock here.
- */
-TimestampTz
-GetOldSnapshotThresholdTimestamp(void)
-{
- TimestampTz threshold_timestamp;
-
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-
- return threshold_timestamp;
-}
-
-void
-SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit)
-{
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- Assert(oldSnapshotControl->threshold_timestamp <= ts);
- Assert(TransactionIdPrecedesOrEquals(oldSnapshotControl->threshold_xid, xlimit));
- oldSnapshotControl->threshold_timestamp = ts;
- oldSnapshotControl->threshold_xid = xlimit;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-}
-
-/*
- * XXX: Magic to keep old_snapshot_threshold tests appear "working". They
- * currently are broken, and discussion of what to do about them is
- * ongoing. See
- * https://www.postgresql.org/message-id/20200403001235.e6jfdll3gh2ygbuc%40alap3.anarazel.de
- */
-void
-SnapshotTooOldMagicForTest(void)
-{
- TimestampTz ts = GetSnapshotCurrentTimestamp();
-
- Assert(old_snapshot_threshold == 0);
-
- ts -= 5 * USECS_PER_SEC;
-
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- oldSnapshotControl->threshold_timestamp = ts;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-}
-
-/*
- * If there is a valid mapping for the timestamp, set *xlimitp to
- * that. Returns whether there is such a mapping.
- */
-static bool
-GetOldSnapshotFromTimeMapping(TimestampTz ts, TransactionId *xlimitp)
-{
- bool in_mapping = false;
-
- Assert(ts == AlignTimestampToMinuteBoundary(ts));
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_SHARED);
-
- if (oldSnapshotControl->count_used > 0
- && ts >= oldSnapshotControl->head_timestamp)
- {
- int offset;
-
- offset = ((ts - oldSnapshotControl->head_timestamp)
- / USECS_PER_MINUTE);
- if (offset > oldSnapshotControl->count_used - 1)
- offset = oldSnapshotControl->count_used - 1;
- offset = (oldSnapshotControl->head_offset + offset)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- *xlimitp = oldSnapshotControl->xid_by_minute[offset];
-
- in_mapping = true;
- }
-
- LWLockRelease(OldSnapshotTimeMapLock);
-
- return in_mapping;
-}
-
-/*
- * TransactionIdLimitedForOldSnapshots
- *
- * Apply old snapshot limit. This is intended to be called for page pruning
- * and table vacuuming, to allow old_snapshot_threshold to override the normal
- * global xmin value. Actual testing for snapshot too old will be based on
- * whether a snapshot timestamp is prior to the threshold timestamp set in
- * this function.
- *
- * If the limited horizon allows a cleanup action that otherwise would not be
- * possible, SetOldSnapshotThresholdTimestamp(*limit_ts, *limit_xid) needs to
- * be called before that cleanup action.
- */
-bool
-TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
- Relation relation,
- TransactionId *limit_xid,
- TimestampTz *limit_ts)
-{
- TimestampTz ts;
- TransactionId xlimit = recentXmin;
- TransactionId latest_xmin;
- TimestampTz next_map_update_ts;
- TransactionId threshold_timestamp;
- TransactionId threshold_xid;
-
- Assert(TransactionIdIsNormal(recentXmin));
- Assert(OldSnapshotThresholdActive());
- Assert(limit_ts != NULL && limit_xid != NULL);
-
- /*
- * TestForOldSnapshot() assumes early pruning advances the page LSN, so we
- * can't prune early when skipping WAL.
- */
- if (!RelationAllowsEarlyPruning(relation) || !RelationNeedsWAL(relation))
- return false;
-
- ts = GetSnapshotCurrentTimestamp();
-
- SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
- latest_xmin = oldSnapshotControl->latest_xmin;
- next_map_update_ts = oldSnapshotControl->next_map_update;
- SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
-
- /*
- * Zero threshold always overrides to latest xmin, if valid. Without some
- * heuristic it will find its own snapshot too old on, for example, a
- * simple UPDATE -- which would make it useless for most testing, but
- * there is no principled way to ensure that it doesn't fail in this way.
- * Use a five-second delay to try to get useful testing behavior, but this
- * may need adjustment.
- */
- if (old_snapshot_threshold == 0)
- {
- if (TransactionIdPrecedes(latest_xmin, MyProc->xmin)
- && TransactionIdFollows(latest_xmin, xlimit))
- xlimit = latest_xmin;
-
- ts -= 5 * USECS_PER_SEC;
- }
- else
- {
- ts = AlignTimestampToMinuteBoundary(ts)
- - (old_snapshot_threshold * USECS_PER_MINUTE);
-
- /* Check for fast exit without LW locking. */
- SpinLockAcquire(&oldSnapshotControl->mutex_threshold);
- threshold_timestamp = oldSnapshotControl->threshold_timestamp;
- threshold_xid = oldSnapshotControl->threshold_xid;
- SpinLockRelease(&oldSnapshotControl->mutex_threshold);
-
- if (ts == threshold_timestamp)
- {
- /*
- * Current timestamp is in same bucket as the last limit that was
- * applied. Reuse.
- */
- xlimit = threshold_xid;
- }
- else if (ts == next_map_update_ts)
- {
- /*
- * FIXME: This branch is super iffy - but that should probably
- * fixed separately.
- */
- xlimit = latest_xmin;
- }
- else if (GetOldSnapshotFromTimeMapping(ts, &xlimit))
- {
- }
-
- /*
- * Failsafe protection against vacuuming work of active transaction.
- *
- * This is not an assertion because we avoid the spinlock for
- * performance, leaving open the possibility that xlimit could advance
- * and be more current; but it seems prudent to apply this limit. It
- * might make pruning a tiny bit less aggressive than it could be, but
- * protects against data loss bugs.
- */
- if (TransactionIdIsNormal(latest_xmin)
- && TransactionIdPrecedes(latest_xmin, xlimit))
- xlimit = latest_xmin;
- }
-
- if (TransactionIdIsValid(xlimit) &&
- TransactionIdFollowsOrEquals(xlimit, recentXmin))
- {
- *limit_ts = ts;
- *limit_xid = xlimit;
-
- return true;
- }
-
- return false;
-}
-
-/*
- * Take care of the circular buffer that maps time to xid.
- */
-void
-MaintainOldSnapshotTimeMapping(TimestampTz whenTaken, TransactionId xmin)
-{
- TimestampTz ts;
- TransactionId latest_xmin;
- TimestampTz update_ts;
- bool map_update_required = false;
-
- /* Never call this function when old snapshot checking is disabled. */
- Assert(old_snapshot_threshold >= 0);
-
- ts = AlignTimestampToMinuteBoundary(whenTaken);
-
- /*
- * Keep track of the latest xmin seen by any process. Update mapping with
- * a new value when we have crossed a bucket boundary.
- */
- SpinLockAcquire(&oldSnapshotControl->mutex_latest_xmin);
- latest_xmin = oldSnapshotControl->latest_xmin;
- update_ts = oldSnapshotControl->next_map_update;
- if (ts > update_ts)
- {
- oldSnapshotControl->next_map_update = ts;
- map_update_required = true;
- }
- if (TransactionIdFollows(xmin, latest_xmin))
- oldSnapshotControl->latest_xmin = xmin;
- SpinLockRelease(&oldSnapshotControl->mutex_latest_xmin);
-
- /* We only needed to update the most recent xmin value. */
- if (!map_update_required)
- return;
-
- /* No further tracking needed for 0 (used for testing). */
- if (old_snapshot_threshold == 0)
- return;
-
- /*
- * We don't want to do something stupid with unusual values, but we don't
- * want to litter the log with warnings or break otherwise normal
- * processing for this feature; so if something seems unreasonable, just
- * log at DEBUG level and return without doing anything.
- */
- if (whenTaken < 0)
- {
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with negative whenTaken = %ld",
- (long) whenTaken);
- return;
- }
- if (!TransactionIdIsNormal(xmin))
- {
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with xmin = %lu",
- (unsigned long) xmin);
- return;
- }
-
- LWLockAcquire(OldSnapshotTimeMapLock, LW_EXCLUSIVE);
-
- Assert(oldSnapshotControl->head_offset >= 0);
- Assert(oldSnapshotControl->head_offset < OLD_SNAPSHOT_TIME_MAP_ENTRIES);
- Assert((oldSnapshotControl->head_timestamp % USECS_PER_MINUTE) == 0);
- Assert(oldSnapshotControl->count_used >= 0);
- Assert(oldSnapshotControl->count_used <= OLD_SNAPSHOT_TIME_MAP_ENTRIES);
-
- if (oldSnapshotControl->count_used == 0)
- {
- /* set up first entry for empty mapping */
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->head_timestamp = ts;
- oldSnapshotControl->count_used = 1;
- oldSnapshotControl->xid_by_minute[0] = xmin;
- }
- else if (ts < oldSnapshotControl->head_timestamp)
- {
- /* old ts; log it at DEBUG */
- LWLockRelease(OldSnapshotTimeMapLock);
- elog(DEBUG1,
- "MaintainOldSnapshotTimeMapping called with old whenTaken = %ld",
- (long) whenTaken);
- return;
- }
- else if (ts <= (oldSnapshotControl->head_timestamp +
- ((oldSnapshotControl->count_used - 1)
- * USECS_PER_MINUTE)))
- {
- /* existing mapping; advance xid if possible */
- int bucket = (oldSnapshotControl->head_offset
- + ((ts - oldSnapshotControl->head_timestamp)
- / USECS_PER_MINUTE))
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- if (TransactionIdPrecedes(oldSnapshotControl->xid_by_minute[bucket], xmin))
- oldSnapshotControl->xid_by_minute[bucket] = xmin;
- }
- else
- {
- /* We need a new bucket, but it might not be the very next one. */
- int distance_to_new_tail;
- int distance_to_current_tail;
- int advance;
-
- /*
- * Our goal is for the new "tail" of the mapping, that is, the entry
- * which is newest and thus furthest from the "head" entry, to
- * correspond to "ts". Since there's one entry per minute, the
- * distance between the current head and the new tail is just the
- * number of minutes of difference between ts and the current
- * head_timestamp.
- *
- * The distance from the current head to the current tail is one less
- * than the number of entries in the mapping, because the entry at the
- * head_offset is for 0 minutes after head_timestamp.
- *
- * The difference between these two values is the number of minutes by
- * which we need to advance the mapping, either adding new entries or
- * rotating old ones out.
- */
- distance_to_new_tail =
- (ts - oldSnapshotControl->head_timestamp) / USECS_PER_MINUTE;
- distance_to_current_tail =
- oldSnapshotControl->count_used - 1;
- advance = distance_to_new_tail - distance_to_current_tail;
- Assert(advance > 0);
-
- if (advance >= OLD_SNAPSHOT_TIME_MAP_ENTRIES)
- {
- /* Advance is so far that all old data is junk; start over. */
- oldSnapshotControl->head_offset = 0;
- oldSnapshotControl->count_used = 1;
- oldSnapshotControl->xid_by_minute[0] = xmin;
- oldSnapshotControl->head_timestamp = ts;
- }
- else
- {
- /* Store the new value in one or more buckets. */
- int i;
-
- for (i = 0; i < advance; i++)
- {
- if (oldSnapshotControl->count_used == OLD_SNAPSHOT_TIME_MAP_ENTRIES)
- {
- /* Map full and new value replaces old head. */
- int old_head = oldSnapshotControl->head_offset;
-
- if (old_head == (OLD_SNAPSHOT_TIME_MAP_ENTRIES - 1))
- oldSnapshotControl->head_offset = 0;
- else
- oldSnapshotControl->head_offset = old_head + 1;
- oldSnapshotControl->xid_by_minute[old_head] = xmin;
- oldSnapshotControl->head_timestamp += USECS_PER_MINUTE;
- }
- else
- {
- /* Extend map to unused entry. */
- int new_tail = (oldSnapshotControl->head_offset
- + oldSnapshotControl->count_used)
- % OLD_SNAPSHOT_TIME_MAP_ENTRIES;
-
- oldSnapshotControl->count_used++;
- oldSnapshotControl->xid_by_minute[new_tail] = xmin;
- }
- }
- }
- }
-
- LWLockRelease(OldSnapshotTimeMapLock);
-}
-
-
/*
* Setup a snapshot that replaces normal catalog snapshots that allows catalog
* access to behave just like it did at a certain point in the past.
extern void heap_page_prune_opt(Relation relation, Buffer buffer);
extern int heap_page_prune(Relation relation, Buffer buffer,
struct GlobalVisState *vistest,
- TransactionId old_snap_xmin,
- TimestampTz old_snap_ts,
int *nnewlpdead,
OffsetNumber *off_loc);
extern void heap_page_prune_execute(Buffer buffer,
extern bool BgBufferSync(struct WritebackContext *wb_context);
-extern void TestForOldSnapshot_impl(Snapshot snapshot, Relation relation);
-
/* in buf_init.c */
extern void InitBufferPool(void);
extern Size BufferShmemSize(void);
/*
* BufferGetPage
* Returns the page associated with a buffer.
- *
- * When this is called as part of a scan, there may be a need for a nearby
- * call to TestForOldSnapshot(). See the definition of that for details.
*/
static inline Page
BufferGetPage(Buffer buffer)
return (Page) BufferGetBlock(buffer);
}
-/*
- * Check whether the given snapshot is too old to have safely read the given
- * page from the given table. If so, throw a "snapshot too old" error.
- *
- * This test generally needs to be performed after every BufferGetPage() call
- * that is executed as part of a scan. It is not needed for calls made for
- * modifying the page (for example, to position to the right place to insert a
- * new index tuple or for vacuuming). It may also be omitted where calls to
- * lower-level functions will have already performed the test.
- *
- * Note that a NULL snapshot argument is allowed and causes a fast return
- * without error; this is to support call sites which can be called from
- * either scans or index modification areas.
- *
- * For best performance, keep the tests that are fastest and/or most likely to
- * exclude a page from old snapshot testing near the front.
- */
-static inline void
-TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
-{
- Assert(relation != NULL);
-
- if (old_snapshot_threshold >= 0
- && (snapshot) != NULL
- && ((snapshot)->snapshot_type == SNAPSHOT_MVCC
- || (snapshot)->snapshot_type == SNAPSHOT_TOAST)
- && !XLogRecPtrIsInvalid((snapshot)->lsn)
- && PageGetLSN(page) > (snapshot)->lsn)
- TestForOldSnapshot_impl(snapshot, relation);
-}
-
#endif /* FRONTEND */
#endif /* BUFMGR_H */
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * old_snapshot.h
- * Data structures for 'snapshot too old'
- *
- * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- * src/include/utils/old_snapshot.h
- *
- *-------------------------------------------------------------------------
- */
-
-#ifndef OLD_SNAPSHOT_H
-#define OLD_SNAPSHOT_H
-
-#include "datatype/timestamp.h"
-#include "storage/s_lock.h"
-
-/*
- * Structure for dealing with old_snapshot_threshold implementation.
- */
-typedef struct OldSnapshotControlData
-{
- /*
- * Variables for old snapshot handling are shared among processes and are
- * only allowed to move forward.
- */
- slock_t mutex_current; /* protect current_timestamp */
- TimestampTz current_timestamp; /* latest snapshot timestamp */
- slock_t mutex_latest_xmin; /* protect latest_xmin and next_map_update */
- TransactionId latest_xmin; /* latest snapshot xmin */
- TimestampTz next_map_update; /* latest snapshot valid up to */
- slock_t mutex_threshold; /* protect threshold fields */
- TimestampTz threshold_timestamp; /* earlier snapshot is old */
- TransactionId threshold_xid; /* earlier xid may be gone */
-
- /*
- * Keep one xid per minute for old snapshot error handling.
- *
- * Use a circular buffer with a head offset, a count of entries currently
- * used, and a timestamp corresponding to the xid at the head offset. A
- * count_used value of zero means that there are no times stored; a
- * count_used value of OLD_SNAPSHOT_TIME_MAP_ENTRIES means that the buffer
- * is full and the head must be advanced to add new entries. Use
- * timestamps aligned to minute boundaries, since that seems less
- * surprising than aligning based on the first usage timestamp. The
- * latest bucket is effectively stored within latest_xmin. The circular
- * buffer is updated when we get a new xmin value that doesn't fall into
- * the same interval.
- *
- * It is OK if the xid for a given time slot is from earlier than
- * calculated by adding the number of minutes corresponding to the
- * (possibly wrapped) distance from the head offset to the time of the
- * head entry, since that just results in the vacuuming of old tuples
- * being slightly less aggressive. It would not be OK for it to be off in
- * the other direction, since it might result in vacuuming tuples that are
- * still expected to be there.
- *
- * Use of an SLRU was considered but not chosen because it is more
- * heavyweight than is needed for this, and would probably not be any less
- * code to implement.
- *
- * Persistence is not needed.
- */
- int head_offset; /* subscript of oldest tracked time */
- TimestampTz head_timestamp; /* time corresponding to head xid */
- int count_used; /* how many slots are in use */
- TransactionId xid_by_minute[FLEXIBLE_ARRAY_MEMBER];
-} OldSnapshotControlData;
-
-extern PGDLLIMPORT volatile OldSnapshotControlData *oldSnapshotControl;
-
-#endif
#include "utils/snapshot.h"
-/*
- * The structure used to map times to TransactionId values for the "snapshot
- * too old" feature must have a few entries at the tail to hold old values;
- * otherwise the lookup will often fail and the expected early pruning or
- * vacuum will not usually occur. It is best if this padding is for a number
- * of minutes greater than a thread would normally be stalled, but it's OK if
- * early vacuum opportunities are occasionally missed, so there's no need to
- * use an extreme value or get too fancy. 10 minutes seems plenty.
- */
-#define OLD_SNAPSHOT_PADDING_ENTRIES 10
-#define OLD_SNAPSHOT_TIME_MAP_ENTRIES (old_snapshot_threshold + OLD_SNAPSHOT_PADDING_ENTRIES)
-
-/*
- * Common definition of relation properties that allow early pruning/vacuuming
- * when old_snapshot_threshold >= 0.
- */
-#define RelationAllowsEarlyPruning(rel) \
-( \
- RelationIsPermanent(rel) && !IsCatalogRelation(rel) \
- && !RelationIsAccessibleInLogicalDecoding(rel) \
-)
-
-#define EarlyPruningEnabled(rel) (old_snapshot_threshold >= 0 && RelationAllowsEarlyPruning(rel))
-
-/* GUC variables */
-extern PGDLLIMPORT int old_snapshot_threshold;
-
-
-extern Size SnapMgrShmemSize(void);
-extern void SnapMgrInit(void);
-extern TimestampTz GetSnapshotCurrentTimestamp(void);
-extern TimestampTz GetOldSnapshotThresholdTimestamp(void);
-extern void SnapshotTooOldMagicForTest(void);
-
extern PGDLLIMPORT bool FirstSnapshotSet;
extern PGDLLIMPORT TransactionId TransactionXmin;
((snapshot)->snapshot_type == SNAPSHOT_MVCC || \
(snapshot)->snapshot_type == SNAPSHOT_HISTORIC_MVCC)
-#ifndef FRONTEND
-static inline bool
-OldSnapshotThresholdActive(void)
-{
- return old_snapshot_threshold >= 0;
-}
-#endif
-
extern Snapshot GetTransactionSnapshot(void);
extern Snapshot GetLatestSnapshot(void);
extern void SnapshotSetCommandId(CommandId curcid);
extern void WaitForOlderSnapshots(TransactionId limitXmin, bool progress);
extern bool ThereAreNoPriorRegisteredSnapshots(void);
extern bool HaveRegisteredOrActiveSnapshot(void);
-extern bool TransactionIdLimitedForOldSnapshots(TransactionId recentXmin,
- Relation relation,
- TransactionId *limit_xid,
- TimestampTz *limit_ts);
-extern void SetOldSnapshotThresholdTimestamp(TimestampTz ts, TransactionId xlimit);
-extern void MaintainOldSnapshotTimeMapping(TimestampTz whenTaken,
- TransactionId xmin);
extern char *ExportSnapshot(Snapshot snapshot);
dummy_seclabel \
libpq_pipeline \
plsample \
- snapshot_too_old \
spgist_name_ops \
test_bloomfilter \
test_copy_callbacks \
subdir('ldap_password_func')
subdir('libpq_pipeline')
subdir('plsample')
-subdir('snapshot_too_old')
subdir('spgist_name_ops')
subdir('ssl_passphrase_callback')
subdir('test_bloomfilter')
+++ /dev/null
-# Generated subdirectories
-/output_iso/
-/tmp_check_iso/
+++ /dev/null
-# src/test/modules/snapshot_too_old/Makefile
-
-# Note: because we don't tell the Makefile there are any regression tests,
-# we have to clean those result files explicitly
-EXTRA_CLEAN = $(pg_regress_clean_files)
-
-ISOLATION = sto_using_cursor sto_using_select sto_using_hash_index
-ISOLATION_OPTS = --temp-config $(top_srcdir)/src/test/modules/snapshot_too_old/sto.conf
-
-# Disabled because these tests require "old_snapshot_threshold" >= 0, which
-# typical installcheck users do not have (e.g. buildfarm clients).
-NO_INSTALLCHECK = 1
-
-ifdef USE_PGXS
-PG_CONFIG = pg_config
-PGXS := $(shell $(PG_CONFIG) --pgxs)
-include $(PGXS)
-else
-subdir = src/test/modules/snapshot_too_old
-top_builddir = ../../../..
-include $(top_builddir)/src/Makefile.global
-include $(top_srcdir)/contrib/contrib-global.mk
-endif
-
-# But it can nonetheless be very helpful to run tests on preexisting
-# installation, allow to do so, but only if requested explicitly.
-installcheck-force:
- $(pg_isolation_regress_installcheck) $(ISOLATION)
+++ /dev/null
-Parsed test spec with 2 sessions
-
-starting permutation: s1decl s1f1 s1sleep s2u s1f2
-step s1decl: DECLARE cursor1 CURSOR FOR SELECT c FROM sto1;
-step s1f1: FETCH FIRST FROM cursor1;
-c
--
-1
-(1 row)
-
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: FETCH FIRST FROM cursor1;
-ERROR: snapshot too old
+++ /dev/null
-Parsed test spec with 2 sessions
-
-starting permutation: noseq s1f1 s2sleep s2u s1f2
-step noseq: SET enable_seqscan = false;
-step s1f1: SELECT c FROM sto1 where c = 1000;
- c
-----
-1000
-(1 row)
-
-step s2sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1000;
-step s1f2: SELECT c FROM sto1 where c = 1001;
-ERROR: snapshot too old
+++ /dev/null
-Parsed test spec with 2 sessions
-
-starting permutation: s1f1 s1sleep s2u s1f2
-step s1f1: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-c
--
-1
-(1 row)
-
-step s1sleep: SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold';
-setting|pg_sleep
--------+--------
- 0|
-(1 row)
-
-step s2u: UPDATE sto1 SET c = 1001 WHERE c = 1;
-step s1f2: SELECT c FROM sto1 ORDER BY c LIMIT 1;
-ERROR: snapshot too old
+++ /dev/null
-# Copyright (c) 2022-2023, PostgreSQL Global Development Group
-
-tests += {
- 'name': 'snapshot_too_old',
- 'sd': meson.current_source_dir(),
- 'bd': meson.current_build_dir(),
- 'isolation': {
- 'test_kwargs': {'priority': 40}, # sto tests are slow, start early
- 'specs': [
- 'sto_using_cursor',
- 'sto_using_select',
- 'sto_using_hash_index',
- ],
- 'regress_args': ['--temp-config', files('sto.conf')],
- # Disabled because these tests require "old_snapshot_threshold" >= 0, which
- # typical runningcheck users do not have (e.g. buildfarm clients).
- 'runningcheck': false,
- },
-}
+++ /dev/null
-# This test provokes a "snapshot too old" error using a cursor.
-#
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made. With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising value isn't so hard
-# to identify.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "s1decl" { DECLARE cursor1 CURSOR FOR SELECT c FROM sto1; }
-step "s1f1" { FETCH FIRST FROM cursor1; }
-step "s1sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s1f2" { FETCH FIRST FROM cursor1; }
-teardown { COMMIT; }
-
-session "s2"
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1; }
-
-permutation "s1decl" "s1f1" "s1sleep" "s2u" "s1f2"
+++ /dev/null
-# This test is like sto_using_select, except that we test access via a
-# hash index.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
- CREATE INDEX idx_sto1 ON sto1 USING HASH (c);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "noseq" { SET enable_seqscan = false; }
-step "s1f1" { SELECT c FROM sto1 where c = 1000; }
-step "s1f2" { SELECT c FROM sto1 where c = 1001; }
-teardown { ROLLBACK; }
-
-session "s2"
-step "s2sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1000; }
-
-permutation "noseq" "s1f1" "s2sleep" "s2u" "s1f2"
+++ /dev/null
-# This test provokes a "snapshot too old" error using SELECT statements.
-#
-# The sleep is needed because with a threshold of zero a statement could error
-# on changes it made. With more normal settings no external delay is needed,
-# but we don't want these tests to run long enough to see that, since
-# granularity is in minutes.
-#
-# Since results depend on the value of old_snapshot_threshold, sneak that into
-# the line generated by the sleep, so that a surprising value isn't so hard
-# to identify.
-
-setup
-{
- CREATE TABLE sto1 (c int NOT NULL);
- INSERT INTO sto1 SELECT generate_series(1, 1000);
-}
-setup
-{
- VACUUM ANALYZE sto1;
-}
-
-teardown
-{
- DROP TABLE sto1;
-}
-
-session "s1"
-setup { BEGIN ISOLATION LEVEL REPEATABLE READ; }
-step "s1f1" { SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-step "s1sleep" { SELECT setting, pg_sleep(6) FROM pg_settings WHERE name = 'old_snapshot_threshold'; }
-step "s1f2" { SELECT c FROM sto1 ORDER BY c LIMIT 1; }
-teardown { COMMIT; }
-
-session "s2"
-step "s2u" { UPDATE sto1 SET c = 1001 WHERE c = 1; }
-
-permutation "s1f1" "s1sleep" "s2u" "s1f2"
+++ /dev/null
-autovacuum = off
-old_snapshot_threshold = 0
Oid
OidOptions
OkeysState
-OldSnapshotControlData
-OldSnapshotTimeMapping
OldToNewMapping
OldToNewMappingData
OnCommitAction