From 8eace46d34ab6ac0d887aa4d3504bc4222c2e448 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Thu, 19 Jan 2017 18:23:09 -0300 Subject: [PATCH] Fix race condition in reading commit timestamps MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit If a user requests the commit timestamp for a transaction old enough that its data is concurrently being truncated away by vacuum at just the right time, they would receive an ugly internal file-not-found error message from slru.c rather than the expected NULL return value. In a primary server, the window for the race is very small: the lookup has to occur exactly between the two calls by vacuum, and there's not a lot that happens between them (mostly just a multixact truncate). In a standby server, however, the window is larger because the truncation is executed as soon as the WAL record for it is replayed, but the advance of the oldest-Xid is not executed until the next checkpoint record. To fix in the primary, simply reverse the order of operations in vac_truncate_clog. To fix in the standby, augment the WAL truncation record so that the standby is aware of the new oldest-XID value and can apply the update immediately. WAL version bumped because of this. No backpatch, because of the low importance of the bug and its rarity. Author: Craig Ringer Reviewed-By: Petr Jelínek, Peter Eisentraut Discussion: https://postgr.es/m/CAMsr+YFhVtRQT1VAwC+WGbbxZZRzNou=N9Ed-FrCqkwQ8H8oJQ@mail.gmail.com --- src/backend/access/rmgrdesc/committsdesc.c | 6 +++--- src/backend/access/transam/commit_ts.c | 21 +++++++++++++-------- src/backend/commands/vacuum.c | 10 +++++++++- src/include/access/commit_ts.h | 8 ++++++++ src/include/access/xlog_internal.h | 2 +- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/backend/access/rmgrdesc/committsdesc.c b/src/backend/access/rmgrdesc/committsdesc.c index 86f23405aa6..3e670bd5438 100644 --- a/src/backend/access/rmgrdesc/committsdesc.c +++ b/src/backend/access/rmgrdesc/committsdesc.c @@ -33,10 +33,10 @@ commit_ts_desc(StringInfo buf, XLogReaderState *record) } else if (info == COMMIT_TS_TRUNCATE) { - int pageno; + xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) rec; - memcpy(&pageno, rec, sizeof(int)); - appendStringInfo(buf, "%d", pageno); + appendStringInfo(buf, "pageno %d, oldestXid %u", + trunc->pageno, trunc->oldestXid); } else if (info == COMMIT_TS_SETTS) { diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 2403de3ae36..18a5f5602c7 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno); +static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid); static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, RepOriginId nodeid); @@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact) return; /* nothing to remove */ /* Write XLOG record */ - WriteTruncateXlogRec(cutoffPage); + WriteTruncateXlogRec(cutoffPage, oldestXact); /* Now we can remove the old CommitTs segment(s) */ SimpleLruTruncate(CommitTsCtl, cutoffPage); @@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno) * Write a TRUNCATE xlog record */ static void -WriteTruncateXlogRec(int pageno) +WriteTruncateXlogRec(int pageno, TransactionId oldestXid) { + xl_commit_ts_truncate xlrec; + + xlrec.pageno = pageno; + xlrec.oldestXid = oldestXid; + XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&xlrec), SizeOfCommitTsTruncate); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE); } @@ -967,17 +972,17 @@ commit_ts_redo(XLogReaderState *record) } else if (info == COMMIT_TS_TRUNCATE) { - int pageno; + xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) XLogRecGetData(record); - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + AdvanceOldestCommitTsXid(trunc->oldestXid); /* * During XLOG replay, latest_page_number isn't set up yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ - CommitTsCtl->shared->latest_page_number = pageno; + CommitTsCtl->shared->latest_page_number = trunc->pageno; - SimpleLruTruncate(CommitTsCtl, pageno); + SimpleLruTruncate(CommitTsCtl, trunc->pageno); } else if (info == COMMIT_TS_SETTS) { diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 0f72c1c36ad..812fb4a48fe 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -1152,6 +1152,15 @@ vac_truncate_clog(TransactionId frozenXID, if (bogus) return; + /* + * Advance the oldest value for commit timestamps before truncating, so + * that if a user requests a timestamp for a transaction we're truncating + * away right after this point, they get NULL instead of an ugly "file not + * found" error from slru.c. This doesn't matter for xact/multixact + * because they are not subject to arbitrary lookups from users. + */ + AdvanceOldestCommitTsXid(frozenXID); + /* * Truncate CLOG, multixact and CommitTs to the oldest computed value. */ @@ -1167,7 +1176,6 @@ vac_truncate_clog(TransactionId frozenXID, */ SetTransactionIdLimit(frozenXID, oldestxid_datoid); SetMultiXactIdLimit(minMulti, minmulti_datoid); - AdvanceOldestCommitTsXid(frozenXID); } diff --git a/src/include/access/commit_ts.h b/src/include/access/commit_ts.h index 98bd938d5be..f172c91d8f3 100644 --- a/src/include/access/commit_ts.h +++ b/src/include/access/commit_ts.h @@ -61,6 +61,14 @@ typedef struct xl_commit_ts_set #define SizeOfCommitTsSet (offsetof(xl_commit_ts_set, mainxid) + \ sizeof(TransactionId)) +typedef struct xl_commit_ts_truncate +{ + int pageno; + TransactionId oldestXid; +} xl_commit_ts_truncate; + +#define SizeOfCommitTsTruncate (offsetof(xl_commit_ts_truncate, oldestXid) + \ + sizeof(TransactionId)) extern void commit_ts_redo(XLogReaderState *record); extern void commit_ts_desc(StringInfo buf, XLogReaderState *record); diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index e0fcd056776..8ad4d47d127 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -31,7 +31,7 @@ /* * Each page of XLOG file has a header like this: */ -#define XLOG_PAGE_MAGIC 0xD093 /* can be used as WAL version indicator */ +#define XLOG_PAGE_MAGIC 0xD094 /* can be used as WAL version indicator */ typedef struct XLogPageHeaderData { -- 2.30.2