Add additional checks while creating the initial decoding snapshot.
authorAmit Kapila <[email protected]>
Mon, 21 Nov 2022 03:24:43 +0000 (08:54 +0530)
committerAmit Kapila <[email protected]>
Mon, 21 Nov 2022 03:24:43 +0000 (08:54 +0530)
As per one of the CI reports, there is an assertion failure which
indicates that we were trying to use an unenforced xmin horizon for
decoding snapshots. Though, we couldn't figure out the reason for
assertion failure these checks would help us in finding the reason if the
problem happens again in the future.

Author: Amit Kapila based on suggestions by Andres Freund
Reviewd by: Andres Freund
Discussion: https://postgr.es/m/CAA4eK1L8wYcyTPxNzPGkhuO52WBGoOZbT0A73Le=ZUWYAYmdfw@mail.gmail.com

src/backend/replication/logical/snapbuild.c
src/backend/replication/walsender.c

index 556b7fcba38ada6258f4cd8cf0d7e80e9aaa314f..a1fd1d92d6370c990878f5ef719ea521bb2781c7 100644 (file)
@@ -566,11 +566,18 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
 {
    Snapshot    snap;
    TransactionId xid;
+   TransactionId safeXid;
    TransactionId *newxip;
    int         newxcnt = 0;
 
-   Assert(!FirstSnapshotSet);
    Assert(XactIsoLevel == XACT_REPEATABLE_READ);
+   Assert(builder->building_full_snapshot);
+
+   /* don't allow older snapshots */
+   InvalidateCatalogSnapshot(); /* about to overwrite MyProc->xmin */
+   if (HaveRegisteredOrActiveSnapshot())
+       elog(ERROR, "cannot build an initial slot snapshot when snapshots exist");
+   Assert(!HistoricSnapshotActive());
 
    if (builder->state != SNAPBUILD_CONSISTENT)
        elog(ERROR, "cannot build an initial slot snapshot before reaching a consistent state");
@@ -588,18 +595,18 @@ SnapBuildInitialSnapshot(SnapBuild *builder)
     * We know that snap->xmin is alive, enforced by the logical xmin
     * mechanism. Due to that we can do this without locks, we're only
     * changing our own value.
+    *
+    * Building an initial snapshot is expensive and an unenforced xmin
+    * horizon would have bad consequences, therefore always double-check that
+    * the horizon is enforced.
     */
-#ifdef USE_ASSERT_CHECKING
-   {
-       TransactionId safeXid;
+   LWLockAcquire(ProcArrayLock, LW_SHARED);
+   safeXid = GetOldestSafeDecodingTransactionId(false);
+   LWLockRelease(ProcArrayLock);
 
-       LWLockAcquire(ProcArrayLock, LW_SHARED);
-       safeXid = GetOldestSafeDecodingTransactionId(false);
-       LWLockRelease(ProcArrayLock);
-
-       Assert(TransactionIdPrecedesOrEquals(safeXid, snap->xmin));
-   }
-#endif
+   if (TransactionIdFollows(safeXid, snap->xmin))
+       elog(ERROR, "cannot build an initial slot snapshot as oldest safe xid %u follows snapshot's xmin %u",
+            safeXid, snap->xmin);
 
    MyProc->xmin = snap->xmin;
 
index a81ef6a2014fb6b51c3738acf04a791f0880b333..c11bb3716f42cf1a295bd81d1da89eaae0abb27e 100644 (file)
@@ -1099,6 +1099,11 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd)
                /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
                        (errmsg("%s must be called in REPEATABLE READ isolation mode transaction",
                                "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));
+           if (!XactReadOnly)
+               ereport(ERROR,
+               /*- translator: %s is a CREATE_REPLICATION_SLOT statement */
+                       (errmsg("%s must be called in a read only transaction",
+                               "CREATE_REPLICATION_SLOT ... (SNAPSHOT 'use')")));
 
            if (FirstSnapshotSet)
                ereport(ERROR,