Prevent "snapshot too old" from trying to return pruned TOAST tuples.
authorRobert Haas <[email protected]>
Wed, 3 Aug 2016 20:41:43 +0000 (16:41 -0400)
committerRobert Haas <[email protected]>
Wed, 3 Aug 2016 20:50:01 +0000 (16:50 -0400)
Previously, we tested for MVCC snapshots to see whether they were too
old, but not TOAST snapshots, which can lead to complaints about missing
TOAST chunks if those chunks are subject to early pruning.  Ideally,
the threshold lsn and timestamp for a TOAST snapshot would be that of
the corresponding MVCC snapshot, but since we have no way of deciding
which MVCC snapshot was used to fetch the TOAST pointer, use the oldest
active or registered snapshot instead.

Reported by Andres Freund, who also sketched out what the fix should
look like.  Patch by me, reviewed by Amit Kapila.

src/backend/access/heap/tuptoaster.c
src/backend/utils/time/snapmgr.c
src/backend/utils/time/tqual.c
src/include/storage/bufmgr.h
src/include/utils/snapmgr.h
src/include/utils/tqual.h

index 4cffd21d180e373309b11df85b7f0c74074df950..55b6b41f8c7fe9068000e660582de29c2837ed5b 100644 (file)
@@ -40,6 +40,7 @@
 #include "utils/expandeddatum.h"
 #include "utils/fmgroids.h"
 #include "utils/rel.h"
+#include "utils/snapmgr.h"
 #include "utils/typcache.h"
 #include "utils/tqual.h"
 
@@ -81,6 +82,7 @@ static int toast_open_indexes(Relation toastrel,
                   int *num_indexes);
 static void toast_close_indexes(Relation *toastidxs, int num_indexes,
                    LOCKMODE lock);
+static void init_toast_snapshot(Snapshot toast_snapshot);
 
 
 /* ----------
@@ -1665,6 +1667,7 @@ toast_delete_datum(Relation rel, Datum value)
    HeapTuple   toasttup;
    int         num_indexes;
    int         validIndex;
+   SnapshotData    SnapshotToast;
 
    if (!VARATT_IS_EXTERNAL_ONDISK(attr))
        return;
@@ -1696,8 +1699,9 @@ toast_delete_datum(Relation rel, Datum value)
     * sequence or not, but since we've already locked the index we might as
     * well use systable_beginscan_ordered.)
     */
+   init_toast_snapshot(&SnapshotToast);
    toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                          SnapshotToast, 1, &toastkey);
+                                          &SnapshotToast, 1, &toastkey);
    while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
@@ -1730,6 +1734,7 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
    int         num_indexes;
    int         validIndex;
    Relation   *toastidxs;
+   SnapshotData    SnapshotToast;
 
    /* Fetch a valid index relation */
    validIndex = toast_open_indexes(toastrel,
@@ -1748,9 +1753,10 @@ toastrel_valueid_exists(Relation toastrel, Oid valueid)
    /*
     * Is there any such chunk?
     */
+   init_toast_snapshot(&SnapshotToast);
    toastscan = systable_beginscan(toastrel,
                                   RelationGetRelid(toastidxs[validIndex]),
-                                  true, SnapshotToast, 1, &toastkey);
+                                  true, &SnapshotToast, 1, &toastkey);
 
    if (systable_getnext(toastscan) != NULL)
        result = true;
@@ -1813,6 +1819,7 @@ toast_fetch_datum(struct varlena * attr)
    int32       chunksize;
    int         num_indexes;
    int         validIndex;
+   SnapshotData    SnapshotToast;
 
    if (!VARATT_IS_EXTERNAL_ONDISK(attr))
        elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
@@ -1859,8 +1866,9 @@ toast_fetch_datum(struct varlena * attr)
     */
    nextidx = 0;
 
+   init_toast_snapshot(&SnapshotToast);
    toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                          SnapshotToast, 1, &toastkey);
+                                          &SnapshotToast, 1, &toastkey);
    while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
@@ -1990,6 +1998,7 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
    int32       chcpyend;
    int         num_indexes;
    int         validIndex;
+   SnapshotData    SnapshotToast;
 
    if (!VARATT_IS_EXTERNAL_ONDISK(attr))
        elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
@@ -2082,9 +2091,10 @@ toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
     *
     * The index is on (valueid, chunkidx) so they will come in order
     */
+   init_toast_snapshot(&SnapshotToast);
    nextidx = startchunk;
    toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
-                                        SnapshotToast, nscankeys, toastkey);
+                                        &SnapshotToast, nscankeys, toastkey);
    while ((ttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
    {
        /*
@@ -2289,3 +2299,22 @@ toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
        index_close(toastidxs[i], lock);
    pfree(toastidxs);
 }
+
+/* ----------
+ * init_toast_snapshot
+ *
+ * Initialize an appropriate TOAST snapshot.  We must use an MVCC snapshot
+ * to initialize the TOAST snapshot; since we don't know which one to use,
+ * just use the oldest one.  This is safe: at worst, we will get a "snapshot
+ * too old" error that might have been avoided otherwise.
+ */
+static void
+init_toast_snapshot(Snapshot toast_snapshot)
+{
+   Snapshot    snapshot = GetOldestSnapshot();
+
+   if (snapshot == NULL)
+       elog(ERROR, "no known snapshots");
+
+   InitToastSnapshot(toast_snapshot, snapshot->lsn, snapshot->whenTaken);
+}
index e1caf01c603b4b0559086dc8960a29f38b100f23..4bddaed33ba91653df87a471332c444fd0597fef 100644 (file)
@@ -188,6 +188,9 @@ typedef struct ActiveSnapshotElt
 /* Top of the stack of active snapshots */
 static ActiveSnapshotElt *ActiveSnapshot = NULL;
 
+/* Bottom of the stack of active snapshots */
+static ActiveSnapshotElt *OldestActiveSnapshot = NULL;
+
 /*
  * Currently registered Snapshots.  Ordered in a heap by xmin, so that we can
  * quickly find the one with lowest xmin, to advance our MyPgXat->xmin.
@@ -393,6 +396,34 @@ GetLatestSnapshot(void)
    return SecondarySnapshot;
 }
 
+/*
+ * GetOldestSnapshot
+ *
+ *     Get the oldest known snapshot, as judged by the LSN.
+ */
+Snapshot
+GetOldestSnapshot(void)
+{
+   Snapshot    OldestRegisteredSnapshot = NULL;
+   XLogRecPtr  RegisteredLSN = InvalidXLogRecPtr;
+   XLogRecPtr  ActiveLSN = InvalidXLogRecPtr;
+
+   if (!pairingheap_is_empty(&RegisteredSnapshots))
+   {
+       OldestRegisteredSnapshot = pairingheap_container(SnapshotData, ph_node,
+                                   pairingheap_first(&RegisteredSnapshots));
+       RegisteredLSN = OldestRegisteredSnapshot->lsn;
+   }
+
+   if (OldestActiveSnapshot != NULL)
+       ActiveLSN = OldestActiveSnapshot->as_snap->lsn;
+
+   if (XLogRecPtrIsInvalid(RegisteredLSN) || RegisteredLSN > ActiveLSN)
+       return OldestActiveSnapshot->as_snap;
+
+   return OldestRegisteredSnapshot;
+}
+
 /*
  * GetCatalogSnapshot
  *     Get a snapshot that is sufficiently up-to-date for scan of the
@@ -674,6 +705,8 @@ PushActiveSnapshot(Snapshot snap)
    newactive->as_snap->active_count++;
 
    ActiveSnapshot = newactive;
+   if (OldestActiveSnapshot == NULL)
+       OldestActiveSnapshot = ActiveSnapshot;
 }
 
 /*
@@ -744,6 +777,8 @@ PopActiveSnapshot(void)
 
    pfree(ActiveSnapshot);
    ActiveSnapshot = newstack;
+   if (ActiveSnapshot == NULL)
+       OldestActiveSnapshot = NULL;
 
    SnapshotResetXmin();
 }
@@ -953,6 +988,8 @@ AtSubAbort_Snapshot(int level)
        pfree(ActiveSnapshot);
 
        ActiveSnapshot = next;
+       if (ActiveSnapshot == NULL)
+           OldestActiveSnapshot = NULL;
    }
 
    SnapshotResetXmin();
@@ -1037,6 +1074,7 @@ AtEOXact_Snapshot(bool isCommit)
     * it'll go away with TopTransactionContext.
     */
    ActiveSnapshot = NULL;
+   OldestActiveSnapshot = NULL;
    pairingheap_reset(&RegisteredSnapshots);
 
    CurrentSnapshot = NULL;
index 2960455d6e3261dd5b116a761d4ada42dbdede1f..d99c847000ea80431bf3f455d6fc2276960f8fcf 100644 (file)
@@ -78,7 +78,6 @@
 /* Static variables representing various special snapshot semantics */
 SnapshotData SnapshotSelfData = {HeapTupleSatisfiesSelf};
 SnapshotData SnapshotAnyData = {HeapTupleSatisfiesAny};
-SnapshotData SnapshotToastData = {HeapTupleSatisfiesToast};
 
 /* local functions */
 static bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot);
index 3d5dea7efb553f75815f3e93a9d34d61abbeb29c..fcd0c75b1c73b577085d0e8651b96ffe426f3058 100644 (file)
@@ -279,7 +279,8 @@ TestForOldSnapshot(Snapshot snapshot, Relation relation, Page page)
 
    if (old_snapshot_threshold >= 0
        && (snapshot) != NULL
-       && (snapshot)->satisfies == HeapTupleSatisfiesMVCC
+       && ((snapshot)->satisfies == HeapTupleSatisfiesMVCC
+           || (snapshot)->satisfies == HeapTupleSatisfiesToast)
        && !XLogRecPtrIsInvalid((snapshot)->lsn)
        && PageGetLSN(page) > (snapshot)->lsn)
        TestForOldSnapshot_impl(snapshot, relation);
index e9414274afd6084fec8aa8e16dd3d7e133c93ef9..9e3827249e2144e48ce7e0fa24477fae84867037 100644 (file)
@@ -64,6 +64,7 @@ extern TransactionId RecentGlobalDataXmin;
 extern Snapshot GetTransactionSnapshot(void);
 extern Snapshot GetLatestSnapshot(void);
 extern void SnapshotSetCommandId(CommandId curcid);
+extern Snapshot GetOldestSnapshot(void);
 
 extern Snapshot GetCatalogSnapshot(Oid relid);
 extern Snapshot GetNonHistoricCatalogSnapshot(Oid relid);
index 244594495df45ad1a7b00c216529a086f01c1cfa..8041e7b67fec091a58140132f0d26ae9298d90f2 100644 (file)
 #define TQUAL_H
 
 #include "utils/snapshot.h"
+#include "access/xlogdefs.h"
 
 
 /* Static variables representing various special snapshot semantics */
 extern PGDLLIMPORT SnapshotData SnapshotSelfData;
 extern PGDLLIMPORT SnapshotData SnapshotAnyData;
-extern PGDLLIMPORT SnapshotData SnapshotToastData;
 extern PGDLLIMPORT SnapshotData CatalogSnapshotData;
 
 #define SnapshotSelf       (&SnapshotSelfData)
 #define SnapshotAny            (&SnapshotAnyData)
-#define SnapshotToast      (&SnapshotToastData)
-
-/*
- * We don't provide a static SnapshotDirty variable because it would be
- * non-reentrant.  Instead, users of that snapshot type should declare a
- * local variable of type SnapshotData, and initialize it with this macro.
- */
-#define InitDirtySnapshot(snapshotdata)  \
-   ((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
 
 /* This macro encodes the knowledge of which snapshots are MVCC-safe */
 #define IsMVCCSnapshot(snapshot)  \
@@ -100,4 +91,25 @@ extern bool ResolveCminCmaxDuringDecoding(struct HTAB *tuplecid_data,
                              HeapTuple htup,
                              Buffer buffer,
                              CommandId *cmin, CommandId *cmax);
+
+/*
+ * We don't provide a static SnapshotDirty variable because it would be
+ * non-reentrant.  Instead, users of that snapshot type should declare a
+ * local variable of type SnapshotData, and initialize it with this macro.
+ */
+#define InitDirtySnapshot(snapshotdata)  \
+   ((snapshotdata).satisfies = HeapTupleSatisfiesDirty)
+
+/*
+ * Similarly, some initialization is required for SnapshotToast.  We need
+ * to set lsn and whenTaken correctly to support snapshot_too_old.
+ */
+static inline void
+InitToastSnapshot(Snapshot snapshot, XLogRecPtr lsn, int64 whenTaken)
+{
+   snapshot->satisfies = HeapTupleSatisfiesToast;
+   snapshot->lsn = lsn;
+   snapshot->whenTaken = whenTaken;
+}
+
 #endif   /* TQUAL_H */