Prevent concurrent SimpleLruTruncate() for any given SLRU.

author Noah Misch <[email protected]>

Sat, 15 Aug 2020 17:15:53 +0000 (10:15 -0700)

committer Noah Misch <[email protected]>

Sat, 15 Aug 2020 17:15:56 +0000 (10:15 -0700)
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml

index 791c747638715bd2e6a5f389dc50a5d095d30268..25478510f3aaaa88c951ff89826da4184e5c30d8 100644 (file)
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9095,7 +9095,8 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
     and general database objects (identified by class OID and object OID,
     in the same way as in <structname>pg_description</structname> or
     <structname>pg_depend</structname>).  Also, the right to extend a
-   relation is represented as a separate lockable object.
+   relation is represented as a separate lockable object, as is the right to
+   update <structname>pg_database</structname>.<structfield>datfrozenxid</structfield>.
     Also, <quote>advisory</quote> locks can be taken on numbers that have
     user-defined meanings.
    </para>
@@ -9121,6 +9122,7 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
         Type of the lockable object:
         <literal>relation</literal>,
         <literal>extend</literal>,
+       <literal>frozenid</literal>,
         <literal>page</literal>,
         <literal>tuple</literal>,
         <literal>transactionid</literal>,
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml

index 4e87de2236ab7aaa3cc2ebf056d821de42564096..0de92cd1dd81537fd6545484f2736b4076561bbf 100644 (file)
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -885,7 +885,7 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
  
        <tbody>
         <row>
-        <entry morerows="64"><literal>LWLock</literal></entry>
+        <entry morerows="66"><literal>LWLock</literal></entry>
          <entry><literal>ShmemIndexLock</literal></entry>
          <entry>Waiting to find or allocate space in shared memory.</entry>
         </row>
@@ -1079,6 +1079,16 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
           <entry>Waiting to execute <function>txid_status</function> or update
           the oldest transaction id available to it.</entry>
          </row>
+        <row>
+         <entry><literal>WrapLimitsVacuumLock</literal></entry>
+         <entry>Waiting to update limits on transaction id and multixact
+         consumption.</entry>
+        </row>
+        <row>
+         <entry><literal>NotifyQueueTailLock</literal></entry>
+         <entry>Waiting to update limit on notification message
+         storage.</entry>
+        </row>
          <row>
           <entry><literal>clog</literal></entry>
           <entry>Waiting for I/O on a clog (transaction status) buffer.</entry>
@@ -1169,7 +1179,7 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
           counters during Parallel Hash plan execution.</entry>
          </row>
          <row>
-         <entry morerows="9"><literal>Lock</literal></entry>
+         <entry morerows="10"><literal>Lock</literal></entry>
           <entry><literal>relation</literal></entry>
           <entry>Waiting to acquire a lock on a relation.</entry>
          </row>
@@ -1177,6 +1187,12 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
           <entry><literal>extend</literal></entry>
           <entry>Waiting to extend a relation.</entry>
          </row>
+        <row>
+         <entry><literal>frozenid</literal></entry>
+         <entry>Waiting to
+         update <structname>pg_database</structname>.<structfield>datfrozenxid</structfield>
+         and <structname>pg_database</structname>.<structfield>datminmxid</structfield>.</entry>
+        </row>
          <row>
           <entry><literal>page</literal></entry>
           <entry>Waiting to acquire a lock on page of a relation.</entry>
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c

index 3903a130a1f9446328cd55d37d8fce68696de8b1..f78a90f333658ec42d039373d196bbf8a745c1db 100644 (file)
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1180,6 +1180,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
  
  /*
   * Remove all segments before the one holding the passed page number
+ *
+ * All SLRUs prevent concurrent calls to this function, either with an LWLock
+ * or by calling it only as part of a checkpoint.  Mutual exclusion must begin
+ * before computing cutoffPage.  Mutual exclusion must end after any limit
+ * update that would permit other backends to write fresh data into the
+ * segment immediately preceding the one containing cutoffPage.  Otherwise,
+ * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
+ * after it has accrued freshly-written data.
   */
  void
  SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c

index e667fd023850abd137db922834c7a63298a91266..81594271b6146a880b3a1946cf21c6d29d027445 100644 (file)
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -349,8 +349,8 @@ ExtendSUBTRANS(TransactionId newestXact)
  /*
   * Remove all SUBTRANS segments before the one holding the passed transaction ID
   *
- * This is normally called during checkpoint, with oldestXact being the
- * oldest TransactionXmin of any running transaction.
+ * oldestXact is the oldest TransactionXmin of any running transaction.  This
+ * is called only during checkpoint.
   */
  void
  TruncateSUBTRANS(TransactionId oldestXact)
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c

index fb944911ee9948b630bf790cea2b4af6e5996dfc..8e1c821cc5d83f54eb3556904872acd0841ad32f 100644 (file)
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -223,19 +223,22 @@ typedef struct QueueBackendStatus
  /*
   * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
   *
- * The AsyncQueueControl structure is protected by the AsyncQueueLock.
+ * The AsyncQueueControl structure is protected by the AsyncQueueLock and
+ * NotifyQueueTailLock.
   *
- * When holding the lock in SHARED mode, backends may only inspect their own
- * entries as well as the head and tail pointers. Consequently we can allow a
- * backend to update its own record while holding only SHARED lock (since no
- * other backend will inspect it).
+ * When holding AsyncQueueLock in SHARED mode, backends may only inspect their
+ * own entries as well as the head and tail pointers. Consequently we can
+ * allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
   *
- * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
- * of other backends and also change the head and tail pointers.
+ * When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can
+ * change the tail pointer.
   *
   * AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
- * In order to avoid deadlocks, whenever we need both locks, we always first
- * get AsyncQueueLock and then AsyncCtlLock.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then AsyncQueueLock, and lastly AsyncCtlLock.
   *
   * Each backend uses the backend[] array entry with index equal to its
   * BackendId (which can range from 1 to MaxBackends).  We rely on this to make
@@ -2012,6 +2015,10 @@ asyncQueueAdvanceTail(void)
     int         newtailpage;
     int         boundary;
  
+   /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+   LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+   /* Compute the new tail. */
     LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
     min = QUEUE_HEAD;
     for (i = 1; i <= MaxBackends; i++)
@@ -2020,7 +2027,6 @@ asyncQueueAdvanceTail(void)
             min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
     }
     oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
-   QUEUE_TAIL = min;
     LWLockRelease(AsyncQueueLock);
  
     /*
@@ -2040,6 +2046,17 @@ asyncQueueAdvanceTail(void)
          */
         SimpleLruTruncate(AsyncCtl, newtailpage);
     }
+
+   /*
+    * Advertise the new tail.  This changes asyncQueueIsFull()'s verdict for
+    * the segment immediately prior to the new tail, allowing fresh data into
+    * that segment.
+    */
+   LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
+   QUEUE_TAIL = min;
+   LWLockRelease(AsyncQueueLock);
+
+   LWLockRelease(NotifyQueueTailLock);
  }
  
  /*
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c

index 7c2ef9333521781347cad550f86d895fdb43316b..f5a0b600f9e58972fa7635d55ddb3e07838b904a 100644 (file)
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1295,6 +1295,14 @@ vac_update_datfrozenxid(void)
     bool        bogus = false;
     bool        dirty = false;
  
+   /*
+    * Restrict this task to one backend per database.  This avoids race
+    * conditions that would move datfrozenxid or datminmxid backward.  It
+    * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+    * datfrozenxid passed to an earlier vac_truncate_clog() call.
+    */
+   LockDatabaseFrozenIds(ExclusiveLock);
+
     /*
      * Initialize the "min" calculation with GetOldestXmin, which is a
      * reasonable approximation to the minimum relfrozenxid for not-yet-
@@ -1484,6 +1492,9 @@ vac_truncate_clog(TransactionId frozenXID,
     bool        bogus = false;
     bool        frozenAlreadyWrapped = false;
  
+   /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+   LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
     /* init oldest datoids to sync with my frozenXID/minMulti values */
     oldestxid_datoid = MyDatabaseId;
     minmulti_datoid = MyDatabaseId;
@@ -1593,6 +1604,8 @@ vac_truncate_clog(TransactionId frozenXID,
      */
     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
     SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+   LWLockRelease(WrapLimitsVacuumLock);
  }
  
  
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c

index 889841f4cc55ae38ae50b47bf6216da22a5761e3..863a06845cc68eb9dc00506e735f4dcc80aeb1e0 100644 (file)
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -460,6 +460,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
     LockRelease(&tag, lockmode, false);
  }
  
+/*
+ *     LockDatabaseFrozenIds
+ *
+ * This allows one backend per database to execute vac_update_datfrozenxid().
+ */
+void
+LockDatabaseFrozenIds(LOCKMODE lockmode)
+{
+   LOCKTAG     tag;
+
+   SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId);
+
+   (void) LockAcquire(&tag, lockmode, false, false);
+}
+
  /*
   *     LockPage
   *
@@ -1098,6 +1113,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
                              tag->locktag_field2,
                              tag->locktag_field1);
             break;
+       case LOCKTAG_DATABASE_FROZEN_IDS:
+           appendStringInfo(buf,
+                            _("pg_database.datfrozenxid of database %u"),
+                            tag->locktag_field1);
+           break;
         case LOCKTAG_PAGE:
             appendStringInfo(buf,
                              _("page %u of relation %u of database %u"),
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt

index db478432291b6fd4f65092c29a202bb5f1199f11..9cfa7ef9c32ea47bd10b3cb6e79703341fa29f1e 100644 (file)
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -49,3 +49,6 @@ MultiXactTruncationLock               41
  OldSnapshotTimeMapLock             42
  LogicalRepWorkerLock               43
  CLogTruncationLock                 44
+# 45 was CLogTruncationLock until removal of BackendRandomLock
+WrapLimitsVacuumLock               46
+NotifyQueueTailLock                    47
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c

index ffd1970f589bc95bd432a2dff3a5680f63b1ba49..bb1add7dcfb9a8a3ffc4bd101a0d419afa5dd917 100644 (file)
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -26,6 +26,7 @@
  const char *const LockTagTypeNames[] = {
     "relation",
     "extend",
+   "frozenid",
     "page",
     "tuple",
     "transactionid",
@@ -245,6 +246,17 @@ pg_lock_status(PG_FUNCTION_ARGS)
                 nulls[8] = true;
                 nulls[9] = true;
                 break;
+           case LOCKTAG_DATABASE_FROZEN_IDS:
+               values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+               nulls[2] = true;
+               nulls[3] = true;
+               nulls[4] = true;
+               nulls[5] = true;
+               nulls[6] = true;
+               nulls[7] = true;
+               nulls[8] = true;
+               nulls[9] = true;
+               break;
             case LOCKTAG_PAGE:
                 values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
                 values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h

index 099e18f2b757b3e95cd3e797914c4c1eadaf93dd..3f42000bda82172f1d0545a04f20dc662fb5d9c9 100644 (file)
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -59,6 +59,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation,
                                                 LOCKMODE lockmode);
  extern int RelationExtensionLockWaiterCount(Relation relation);
  
+/* Lock to recompute pg_database.datfrozenxid in the current database */
+extern void LockDatabaseFrozenIds(LOCKMODE lockmode);
+
  /* Lock a page (currently only used within indexes) */
  extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
  extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h

index 986bb6433a0557db8dee32fd4cb6bb6556525d32..5dc7f873680ec73bf557bb476d5f9db6a23609b0 100644 (file)
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -139,6 +139,7 @@ typedef enum LockTagType
  {
     LOCKTAG_RELATION,           /* whole relation */
     LOCKTAG_RELATION_EXTEND,    /* the right to extend a relation */
+   LOCKTAG_DATABASE_FROZEN_IDS,    /* pg_database.datfrozenxid */
     LOCKTAG_PAGE,               /* one page of a relation */
     LOCKTAG_TUPLE,              /* one physical tuple */
     LOCKTAG_TRANSACTION,        /* transaction (for waiting for xact done) */
@@ -195,6 +196,15 @@ typedef struct LOCKTAG
      (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
      (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
  
+/* ID info for frozen IDs is DB OID */
+#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \
+   ((locktag).locktag_field1 = (dboid), \
+    (locktag).locktag_field2 = 0, \
+    (locktag).locktag_field3 = 0, \
+    (locktag).locktag_field4 = 0, \
+    (locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \
+    (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
  /* ID info for a page is RELATION info + BlockNumber */
  #define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
     ((locktag).locktag_field1 = (dboid), \
author	Noah Misch <[email protected]>
	Sat, 15 Aug 2020 17:15:53 +0000 (10:15 -0700)
committer	Noah Misch <[email protected]>
	Sat, 15 Aug 2020 17:15:56 +0000 (10:15 -0700)
doc/src/sgml/catalogs.sgml		patch | blob | blame | history
doc/src/sgml/monitoring.sgml		patch | blob | blame | history
src/backend/access/transam/slru.c		patch | blob | blame | history
src/backend/access/transam/subtrans.c		patch | blob | blame | history
src/backend/commands/async.c		patch | blob | blame | history
src/backend/commands/vacuum.c		patch | blob | blame | history
src/backend/storage/lmgr/lmgr.c		patch | blob | blame | history
src/backend/storage/lmgr/lwlocknames.txt		patch | blob | blame | history
src/backend/utils/adt/lockfuncs.c		patch | blob | blame | history
src/include/storage/lmgr.h		patch | blob | blame | history
src/include/storage/lock.h		patch | blob | blame | history