Add GUC to log long wait times on recovery conflicts.

author Fujii Masao <[email protected]>

Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)

committer Fujii Masao <[email protected]>

Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)
author Fujii Masao <[email protected]>
Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)
committer Fujii Masao <[email protected]>
Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml

index 15b94c96c084ddc1bd79308a9ee8c946fb54846b..7c0a673a8dd42c433d12934d25beede27eb18cd3 100644 (file)
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6952,6 +6952,28 @@ log_line_prefix = '%m [%p] %q%u@%d/%a '
        </listitem>
       </varlistentry>
  
+     <varlistentry id="guc-log-recovery-conflict-waits" xreflabel="log_recovery_conflict_waits">
+      <term><varname>log_recovery_conflict_waits</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>log_recovery_conflict_waits</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Controls whether a log message is produced when the startup process
+        is waiting longer than <varname>deadlock_timeout</varname>
+        for recovery conflicts.  This is useful in determining if recovery
+        conflicts prevent the recovery from applying WAL.
+       </para>
+
+       <para>
+        The default is <literal>off</literal>.  This parameter can only be set
+        in the <filename>postgresql.conf</filename> file or on the server
+        command line.
+       </para>
+      </listitem>
+     </varlistentry>
+
       <varlistentry id="guc-log-parameter-max-length" xreflabel="log_parameter_max_length">
        <term><varname>log_parameter_max_length</varname> (<type>integer</type>)
        <indexterm>
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml

index 04f75020e44cbf0142cd1e808f3c8ba71db76c52..efc382cb8d2477d88df5b4933641ebd814bc24c8 100644 (file)
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -2071,6 +2071,12 @@ if (!triggered)
      server. The <structname>pg_stat_database</structname> system view also contains
      summary information.
     </para>
+
+   <para>
+    Users can control whether a log message is produced when WAL replay is waiting
+    longer than <varname>deadlock_timeout</varname> for conflicts. This
+    is controlled by the <xref linkend="guc-log-recovery-conflict-waits"/> parameter.
+   </para>
    </sect2>
  
    <sect2 id="hot-standby-admin">
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c

index 8f2c482bc84331ecabda2a7bb642083ee9885735..71b5852224fcca27500442cec1aa47a887fdef2f 100644 (file)
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3809,6 +3809,8 @@ LockBufferForCleanup(Buffer buffer)
  {
     BufferDesc *bufHdr;
     char       *new_status = NULL;
+   TimestampTz waitStart = 0;
+   bool        logged_recovery_conflict = false;
  
     Assert(BufferIsPinned(buffer));
     Assert(PinCountWaitBuf == NULL);
@@ -3882,6 +3884,34 @@ LockBufferForCleanup(Buffer buffer)
                 new_status[len] = '\0'; /* truncate off " waiting" */
             }
  
+           /*
+            * Emit the log message if the startup process is waiting longer
+            * than deadlock_timeout for recovery conflict on buffer pin.
+            *
+            * Skip this if first time through because the startup process has
+            * not started waiting yet in this case. So, the wait start
+            * timestamp is set after this logic.
+            */
+           if (waitStart != 0 && !logged_recovery_conflict)
+           {
+               TimestampTz now = GetCurrentTimestamp();
+
+               if (TimestampDifferenceExceeds(waitStart, now,
+                                              DeadlockTimeout))
+               {
+                   LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_BUFFERPIN,
+                                       waitStart, now, NULL);
+                   logged_recovery_conflict = true;
+               }
+           }
+
+           /*
+            * Set the wait start timestamp if logging is enabled and first
+            * time through.
+            */
+           if (log_recovery_conflict_waits && waitStart == 0)
+               waitStart = GetCurrentTimestamp();
+
             /* Publish the bufid that Startup process waits on */
             SetStartupBufferPinWaitBufId(buffer - 1);
             /* Set alarm and then wait to be signaled by UnpinBuffer() */
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c

index 77e81bdbc0f5e2dc8abe78a0468575bbd10884e1..d4b0f65ba20bac675816d886df42c4b93871f053 100644 (file)
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -39,6 +39,7 @@
  int            vacuum_defer_cleanup_age;
  int            max_standby_archive_delay = 30 * 1000;
  int            max_standby_streaming_delay = 30 * 1000;
+bool       log_recovery_conflict_waits = false;
  
  static HTAB *RecoveryLockLists;
  
@@ -53,6 +54,7 @@ static void ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlis
  static void SendRecoveryConflictWithBufferPin(ProcSignalReason reason);
  static XLogRecPtr LogCurrentRunningXacts(RunningTransactions CurrRunningXacts);
  static void LogAccessExclusiveLocks(int nlocks, xl_standby_lock *locks);
+static const char *get_recovery_conflict_desc(ProcSignalReason reason);
  
  /*
   * Keep track of all the locks owned by a given transaction.
@@ -218,15 +220,83 @@ WaitExceedsMaxStandbyDelay(uint32 wait_event_info)
     return false;
  }
  
+/*
+ * Log the recovery conflict.
+ *
+ * wait_start is the timestamp when the caller started to wait.
+ * now is the timestamp when this function has been called.
+ * wait_list is the list of virtual transaction ids assigned to
+ * conflicting processes.
+ */
+void
+LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
+                   TimestampTz now, VirtualTransactionId *wait_list)
+{
+   long        secs;
+   int         usecs;
+   long        msecs;
+   StringInfoData buf;
+   int         nprocs = 0;
+
+   TimestampDifference(wait_start, now, &secs, &usecs);
+   msecs = secs * 1000 + usecs / 1000;
+   usecs = usecs % 1000;
+
+   if (wait_list)
+   {
+       VirtualTransactionId *vxids;
+
+       /* Construct a string of list of the conflicting processes */
+       vxids = wait_list;
+       while (VirtualTransactionIdIsValid(*vxids))
+       {
+           PGPROC     *proc = BackendIdGetProc(vxids->backendId);
+
+           /* proc can be NULL if the target backend is not active */
+           if (proc)
+           {
+               if (nprocs == 0)
+               {
+                   initStringInfo(&buf);
+                   appendStringInfo(&buf, "%d", proc->pid);
+               }
+               else
+                   appendStringInfo(&buf, ", %d", proc->pid);
+
+               nprocs++;
+           }
+
+           vxids++;
+       }
+   }
+
+   /*
+    * If wait_list is specified, report the list of PIDs of active
+    * conflicting backends in a detail message. Note that if all the backends
+    * in the list are not active, no detail message is logged.
+    */
+   ereport(LOG,
+           errmsg("recovery still waiting after %ld.%03d ms: %s",
+                  msecs, usecs, _(get_recovery_conflict_desc(reason))),
+           nprocs > 0 ? errdetail_log_plural("Conflicting process: %s.",
+                                             "Conflicting processes: %s.",
+                                             nprocs, buf.data) : 0);
+
+   if (nprocs > 0)
+       pfree(buf.data);
+}
+
  /*
   * This is the main executioner for any query backend that conflicts with
   * recovery processing. Judgement has already been passed on it within
   * a specific rmgr. Here we just issue the orders to the procs. The procs
   * then throw the required error as instructed.
   *
- * If report_waiting is true, "waiting" is reported in PS display if necessary.
- * If the caller has already reported that, report_waiting should be false.
- * Otherwise, "waiting" is reported twice unexpectedly.
+ * If report_waiting is true, "waiting" is reported in PS display and the
+ * wait for recovery conflict is reported in the log, if necessary. If
+ * the caller is responsible for reporting them, report_waiting should be
+ * false. Otherwise, both the caller and this function report the same
+ * thing unexpectedly.
   */
  static void
  ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
@@ -234,15 +304,16 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                                        bool report_waiting)
  {
     TimestampTz waitStart = 0;
-   char       *new_status;
+   char       *new_status = NULL;
+   bool        logged_recovery_conflict = false;
  
     /* Fast exit, to avoid a kernel call if there's no work to be done. */
     if (!VirtualTransactionIdIsValid(*waitlist))
         return;
  
-   if (report_waiting)
+   /* Set the wait start timestamp for reporting */
+   if (report_waiting && (log_recovery_conflict_waits || update_process_title))
         waitStart = GetCurrentTimestamp();
-   new_status = NULL;          /* we haven't changed the ps display */
  
     while (VirtualTransactionIdIsValid(*waitlist))
     {
@@ -252,25 +323,6 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
         /* wait until the virtual xid is gone */
         while (!VirtualXactLock(*waitlist, false))
         {
-           /*
-            * Report via ps if we have been waiting for more than 500 msec
-            * (should that be configurable?)
-            */
-           if (update_process_title && new_status == NULL && report_waiting &&
-               TimestampDifferenceExceeds(waitStart, GetCurrentTimestamp(),
-                                          500))
-           {
-               const char *old_status;
-               int         len;
-
-               old_status = get_ps_display(&len);
-               new_status = (char *) palloc(len + 8 + 1);
-               memcpy(new_status, old_status, len);
-               strcpy(new_status + len, " waiting");
-               set_ps_display(new_status);
-               new_status[len] = '\0'; /* truncate off " waiting" */
-           }
-
             /* Is it time to kill it? */
             if (WaitExceedsMaxStandbyDelay(wait_event_info))
             {
@@ -289,6 +341,50 @@ ResolveRecoveryConflictWithVirtualXIDs(VirtualTransactionId *waitlist,
                 if (pid != 0)
                     pg_usleep(5000L);
             }
+
+           if (waitStart != 0 && (!logged_recovery_conflict || new_status == NULL))
+           {
+               TimestampTz now = 0;
+               bool        maybe_log_conflict;
+               bool        maybe_update_title;
+
+               maybe_log_conflict = (log_recovery_conflict_waits && !logged_recovery_conflict);
+               maybe_update_title = (update_process_title && new_status == NULL);
+
+               /* Get the current timestamp if not report yet */
+               if (maybe_log_conflict || maybe_update_title)
+                   now = GetCurrentTimestamp();
+
+               /*
+                * Report via ps if we have been waiting for more than 500
+                * msec (should that be configurable?)
+                */
+               if (maybe_update_title &&
+                   TimestampDifferenceExceeds(waitStart, now, 500))
+               {
+                   const char *old_status;
+                   int         len;
+
+                   old_status = get_ps_display(&len);
+                   new_status = (char *) palloc(len + 8 + 1);
+                   memcpy(new_status, old_status, len);
+                   strcpy(new_status + len, " waiting");
+                   set_ps_display(new_status);
+                   new_status[len] = '\0'; /* truncate off " waiting" */
+               }
+
+               /*
+                * Emit the log message if the startup process is waiting
+                * longer than deadlock_timeout for recovery conflict on
+                * buffer pin.
+                */
+               if (maybe_log_conflict &&
+                   TimestampDifferenceExceeds(waitStart, now, DeadlockTimeout))
+               {
+                   LogRecoveryConflict(reason, waitStart, now, waitlist);
+                   logged_recovery_conflict = true;
+               }
+           }
         }
  
         /* The virtual transaction is gone now, wait for the next one */
@@ -405,9 +501,18 @@ ResolveRecoveryConflictWithDatabase(Oid dbid)
   * hot-standby backend processes. If deadlock_timeout is reached in
   * this function, all the backends holding the conflicting locks are
   * requested to check themselves for deadlocks.
+ *
+ * logging_conflict should be true if the recovery conflict has not been
+ * logged yet even though logging is enabled. After deadlock_timeout is
+ * reached and the request for deadlock check is sent, we wait again to
+ * be signaled by the release of the lock if logging_conflict is false.
+ * Otherwise we return without waiting again so that the caller can report
+ * the recovery conflict. In this case, then, this function is called again
+ * with logging_conflict=false (because the recovery conflict has already
+ * been logged) and we will wait again for the lock to be released.
   */
  void
-ResolveRecoveryConflictWithLock(LOCKTAG locktag)
+ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict)
  {
     TimestampTz ltime;
  
@@ -494,6 +599,15 @@ ResolveRecoveryConflictWithLock(LOCKTAG locktag)
             backends++;
         }
  
+       /*
+        * Exit if the recovery conflict has not been logged yet even though
+        * logging is enabled, so that the caller can log that. Then
+        * RecoveryConflictWithLock() is called again and we will wait again
+        * for the lock to be released.
+        */
+       if (logging_conflict)
+           goto cleanup;
+
         /*
          * Wait again here to be signaled by the release of the Relation Lock,
          * to prevent the subsequent RecoveryConflictWithLock() from causing
@@ -1209,3 +1323,36 @@ LogStandbyInvalidations(int nmsgs, SharedInvalidationMessage *msgs,
                      nmsgs * sizeof(SharedInvalidationMessage));
     XLogInsert(RM_STANDBY_ID, XLOG_INVALIDATIONS);
  }
+
+/* Return the description of recovery conflict */
+static const char *
+get_recovery_conflict_desc(ProcSignalReason reason)
+{
+   const char *reasonDesc = gettext_noop("unknown reason");
+
+   switch (reason)
+   {
+       case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+           reasonDesc = gettext_noop("recovery conflict on buffer pin");
+           break;
+       case PROCSIG_RECOVERY_CONFLICT_LOCK:
+           reasonDesc = gettext_noop("recovery conflict on lock");
+           break;
+       case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+           reasonDesc = gettext_noop("recovery conflict on tablespace");
+           break;
+       case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+           reasonDesc = gettext_noop("recovery conflict on snapshot");
+           break;
+       case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+           reasonDesc = gettext_noop("recovery conflict on buffer deadlock");
+           break;
+       case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+           reasonDesc = gettext_noop("recovery conflict on database");
+           break;
+       default:
+           break;
+   }
+
+   return reasonDesc;
+}
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c

index 0366a7cc004e4198e58164e7fb56b8271b1881a5..db0cfaa360031dca712f4092765c0a68e6bc0f87 100644 (file)
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1064,8 +1064,10 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
     LWLock     *partitionLock = LockHashPartitionLock(hashcode);
     PROC_QUEUE *waitQueue = &(lock->waitProcs);
     LOCKMASK    myHeldLocks = MyProc->heldLocks;
+   TimestampTz standbyWaitStart = 0;
     bool        early_deadlock = false;
     bool        allow_autovacuum_cancel = true;
+   bool        logged_recovery_conflict = false;
     ProcWaitStatus myWaitStatus;
     PGPROC     *proc;
     PGPROC     *leader = MyProc->lockGroupLeader;
@@ -1261,6 +1263,14 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
         else
             enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout);
     }
+   else if (log_recovery_conflict_waits)
+   {
+       /*
+        * Set the wait start timestamp if logging is enabled and in hot
+        * standby.
+        */
+       standbyWaitStart = GetCurrentTimestamp();
+   }
  
     /*
      * If somebody wakes us between LWLockRelease and WaitLatch, the latch
@@ -1280,8 +1290,42 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
     {
         if (InHotStandby)
         {
-           /* Set a timer and wait for that or for the Lock to be granted */
-           ResolveRecoveryConflictWithLock(locallock->tag.lock);
+           bool        maybe_log_conflict =
+           (standbyWaitStart != 0 && !logged_recovery_conflict);
+
+           /* Set a timer and wait for that or for the lock to be granted */
+           ResolveRecoveryConflictWithLock(locallock->tag.lock,
+                                           maybe_log_conflict);
+
+           /*
+            * Emit the log message if the startup process is waiting longer
+            * than deadlock_timeout for recovery conflict on lock.
+            */
+           if (maybe_log_conflict)
+           {
+               TimestampTz now = GetCurrentTimestamp();
+
+               if (TimestampDifferenceExceeds(standbyWaitStart, now,
+                                              DeadlockTimeout))
+               {
+                   VirtualTransactionId *vxids;
+                   int         cnt;
+
+                   vxids = GetLockConflicts(&locallock->tag.lock,
+                                            AccessExclusiveLock, &cnt);
+
+                   /*
+                    * Log the recovery conflict and the list of PIDs of
+                    * backends holding the conflicting lock. Note that we do
+                    * logging even if there are no such backends right now
+                    * because the startup process here has already waited
+                    * longer than deadlock_timeout.
+                    */
+                   LogRecoveryConflict(PROCSIG_RECOVERY_CONFLICT_LOCK,
+                                       standbyWaitStart, now, cnt > 0 ? vxids : NULL);
+                   logged_recovery_conflict = true;
+               }
+           }
         }
         else
         {
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index daf9c127cde679917bd1ad27143230edb4f02a8d..17579eeaca9c6a0851a4fd2062266b14c72dc0fe 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1573,7 +1573,15 @@ static struct config_bool ConfigureNamesBool[] =
         false,
         NULL, NULL, NULL
     },
-
+   {
+       {"log_recovery_conflict_waits", PGC_SIGHUP, LOGGING_WHAT,
+           gettext_noop("Logs standby recovery conflict waits."),
+           NULL
+       },
+       &log_recovery_conflict_waits,
+       false,
+       NULL, NULL, NULL
+   },
     {
         {"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
             gettext_noop("Logs the host name in the connection logs."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample

index 033aa335a012b0cf85c83bd94833164bc7f607da..8930a94fff5a5b71f3d1c92b6eed3ee4bfa3f6cd 100644 (file)
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -552,6 +552,8 @@
                     #   %% = '%'
                     # e.g. '<%u%%%d> '
  #log_lock_waits = off          # log lock waits >= deadlock_timeout
+#log_recovery_conflict_waits = off # log standby recovery conflict waits
+                   # >= deadlock_timeout
  #log_parameter_max_length = -1     # when logging statements, limit logged
                     # bind-parameter values to N bytes;
                     # -1 means print in full, 0 disables
diff --git a/src/include/storage/standby.h b/src/include/storage/standby.h

index 9b5cc01c97b53f311e004f39f26b4966af96a34e..a0f3e0bdf09e14c0fe4521a1d8fd0d431436cceb 100644 (file)
--- a/src/include/storage/standby.h
+++ b/src/include/storage/standby.h
@@ -23,6 +23,7 @@
  extern int vacuum_defer_cleanup_age;
  extern int max_standby_archive_delay;
  extern int max_standby_streaming_delay;
+extern bool log_recovery_conflict_waits;
  
  extern void InitRecoveryTransactionEnvironment(void);
  extern void ShutdownRecoveryTransactionEnvironment(void);
@@ -32,12 +33,14 @@ extern void ResolveRecoveryConflictWithSnapshot(TransactionId latestRemovedXid,
  extern void ResolveRecoveryConflictWithTablespace(Oid tsid);
  extern void ResolveRecoveryConflictWithDatabase(Oid dbid);
  
-extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag);
+extern void ResolveRecoveryConflictWithLock(LOCKTAG locktag, bool logging_conflict);
  extern void ResolveRecoveryConflictWithBufferPin(void);
  extern void CheckRecoveryConflictDeadlock(void);
  extern void StandbyDeadLockHandler(void);
  extern void StandbyTimeoutHandler(void);
  extern void StandbyLockTimeoutHandler(void);
+extern void LogRecoveryConflict(ProcSignalReason reason, TimestampTz wait_start,
+                               TimestampTz cur_ts, VirtualTransactionId *wait_list);
  
  /*
   * Standby Rmgr (RM_STANDBY_ID)
author	Fujii Masao <[email protected]>
	Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)
committer	Fujii Masao <[email protected]>
	Thu, 7 Jan 2021 15:47:03 +0000 (00:47 +0900)
doc/src/sgml/config.sgml		patch \| blob \| blame \| history
doc/src/sgml/high-availability.sgml		patch \| blob \| blame \| history
src/backend/storage/buffer/bufmgr.c		patch \| blob \| blame \| history
src/backend/storage/ipc/standby.c		patch \| blob \| blame \| history
src/backend/storage/lmgr/proc.c		patch \| blob \| blame \| history
src/backend/utils/misc/guc.c		patch \| blob \| blame \| history
src/backend/utils/misc/postgresql.conf.sample		patch \| blob \| blame \| history
src/include/storage/standby.h		patch \| blob \| blame \| history