PublishStartupProcessInformation() to avoid rare hang in recovery.
author Simon Riggs <[email protected]>
Thu, 8 Sep 2011 11:03:28 +0000 (12:03 +0100)
committer Simon Riggs <[email protected]>
Thu, 8 Sep 2011 11:03:28 +0000 (12:03 +0100)
Bgwriter could cause a hang in recovery during concurrent page cleaning.

Bug report and testing by Bernd Helmle, fix by me

src/backend/access/transam/xlog.c
src/backend/storage/lmgr/proc.c
src/include/storage/proc.h

index bb8971ce91ce2701fff3e9d2e7e36decfff7efd6..3da118b14ed9669a92e71a908ecc8b82a0fe93e7 100644 (file)
@@ -44,6 +44,7 @@
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/pmsignal.h"
+#include "storage/proc.h"
 #include "storage/procarray.h"
 #include "storage/smgr.h"
 #include "storage/spin.h"
@@ -5569,6 +5570,7 @@ StartupXLOG(void)
                         */
                        if (InArchiveRecovery && IsUnderPostmaster)
                        {
+                               PublishStartupProcessInformation();
                                SetForwardFsyncRequests();
                                SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
                                bgwriterLaunched = true;
index 4ae977154acc0d30cc289307d2f1f9a1d6aaaa46..3248e136c9a7acba6aa0e7322b1c6c59321aec00 100644 (file)
@@ -1287,12 +1287,53 @@ ProcWaitForSignal(void)
 void
 ProcSendSignal(int pid)
 {
-       PGPROC     *proc = BackendPidGetProc(pid);
+       PGPROC     *proc = NULL;
+
+       proc = BackendPidGetProc(pid);
+
+       if (proc == NULL)
+       {
+               /* use volatile pointer to prevent code rearrangement */
+               volatile PROC_HDR *procglobal = ProcGlobal;
+
+               SpinLockAcquire(ProcStructLock);
+
+               /*
+                * Check to see whether it is the Startup process we wish to signal.
+                * This call is made by the buffer manager when it wishes to wake up a
+                * process that has been waiting for a pin so it can obtain a
+                * cleanup lock using LockBufferForCleanup(). Startup is not a normal
+                * backend, so BackendPidGetProc() will not return a PGPROC for it. So
+                * we remember the information for this special case (it is recorded
+                * by PublishStartupProcessInformation() under ProcStructLock).
+                */
+               if (pid == procglobal->startupProcPid)
+                       proc = procglobal->startupProc;
+
+               SpinLockRelease(ProcStructLock);
+       }
 
        if (proc != NULL)
                PGSemaphoreUnlock(&proc->sem);
 }
 
+/*
+ * Record the PID and PGPROC structure for the Startup process, for use in
+ * ProcSendSignal().  See comments there for further explanation.
+ *
+ * This stores MyProc and MyProcPid into ProcGlobal while holding
+ * ProcStructLock, so the caller is expected to be the Startup process
+ * itself (it is called from StartupXLOG()).
+ */
+void
+PublishStartupProcessInformation(void)
+{
+       /* use volatile pointer to prevent code rearrangement */
+       volatile PROC_HDR *procglobal = ProcGlobal;
+
+       SpinLockAcquire(ProcStructLock);
+
+       procglobal->startupProc = MyProc;
+       procglobal->startupProcPid = MyProcPid;
+
+       SpinLockRelease(ProcStructLock);
+}
+
 
 /*****************************************************************************
  * SIGALRM interrupt support
index b250d3f0f2d150f2e30542c8891c3d156914eab4..e586572ef86f8ade9039a495c4bb4c828e86d9b6 100644 (file)
@@ -133,6 +133,11 @@ typedef struct PROC_HDR
        PGPROC     *autovacFreeProcs;
        /* Current shared estimate of appropriate spins_per_delay value */
        int                     spins_per_delay;
+
+       /* PGPROC of Startup process */
+       PGPROC     *startupProc;
+       /* Pid of Startup process */
+       int                     startupProcPid;
 } PROC_HDR;
 
 /*
@@ -175,6 +180,7 @@ extern void LockWaitCancel(void);
 
 extern void ProcWaitForSignal(void);
 extern void ProcSendSignal(int pid);
+extern void PublishStartupProcessInformation(void);
 
 extern bool enable_sig_alarm(int delayms, bool is_statement_timeout);
 extern bool disable_sig_alarm(bool is_statement_timeout);