Remove minSafeStartPoint, advance minRecoveryPoint instead. Advance it in
authorHeikki Linnakangas <[email protected]>
Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
committerHeikki Linnakangas <[email protected]>
Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
XLogFlush, instead of when a WAL file is restored.

src/backend/access/transam/xlog.c
src/bin/pg_controldata/pg_controldata.c
src/bin/pg_resetxlog/pg_resetxlog.c
src/include/catalog/pg_control.h

index 50be1d504b874f97dca9e5a69131935ca34d0561..c01aa9f7fbd05328096f9810756f5ca175eff6e5 100644 (file)
@@ -339,11 +339,14 @@ typedef struct XLogCtlData
         * here. It's used by the background writer when it wants to create
         * a restartpoint.
         *
-        * is info_lck spinlock a bit too light-weight to protect this?
+        * is info_lck spinlock a bit too light-weight to protect these?
         */
        XLogRecPtr      lastCheckPointRecPtr;
        CheckPoint      lastCheckPoint;
 
+       /* end+1 of the last record replayed (or being replayed) */
+       XLogRecPtr      replayEndRecPtr;
+
        slock_t         info_lck;               /* locks shared variables shown above */
 } XLogCtlData;
 
@@ -415,9 +418,11 @@ static uint32 readRecordBufSize = 0;
 
 /* State information for XLOG reading */
 static XLogRecPtr ReadRecPtr;  /* start of last record read */
-static XLogRecPtr EndRecPtr;   /* end+1 of last record read */
+static XLogRecPtr EndRecPtr;   /* end+1 of last record read. Also in shared mem */
 static XLogRecord *nextRecord = NULL;
 static TimeLineID lastPageTLI = 0;
+static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
+static bool              updateMinRecoveryPoint = true;
 
 static bool InRedo = false;
 
@@ -457,6 +462,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr);
 static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
 static void ValidateXLOGDirectoryStructure(void);
 static void CleanupBackupHistory(void);
+static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
 static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
 static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
 static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -1759,6 +1765,55 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
        SpinLockRelease(&xlogctl->info_lck);
 }
 
+/*
+ * Advance minRecoveryPoint in the control file so that it covers 'lsn'.
+ * Used during recovery in place of a WAL flush: after a crash, replay must
+ * reach minRecoveryPoint again before the database is consistent.
+ */
+static void
+UpdateMinRecoveryPoint(XLogRecPtr lsn)
+{
+       /* Quick check using our local copy of the variable */
+       if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+               return;
+
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+       /* update local copy */
+       minRecoveryPoint = ControlFile->minRecoveryPoint;
+
+       /*
+        * An invalid minRecoveryPoint means that we need to recover all the WAL,
+        * ie. crash recovery. Don't update the control file in that case.
+        */
+       if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
+               updateMinRecoveryPoint = false;
+       else if (XLByteLT(minRecoveryPoint, lsn))
+       {
+               /* use volatile pointer to prevent code rearrangement */
+               volatile XLogCtlData *xlogctl = XLogCtl;
+               XLogRecPtr      newMinRecoveryPoint;
+
+               /*
+                * To avoid having to update the control file too often, advance
+                * all the way to the shared replayEndRecPtr, even though 'lsn'
+                * would suffice for correctness.
+                */
+               SpinLockAcquire(&xlogctl->info_lck);
+               newMinRecoveryPoint = xlogctl->replayEndRecPtr;
+               SpinLockRelease(&xlogctl->info_lck);
+               /* Never move the stored value backwards; log only real updates */
+               if (XLByteLT(minRecoveryPoint, newMinRecoveryPoint))
+               {
+                       minRecoveryPoint = newMinRecoveryPoint;
+                       ControlFile->minRecoveryPoint = minRecoveryPoint;
+                       UpdateControlFile();
+                       elog(LOG, "updated min recovery point to %X/%X",
+                            minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
+               }
+       }
+       LWLockRelease(ControlFileLock);
+}
+
 /*
  * Ensure that all XLOG data through the given position is flushed to disk.
  *
@@ -1771,9 +1826,12 @@ XLogFlush(XLogRecPtr record)
        XLogRecPtr      WriteRqstPtr;
        XLogwrtRqst WriteRqst;
 
-       /* Disabled during REDO */
+       /* During REDO, we don't try to flush the WAL, but update minRecoveryPoint instead */
        if (IsRecoveryProcessingMode())
+       {
+               UpdateMinRecoveryPoint(record);
                return;
+       }
 
        /* Quick exit if already known flushed */
        if (XLByteLE(record, LogwrtResult.Flush))
@@ -2450,34 +2508,6 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
                        snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
                                         xlogfname);
                        set_ps_display(activitymsg, false);
-
-                       /* 
-                        * Calculate and write out a new safeStartPoint. This defines
-                        * the latest LSN that might appear on-disk while we apply
-                        * the WAL records in this file. If we crash during recovery
-                        * we must reach this point again before we can prove
-                        * database consistency. Not a restartpoint! Restart points
-                        * define where we should start recovery from, if we crash.
-                        */
-                       if (InArchiveRecovery)
-                       {
-                               XLogRecPtr      nextSegRecPtr;
-                               uint32          nextLog = log;
-                               uint32          nextSeg = seg;
-
-                               NextLogSeg(nextLog, nextSeg);
-                               nextSegRecPtr.xlogid = nextLog;
-                               nextSegRecPtr.xrecoff = nextSeg * XLogSegSize;
-
-                               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-                               if (XLByteLT(ControlFile->minSafeStartPoint, nextSegRecPtr))
-                               {
-                                       ControlFile->minSafeStartPoint = nextSegRecPtr;
-                                       UpdateControlFile();
-                               }
-                               LWLockRelease(ControlFileLock);
-                       }
-
                        return fd;
                }
                if (errno != ENOENT)    /* unexpected failure? */
@@ -2758,7 +2788,8 @@ RestoreArchivedFile(char *path, const char *xlogfname,
         */
        if (shutdown_requested && InRedo)
        {
-               /* XXX: We should update minSafeStartPoint to the exact value here */
+               /* XXX: We should update minRecoveryPoint to the exact value here */
+               UpdateMinRecoveryPoint(EndRecPtr);
                proc_exit(0);
        }
 
@@ -4947,13 +4978,13 @@ StartupXLOG(void)
        CheckPoint      checkPoint;
        bool            wasShutdown;
        bool            reachedStopPoint = false;
-       bool            reachedSafeStartPoint = false;
+       bool            reachedMinRecoveryPoint = false;
        bool            performedRecovery = false;
        bool            haveBackupLabel = false;
        XLogRecPtr      RecPtr,
                                LastRec,
                                checkPointLoc,
-                               minRecoveryLoc,
+                               backupStopLoc,
                                EndOfLog;
        uint32          endLogId;
        uint32          endLogSeg;
@@ -5042,7 +5073,7 @@ StartupXLOG(void)
                                                recoveryTargetTLI,
                                                ControlFile->checkPointCopy.ThisTimeLineID)));
 
-       if (read_backup_label(&checkPointLoc, &minRecoveryLoc))
+       if (read_backup_label(&checkPointLoc, &backupStopLoc))
        {
                /*
                 * When a backup_label file is present, we want to roll forward from
@@ -5180,12 +5211,18 @@ StartupXLOG(void)
                ControlFile->prevCheckPoint = ControlFile->checkPoint;
                ControlFile->checkPoint = checkPointLoc;
                ControlFile->checkPointCopy = checkPoint;
-               if (minRecoveryLoc.xlogid != 0 || minRecoveryLoc.xrecoff != 0)
-                       ControlFile->minRecoveryPoint = minRecoveryLoc;
+               if (backupStopLoc.xlogid != 0 || backupStopLoc.xrecoff != 0)
+               {
+                       if (XLByteLT(ControlFile->minRecoveryPoint, backupStopLoc))
+                               ControlFile->minRecoveryPoint = backupStopLoc;
+               }
                ControlFile->time = (pg_time_t) time(NULL);
                /* No need to hold ControlFileLock yet, we aren't up far enough */
                UpdateControlFile();
 
+               /* update our local copy of minRecoveryPoint */
+               minRecoveryPoint = ControlFile->minRecoveryPoint;
+
                /*
                 * Reset pgstat data, because it may be invalid after recovery.
                 */
@@ -5236,20 +5273,25 @@ StartupXLOG(void)
                        bool            recoveryContinue = true;
                        bool            recoveryApply = true;
                        ErrorContextCallback errcontext;
-                       XLogRecPtr      minSafeStartPoint;
+                       /* use volatile pointer to prevent code rearrangement */
+                       volatile XLogCtlData *xlogctl = XLogCtl;
 
                        InRedo = true;
                        ereport(LOG,
                                        (errmsg("redo starts at %X/%X",
                                                        ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
 
+                       /* Update shared copy of replayEndRecPtr */
+                       SpinLockAcquire(&xlogctl->info_lck);
+                       xlogctl->replayEndRecPtr = ReadRecPtr;
+                       SpinLockRelease(&xlogctl->info_lck);
+
                        /*
-                        * Take a local copy of minSafeStartPoint at the beginning of
-                        * recovery, because it's updated as we go.
+                        * Let postmaster know we've started redo now.
+                        *
+                        * After this point, we can no longer assume that there's no other
+                        * processes running concurrently.
                         */
-                       minSafeStartPoint = ControlFile->minSafeStartPoint;
-
-                       /* Let postmaster know we've started redo now */
                        if (InArchiveRecovery && IsUnderPostmaster)
                                SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
 
@@ -5285,16 +5327,17 @@ StartupXLOG(void)
                                        /*
                                         * We were requested to exit without finishing recovery.
                                         *
-                                        * XXX: We should update minSafeStartPoint to the exact
+                                        * XXX: We should update minRecoveryPoint to the exact
                                         * value here.
                                         */
+                                       UpdateMinRecoveryPoint(EndRecPtr);
                                        proc_exit(0);
                                }
 
                                /*
                                 * Have we reached our safe starting point? If so, we can
                                 * signal postmaster to enter consistent recovery mode.
-                                *
+                                * XXX
                                 * There are two points in the log we must pass. The first is
                                 * the minRecoveryPoint, which is the LSN at the time the
                                 * base backup was taken that we are about to roll forward from.
@@ -5302,11 +5345,10 @@ StartupXLOG(void)
                                 * another point also: minSafeStartPoint, which is the
                                 * latest LSN that recovery could have reached prior to crash.
                                 */
-                               if (!reachedSafeStartPoint && 
-                                        XLByteLE(minSafeStartPoint, EndRecPtr) && 
-                                        XLByteLE(ControlFile->minRecoveryPoint, EndRecPtr))
+                               if (!reachedMinRecoveryPoint && 
+                                        XLByteLE(minRecoveryPoint, EndRecPtr))
                                {
-                                       reachedSafeStartPoint = true;
+                                       reachedMinRecoveryPoint = true;
                                        if (InArchiveRecovery)
                                        {
                                                ereport(LOG,
@@ -5342,6 +5384,11 @@ StartupXLOG(void)
                                        TransactionIdAdvance(ShmemVariableCache->nextXid);
                                }
 
+                               /* Update shared copy of replayEndRecPtr */
+                               SpinLockAcquire(&xlogctl->info_lck);
+                               xlogctl->replayEndRecPtr = EndRecPtr;
+                               SpinLockRelease(&xlogctl->info_lck);
+
                                RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
 
                                /* Pop the error context stack */
@@ -5370,7 +5417,7 @@ StartupXLOG(void)
                        /* there are no WAL records following the checkpoint */
                        ereport(LOG,
                                        (errmsg("redo is not required")));
-                       reachedSafeStartPoint = true;
+                       reachedMinRecoveryPoint = true;
                }
        }
 
@@ -5386,7 +5433,7 @@ StartupXLOG(void)
         * Complain if we did not roll forward far enough to render the backup
         * dump consistent.
         */
-       if (InRecovery && !reachedSafeStartPoint)
+       if (InRecovery && !reachedMinRecoveryPoint)
        {
                if (reachedStopPoint)   /* stopped because of stop request */
                        ereport(FATAL,
index 3bba50ab83bb4ab2171b33417cdc9e87757615f9..4ea849d7f1fec5c7b4a8d53f8f18e4f050f0231f 100644 (file)
@@ -197,9 +197,6 @@ main(int argc, char *argv[])
        printf(_("Minimum recovery ending location:     %X/%X\n"),
                   ControlFile.minRecoveryPoint.xlogid,
                   ControlFile.minRecoveryPoint.xrecoff);
-       printf(_("Minimum safe starting location:       %X/%X\n"),
-                  ControlFile.minSafeStartPoint.xlogid,
-                  ControlFile.minSafeStartPoint.xrecoff);
        printf(_("Maximum data alignment:               %u\n"),
                   ControlFile.maxAlign);
        /* we don't print floatFormat since can't say much useful about it */
index b20d4bd4dd5f7b811be584f0f5980261d3c45610..51cdde11450f4f960ae9d1b770b43655d85894b1 100644 (file)
@@ -603,8 +603,6 @@ RewriteControlFile(void)
        ControlFile.prevCheckPoint.xrecoff = 0;
        ControlFile.minRecoveryPoint.xlogid = 0;
        ControlFile.minRecoveryPoint.xrecoff = 0;
-       ControlFile.minSafeStartPoint.xlogid = 0;
-       ControlFile.minSafeStartPoint.xrecoff = 0;
 
        /* Now we can force the recorded xlog seg size to the right thing. */
        ControlFile.xlog_seg_size = XLogSegSize;
index e69c8ec5530b3a2eea90b1896b6d61aaef9d6b87..275fc1dddf48db920190cbb47146d5be0ac7a00b 100644 (file)
@@ -21,7 +21,7 @@
 
 
 /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION     847
+#define PG_CONTROL_VERSION     843
 
 /*
  * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -102,7 +102,6 @@ typedef struct ControlFileData
        CheckPoint      checkPointCopy; /* copy of last check point record */
 
        XLogRecPtr      minRecoveryPoint;               /* must replay xlog to here */
-       XLogRecPtr      minSafeStartPoint;              /* safe point after recovery crashes */
 
        /*
         * This data is used to check for hardware-architecture compatibility of