Remove minSafeStartPoint, advance minRecoveryPoint instead. Advance it in

author Heikki Linnakangas <[email protected]>

Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)

committer Heikki Linnakangas <[email protected]>

Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
author Heikki Linnakangas <[email protected]>
Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
committer Heikki Linnakangas <[email protected]>
Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 50be1d504b874f97dca9e5a69131935ca34d0561..c01aa9f7fbd05328096f9810756f5ca175eff6e5 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -339,11 +339,14 @@ typedef struct XLogCtlData
          * here. It's used by the background writer when it wants to create
          * a restartpoint.
          *
-        * is info_lck spinlock a bit too light-weight to protect this?
+        * is info_lck spinlock a bit too light-weight to protect these?
          */
         XLogRecPtr      lastCheckPointRecPtr;
         CheckPoint      lastCheckPoint;
  
+       /* end+1 of the last record replayed (or being replayed) */
+       XLogRecPtr      replayEndRecPtr;
+
         slock_t         info_lck;               /* locks shared variables shown above */
  } XLogCtlData;
  
@@ -415,9 +418,11 @@ static uint32 readRecordBufSize = 0;
  
  /* State information for XLOG reading */
  static XLogRecPtr ReadRecPtr;  /* start of last record read */
-static XLogRecPtr EndRecPtr;   /* end+1 of last record read */
+static XLogRecPtr EndRecPtr;   /* end+1 of last record read. Also in shared mem */
  static XLogRecord *nextRecord = NULL;
  static TimeLineID lastPageTLI = 0;
+static XLogRecPtr minRecoveryPoint; /* local copy of ControlFile->minRecoveryPoint */
+static bool              updateMinRecoveryPoint = true;
  
  static bool InRedo = false;
  
@@ -457,6 +462,7 @@ static void PreallocXlogFiles(XLogRecPtr endptr);
  static void RemoveOldXlogFiles(uint32 log, uint32 seg, XLogRecPtr endptr);
  static void ValidateXLOGDirectoryStructure(void);
  static void CleanupBackupHistory(void);
+static void UpdateMinRecoveryPoint(XLogRecPtr lsn);
  static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode);
  static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
  static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, int whichChkpt);
@@ -1759,6 +1765,55 @@ XLogSetAsyncCommitLSN(XLogRecPtr asyncCommitLSN)
         SpinLockRelease(&xlogctl->info_lck);
  }
  
+static void
+UpdateMinRecoveryPoint(XLogRecPtr lsn)
+{
+       /* Quick check using our local copy of the variable */
+       if (!updateMinRecoveryPoint || XLByteLE(lsn, minRecoveryPoint))
+               return;
+
+       /*
+        * Advance ControlFile->minRecoveryPoint so that it is not behind
+        * 'lsn', and keep our local copy in sync. Called from XLogFlush()
+        * in place of a WAL flush while in recovery. The stored value is
+        * the point StartupXLOG must replay up to before it treats the
+        * minimum recovery point as reached. Not a restartpoint! Restart
+        * points define where we should start recovery from, if we crash.
+        */
+       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+
+       /* update local copy */
+       minRecoveryPoint = ControlFile->minRecoveryPoint;
+
+       /*
+        * An invalid minRecoveryPoint means that we need to recover all the WAL,
+        * ie. crash recovery. Don't update the control file in that case.
+        */
+       if (minRecoveryPoint.xlogid == 0 && minRecoveryPoint.xrecoff == 0)
+               updateMinRecoveryPoint = false;
+       else if (XLByteLT(minRecoveryPoint, lsn))
+       {
+               /* use volatile pointer to prevent code rearrangement */
+               volatile XLogCtlData *xlogctl = XLogCtl;
+
+               /*
+                * We need to update the control file. To avoid having to update it
+                * too often, we update it all the way to the shared replayEndRecPtr,
+                * even though 'lsn' would suffice for correctness.
+                */
+               SpinLockAcquire(&xlogctl->info_lck);
+               minRecoveryPoint = xlogctl->replayEndRecPtr;
+               SpinLockRelease(&xlogctl->info_lck);
+
+               ControlFile->minRecoveryPoint = minRecoveryPoint;
+               UpdateControlFile();
+       }
+       LWLockRelease(ControlFileLock);
+       /* NOTE(review): logged even when the control file was not updated above */
+       elog(LOG, "updated min recovery point to %X/%X",
+                minRecoveryPoint.xlogid, minRecoveryPoint.xrecoff);
+}
+
  /*
   * Ensure that all XLOG data through the given position is flushed to disk.
   *
@@ -1771,9 +1826,12 @@ XLogFlush(XLogRecPtr record)
         XLogRecPtr      WriteRqstPtr;
         XLogwrtRqst WriteRqst;
  
-       /* Disabled during REDO */
+       /* During REDO, we don't try to flush the WAL, but update minRecoveryPoint instead */
         if (IsRecoveryProcessingMode())
+       {
+               UpdateMinRecoveryPoint(record);
                 return;
+       }
  
         /* Quick exit if already known flushed */
         if (XLByteLE(record, LogwrtResult.Flush))
@@ -2450,34 +2508,6 @@ XLogFileRead(uint32 log, uint32 seg, int emode)
                         snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
                                          xlogfname);
                         set_ps_display(activitymsg, false);
-
-                       /* 
-                        * Calculate and write out a new safeStartPoint. This defines
-                        * the latest LSN that might appear on-disk while we apply
-                        * the WAL records in this file. If we crash during recovery
-                        * we must reach this point again before we can prove
-                        * database consistency. Not a restartpoint! Restart points
-                        * define where we should start recovery from, if we crash.
-                        */
-                       if (InArchiveRecovery)
-                       {
-                               XLogRecPtr      nextSegRecPtr;
-                               uint32          nextLog = log;
-                               uint32          nextSeg = seg;
-
-                               NextLogSeg(nextLog, nextSeg);
-                               nextSegRecPtr.xlogid = nextLog;
-                               nextSegRecPtr.xrecoff = nextSeg * XLogSegSize;
-
-                               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-                               if (XLByteLT(ControlFile->minSafeStartPoint, nextSegRecPtr))
-                               {
-                                       ControlFile->minSafeStartPoint = nextSegRecPtr;
-                                       UpdateControlFile();
-                               }
-                               LWLockRelease(ControlFileLock);
-                       }
-
                         return fd;
                 }
                 if (errno != ENOENT)    /* unexpected failure? */
@@ -2758,7 +2788,8 @@ RestoreArchivedFile(char *path, const char *xlogfname,
          */
         if (shutdown_requested && InRedo)
         {
-               /* XXX: We should update minSafeStartPoint to the exact value here */
+               /* XXX: We should update minRecoveryPoint to the exact value here */
+               UpdateMinRecoveryPoint(EndRecPtr);
                 proc_exit(0);
         }
  
@@ -4947,13 +4978,13 @@ StartupXLOG(void)
         CheckPoint      checkPoint;
         bool            wasShutdown;
         bool            reachedStopPoint = false;
-       bool            reachedSafeStartPoint = false;
+       bool            reachedMinRecoveryPoint = false;
         bool            performedRecovery = false;
         bool            haveBackupLabel = false;
         XLogRecPtr      RecPtr,
                                 LastRec,
                                 checkPointLoc,
-                               minRecoveryLoc,
+                               backupStopLoc,
                                 EndOfLog;
         uint32          endLogId;
         uint32          endLogSeg;
@@ -5042,7 +5073,7 @@ StartupXLOG(void)
                                                 recoveryTargetTLI,
                                                 ControlFile->checkPointCopy.ThisTimeLineID)));
  
-       if (read_backup_label(&checkPointLoc, &minRecoveryLoc))
+       if (read_backup_label(&checkPointLoc, &backupStopLoc))
         {
                 /*
                  * When a backup_label file is present, we want to roll forward from
@@ -5180,12 +5211,18 @@ StartupXLOG(void)
                 ControlFile->prevCheckPoint = ControlFile->checkPoint;
                 ControlFile->checkPoint = checkPointLoc;
                 ControlFile->checkPointCopy = checkPoint;
-               if (minRecoveryLoc.xlogid != 0 || minRecoveryLoc.xrecoff != 0)
-                       ControlFile->minRecoveryPoint = minRecoveryLoc;
+               if (backupStopLoc.xlogid != 0 || backupStopLoc.xrecoff != 0)
+               {
+                       if (XLByteLT(ControlFile->minRecoveryPoint, backupStopLoc))
+                               ControlFile->minRecoveryPoint = backupStopLoc;
+               }
                 ControlFile->time = (pg_time_t) time(NULL);
                 /* No need to hold ControlFileLock yet, we aren't up far enough */
                 UpdateControlFile();
  
+               /* update our local copy of minRecoveryPoint */
+               minRecoveryPoint = ControlFile->minRecoveryPoint;
+
                 /*
                  * Reset pgstat data, because it may be invalid after recovery.
                  */
@@ -5236,20 +5273,25 @@ StartupXLOG(void)
                         bool            recoveryContinue = true;
                         bool            recoveryApply = true;
                         ErrorContextCallback errcontext;
-                       XLogRecPtr      minSafeStartPoint;
+                       /* use volatile pointer to prevent code rearrangement */
+                       volatile XLogCtlData *xlogctl = XLogCtl;
  
                         InRedo = true;
                         ereport(LOG,
                                         (errmsg("redo starts at %X/%X",
                                                         ReadRecPtr.xlogid, ReadRecPtr.xrecoff)));
  
+                       /* Update shared copy of replayEndRecPtr */
+                       SpinLockAcquire(&xlogctl->info_lck);
+                       xlogctl->replayEndRecPtr = ReadRecPtr;
+                       SpinLockRelease(&xlogctl->info_lck);
+
                         /*
-                        * Take a local copy of minSafeStartPoint at the beginning of
-                        * recovery, because it's updated as we go.
+                        * Let postmaster know we've started redo now.
+                        *
+                        * After this point, we can no longer assume that there are no other
+                        * processes running concurrently.
                          */
-                       minSafeStartPoint = ControlFile->minSafeStartPoint;
-
-                       /* Let postmaster know we've started redo now */
                         if (InArchiveRecovery && IsUnderPostmaster)
                                 SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
  
@@ -5285,16 +5327,17 @@ StartupXLOG(void)
                                         /*
                                          * We were requested to exit without finishing recovery.
                                          *
-                                        * XXX: We should update minSafeStartPoint to the exact
+                                        * XXX: We should update minRecoveryPoint to the exact
                                          * value here.
                                          */
+                                       UpdateMinRecoveryPoint(EndRecPtr);
                                         proc_exit(0);
                                 }
  
                                 /*
                                  * Have we reached our safe starting point? If so, we can
                                  * signal postmaster to enter consistent recovery mode.
-                                *
+                                * XXX
                                  * There are two points in the log we must pass. The first is
                                  * the minRecoveryPoint, which is the LSN at the time the
                                  * base backup was taken that we are about to rollfoward from.
@@ -5302,11 +5345,10 @@ StartupXLOG(void)
                                  * another point also: minSafeStartPoint, which is the
                                  * latest LSN that recovery could have reached prior to crash.
                                  */
-                               if (!reachedSafeStartPoint && 
-                                        XLByteLE(minSafeStartPoint, EndRecPtr) && 
-                                        XLByteLE(ControlFile->minRecoveryPoint, EndRecPtr))
+                               if (!reachedMinRecoveryPoint && 
+                                        XLByteLE(minRecoveryPoint, EndRecPtr))
                                 {
-                                       reachedSafeStartPoint = true;
+                                       reachedMinRecoveryPoint = true;
                                         if (InArchiveRecovery)
                                         {
                                                 ereport(LOG,
@@ -5342,6 +5384,11 @@ StartupXLOG(void)
                                         TransactionIdAdvance(ShmemVariableCache->nextXid);
                                 }
  
+                               /* Update shared copy of replayEndRecPtr */
+                               SpinLockAcquire(&xlogctl->info_lck);
+                               xlogctl->replayEndRecPtr = EndRecPtr;
+                               SpinLockRelease(&xlogctl->info_lck);
+
                                 RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
  
                                 /* Pop the error context stack */
@@ -5370,7 +5417,7 @@ StartupXLOG(void)
                         /* there are no WAL records following the checkpoint */
                         ereport(LOG,
                                         (errmsg("redo is not required")));
-                       reachedSafeStartPoint = true;
+                       reachedMinRecoveryPoint = true;
                 }
         }
  
@@ -5386,7 +5433,7 @@ StartupXLOG(void)
          * Complain if we did not roll forward far enough to render the backup
          * dump consistent.
          */
-       if (InRecovery && !reachedSafeStartPoint)
+       if (InRecovery && !reachedMinRecoveryPoint)
         {
                 if (reachedStopPoint)   /* stopped because of stop request */
                         ereport(FATAL,
diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c

index 3bba50ab83bb4ab2171b33417cdc9e87757615f9..4ea849d7f1fec5c7b4a8d53f8f18e4f050f0231f 100644 (file)
--- a/src/bin/pg_controldata/pg_controldata.c
+++ b/src/bin/pg_controldata/pg_controldata.c
@@ -197,9 +197,6 @@ main(int argc, char *argv[])
         printf(_("Minimum recovery ending location:     %X/%X\n"),
                    ControlFile.minRecoveryPoint.xlogid,
                    ControlFile.minRecoveryPoint.xrecoff);
-       printf(_("Minimum safe starting location:       %X/%X\n"),
-                  ControlFile.minSafeStartPoint.xlogid,
-                  ControlFile.minSafeStartPoint.xrecoff);
         printf(_("Maximum data alignment:               %u\n"),
                    ControlFile.maxAlign);
         /* we don't print floatFormat since can't say much useful about it */
diff --git a/src/bin/pg_resetxlog/pg_resetxlog.c b/src/bin/pg_resetxlog/pg_resetxlog.c

index b20d4bd4dd5f7b811be584f0f5980261d3c45610..51cdde11450f4f960ae9d1b770b43655d85894b1 100644 (file)
--- a/src/bin/pg_resetxlog/pg_resetxlog.c
+++ b/src/bin/pg_resetxlog/pg_resetxlog.c
@@ -603,8 +603,6 @@ RewriteControlFile(void)
         ControlFile.prevCheckPoint.xrecoff = 0;
         ControlFile.minRecoveryPoint.xlogid = 0;
         ControlFile.minRecoveryPoint.xrecoff = 0;
-       ControlFile.minSafeStartPoint.xlogid = 0;
-       ControlFile.minSafeStartPoint.xrecoff = 0;
  
         /* Now we can force the recorded xlog seg size to the right thing. */
         ControlFile.xlog_seg_size = XLogSegSize;
diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h

index e69c8ec5530b3a2eea90b1896b6d61aaef9d6b87..275fc1dddf48db920190cbb47146d5be0ac7a00b 100644 (file)
--- a/src/include/catalog/pg_control.h
+++ b/src/include/catalog/pg_control.h
@@ -21,7 +21,7 @@
  
  
  /* Version identifier for this pg_control format */
-#define PG_CONTROL_VERSION     847
+#define PG_CONTROL_VERSION     843
  
  /*
   * Body of CheckPoint XLOG records.  This is declared here because we keep
@@ -102,7 +102,6 @@ typedef struct ControlFileData
         CheckPoint      checkPointCopy; /* copy of last check point record */
  
         XLogRecPtr      minRecoveryPoint;               /* must replay xlog to here */
-       XLogRecPtr      minSafeStartPoint;              /* safe point after recovery crashes */
  
         /*
          * This data is used to check for hardware-architecture compatibility of
author	Heikki Linnakangas <[email protected]>
	Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
committer	Heikki Linnakangas <[email protected]>
	Tue, 3 Feb 2009 12:20:59 +0000 (14:20 +0200)
src/backend/access/transam/xlog.c		patch \| blob \| blame \| history
src/bin/pg_controldata/pg_controldata.c		patch \| blob \| blame \| history
src/bin/pg_resetxlog/pg_resetxlog.c		patch \| blob \| blame \| history
src/include/catalog/pg_control.h		patch \| blob \| blame \| history