Add more LOG messages when starting and ending recovery from a backup
authorMichael Paquier <[email protected]>
Thu, 25 Jan 2024 08:07:56 +0000 (17:07 +0900)
committerMichael Paquier <[email protected]>
Thu, 25 Jan 2024 08:07:56 +0000 (17:07 +0900)
Three LOG messages are added in the recovery code paths, providing
information that can be useful to track corruption issues depending on
the state of the cluster, telling that:
- Recovery has started from a backup_label.
- Recovery is restarting from a backup start LSN, without a
backup_label.
- Recovery has completed from a backup.

Author: Andres Freund
Reviewed-by: David Steele, Laurenz Albe, Michael Paquier
Discussion: https://postgr.es/m/20231117041811[email protected]

src/backend/access/transam/xlogrecovery.c

index 1b48d7171a49e64bc1b9fec6b24cc520b4c4ff16..0bb472da278407e5a1cba6e4731f1567ef59ba3e 100644 (file)
@@ -603,6 +603,22 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
        if (StandbyModeRequested)
            EnableStandbyMode();
 
+       /*
+        * Omitting backup_label when creating a new replica, PITR node etc.
+        * unfortunately is a common cause of corruption.  Logging that
+        * backup_label was used makes it a bit easier to exclude that as the
+        * cause of observed corruption.
+        *
+        * Do so before we try to read the checkpoint record (which can fail),
+        * as otherwise it can be hard to understand why a checkpoint other
+        * than ControlFile->checkPoint is used.
+        */
+       ereport(LOG,
+               (errmsg("starting backup recovery with redo LSN %X/%X, checkpoint LSN %X/%X, on timeline ID %u",
+                       LSN_FORMAT_ARGS(RedoStartLSN),
+                       LSN_FORMAT_ARGS(CheckPointLoc),
+                       CheckPointTLI)));
+
        /*
         * When a backup_label file is present, we want to roll forward from
         * the checkpoint it identifies, rather than using pg_control.
@@ -742,6 +758,16 @@ InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
                EnableStandbyMode();
        }
 
+       /*
+        * For the same reason as when starting up with backup_label present,
+        * emit a log message when we continue initializing from a base
+        * backup.
+        */
+       if (!XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
+           ereport(LOG,
+                   (errmsg("restarting backup recovery with redo LSN %X/%X",
+                           LSN_FORMAT_ARGS(ControlFile->backupStartPoint))));
+
        /* Get the last valid checkpoint record. */
        CheckPointLoc = ControlFile->checkPoint;
        CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
@@ -2155,6 +2181,9 @@ CheckRecoveryConsistency(void)
    if (!XLogRecPtrIsInvalid(backupEndPoint) &&
        backupEndPoint <= lastReplayedEndRecPtr)
    {
+       XLogRecPtr  saveBackupStartPoint = backupStartPoint;
+       XLogRecPtr  saveBackupEndPoint = backupEndPoint;
+
        elog(DEBUG1, "end of backup reached");
 
        /*
@@ -2165,6 +2194,11 @@ CheckRecoveryConsistency(void)
        backupStartPoint = InvalidXLogRecPtr;
        backupEndPoint = InvalidXLogRecPtr;
        backupEndRequired = false;
+
+       ereport(LOG,
+               (errmsg("completed backup recovery with redo LSN %X/%X and end LSN %X/%X",
+                       LSN_FORMAT_ARGS(saveBackupStartPoint),
+                       LSN_FORMAT_ARGS(saveBackupEndPoint))));
    }
 
    /*