Fix an assertion failure related to an exclusive backup.
authorFujii Masao <[email protected]>
Tue, 17 Jan 2017 08:30:26 +0000 (17:30 +0900)
committerFujii Masao <[email protected]>
Tue, 17 Jan 2017 08:30:26 +0000 (17:30 +0900)
Previously multiple sessions could execute pg_start_backup() and
pg_stop_backup() to start and stop an exclusive backup at the same time.
This could trigger the assertion failure of
"FailedAssertion("!(XLogCtl->Insert.exclusiveBackup)".
This happend because, even while pg_start_backup() was starting
an exclusive backup, other session could run pg_stop_backup()
concurrently and mark the backup as not-in-progress unconditionally.

This patch introduces ExclusiveBackupState indicating the state of
an exclusive backup. This state is used to ensure that there is only
one session running pg_start_backup() or pg_stop_backup() at
the same time, to avoid the assertion failure.

Back-patch to all supported versions.

Author: Michael Paquier
Reviewed-By: Kyotaro Horiguchi and me
Reported-By: Andreas Seltenreich
Discussion: <[email protected]>

src/backend/access/transam/xlog.c

index 720edf75d0e9ae1fac53061a4fb2652db84ff632..e141c963461b7df6f35a4054758af780e1457c6d 100644 (file)
@@ -456,6 +456,29 @@ typedef union WALInsertLockPadded
        char            pad[PG_CACHE_LINE_SIZE];
 } WALInsertLockPadded;
 
+/*
+ * State of an exclusive backup, necessary to control concurrent activities
+ * across sessions when working on exclusive backups.
+ *
+ * EXCLUSIVE_BACKUP_NONE means that there is no exclusive backup actually
+ * running, to be more precise pg_start_backup() is not being executed for
+ * an exclusive backup and there is no exclusive backup in progress.
+ * EXCLUSIVE_BACKUP_STARTING means that pg_start_backup() is starting an
+ * exclusive backup.
+ * EXCLUSIVE_BACKUP_IN_PROGRESS means that pg_start_backup() has finished
+ * running and an exclusive backup is in progress. pg_stop_backup() is
+ * needed to finish it.
+ * EXCLUSIVE_BACKUP_STOPPING means that pg_stop_backup() is stopping an
+ * exclusive backup.
+ */
+typedef enum ExclusiveBackupState
+{
+       EXCLUSIVE_BACKUP_NONE = 0,
+       EXCLUSIVE_BACKUP_STARTING,
+       EXCLUSIVE_BACKUP_IN_PROGRESS,
+       EXCLUSIVE_BACKUP_STOPPING
+} ExclusiveBackupState;
+
 /*
  * Shared state data for WAL insertion.
  */
@@ -497,13 +520,15 @@ typedef struct XLogCtlInsert
        bool            fullPageWrites;
 
        /*
-        * exclusiveBackup is true if a backup started with pg_start_backup() is
-        * in progress, and nonExclusiveBackups is a counter indicating the number
-        * of streaming base backups currently in progress. forcePageWrites is set
-        * to true when either of these is non-zero. lastBackupStart is the latest
-        * checkpoint redo location used as a starting point for an online backup.
+        * exclusiveBackupState indicates the state of an exclusive backup
+        * (see comments of ExclusiveBackupState for more details).
+        * nonExclusiveBackups is a counter indicating the number of streaming
+        * base backups currently in progress. forcePageWrites is set to true
+        * when either of these is non-zero. lastBackupStart is the latest
+        * checkpoint redo location used as a starting point for an online
+        * backup.
         */
-       bool            exclusiveBackup;
+       ExclusiveBackupState exclusiveBackupState;
        int                     nonExclusiveBackups;
        XLogRecPtr      lastBackupStart;
 
@@ -845,6 +870,7 @@ static void xlog_outrec(StringInfo buf, XLogReaderState *record);
 #endif
 static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
 static void pg_start_backup_callback(int code, Datum arg);
+static void pg_stop_backup_callback(int code, Datum arg);
 static bool read_backup_label(XLogRecPtr *checkPointLoc,
                                  bool *backupEndRequired, bool *backupFromStandby);
 static bool read_tablespace_map(List **tablespaces);
@@ -9829,7 +9855,12 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
        WALInsertLockAcquireExclusive();
        if (exclusive)
        {
-               if (XLogCtl->Insert.exclusiveBackup)
+               /*
+                * At first, mark that we're now starting an exclusive backup,
+                * to ensure that there are no other sessions currently running
+                * pg_start_backup() or pg_stop_backup().
+                */
+               if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE)
                {
                        WALInsertLockRelease();
                        ereport(ERROR,
@@ -9837,7 +9868,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
                                         errmsg("a backup is already in progress"),
                                         errhint("Run pg_stop_backup() and try again.")));
                }
-               XLogCtl->Insert.exclusiveBackup = true;
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STARTING;
        }
        else
                XLogCtl->Insert.nonExclusiveBackups++;
@@ -10090,7 +10121,7 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
                {
                        /*
                         * Check for existing backup label --- implies a backup is already
-                        * running.  (XXX given that we checked exclusiveBackup above,
+                        * running.  (XXX given that we checked exclusiveBackupState above,
                         * maybe it would be OK to just unlink any such label file?)
                         */
                        if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
@@ -10173,6 +10204,16 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
        }
        PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive));
 
+       /*
+        * Mark that start phase has correctly finished for an exclusive backup.
+        */
+       if (exclusive)
+       {
+               WALInsertLockAcquireExclusive();
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+               WALInsertLockRelease();
+       }
+
        /*
         * We're done.  As a convenience, return the starting WAL location.
         */
@@ -10191,8 +10232,8 @@ pg_start_backup_callback(int code, Datum arg)
        WALInsertLockAcquireExclusive();
        if (exclusive)
        {
-               Assert(XLogCtl->Insert.exclusiveBackup);
-               XLogCtl->Insert.exclusiveBackup = false;
+               Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STARTING);
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
        }
        else
        {
@@ -10200,7 +10241,7 @@ pg_start_backup_callback(int code, Datum arg)
                XLogCtl->Insert.nonExclusiveBackups--;
        }
 
-       if (!XLogCtl->Insert.exclusiveBackup &&
+       if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
                XLogCtl->Insert.nonExclusiveBackups == 0)
        {
                XLogCtl->Insert.forcePageWrites = false;
@@ -10208,6 +10249,24 @@ pg_start_backup_callback(int code, Datum arg)
        WALInsertLockRelease();
 }
 
+/*
+ * Error cleanup callback for pg_stop_backup
+ */
+static void
+pg_stop_backup_callback(int code, Datum arg)
+{
+       bool            exclusive = DatumGetBool(arg);
+
+       /* Update backup status on failure */
+       WALInsertLockAcquireExclusive();
+       if (exclusive)
+       {
+               Assert(XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_STOPPING);
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_IN_PROGRESS;
+       }
+       WALInsertLockRelease();
+}
+
 /*
  * do_pg_stop_backup is the workhorse of the user-visible pg_stop_backup()
  * function.
@@ -10270,12 +10329,93 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
                          errmsg("WAL level not sufficient for making an online backup"),
                                 errhint("wal_level must be set to \"archive\", \"hot_standby\", or \"logical\" at server start.")));
 
+
+       if (exclusive)
+       {
+               /*
+                * At first, mark that we're now stopping an exclusive backup,
+                * to ensure that there are no other sessions currently running
+                * pg_start_backup() or pg_stop_backup().
+                */
+               WALInsertLockAcquireExclusive();
+               if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_IN_PROGRESS)
+               {
+                       WALInsertLockRelease();
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                        errmsg("exclusive backup not in progress")));
+               }
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_STOPPING;
+               WALInsertLockRelease();
+
+               /*
+                * Remove backup_label. In case of failure, the state for an exclusive
+                * backup is switched back to in-progress.
+                */
+               PG_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
+               {
+                       /*
+                        * Read the existing label file into memory.
+                        */
+                       struct stat statbuf;
+                       int                     r;
+
+                       if (stat(BACKUP_LABEL_FILE, &statbuf))
+                       {
+                               /* should not happen per the upper checks */
+                               if (errno != ENOENT)
+                                       ereport(ERROR,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not stat file \"%s\": %m",
+                                                                       BACKUP_LABEL_FILE)));
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                                                errmsg("a backup is not in progress")));
+                       }
+
+                       lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
+                       if (!lfp)
+                       {
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not read file \"%s\": %m",
+                                                               BACKUP_LABEL_FILE)));
+                       }
+                       labelfile = palloc(statbuf.st_size + 1);
+                       r = fread(labelfile, statbuf.st_size, 1, lfp);
+                       labelfile[statbuf.st_size] = '\0';
+
+                       /*
+                        * Close and remove the backup label file
+                        */
+                       if (r != 1 || ferror(lfp) || FreeFile(lfp))
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not read file \"%s\": %m",
+                                                               BACKUP_LABEL_FILE)));
+                       if (unlink(BACKUP_LABEL_FILE) != 0)
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not remove file \"%s\": %m",
+                                                               BACKUP_LABEL_FILE)));
+
+                       /*
+                        * Remove tablespace_map file if present, it is created only if there
+                        * are tablespaces.
+                        */
+                       unlink(TABLESPACE_MAP);
+               }
+               PG_END_ENSURE_ERROR_CLEANUP(pg_stop_backup_callback, (Datum) BoolGetDatum(exclusive));
+       }
+
        /*
         * OK to update backup counters and forcePageWrites
         */
        WALInsertLockAcquireExclusive();
        if (exclusive)
-               XLogCtl->Insert.exclusiveBackup = false;
+       {
+               XLogCtl->Insert.exclusiveBackupState = EXCLUSIVE_BACKUP_NONE;
+       }
        else
        {
                /*
@@ -10288,66 +10428,13 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
                XLogCtl->Insert.nonExclusiveBackups--;
        }
 
-       if (!XLogCtl->Insert.exclusiveBackup &&
+       if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
                XLogCtl->Insert.nonExclusiveBackups == 0)
        {
                XLogCtl->Insert.forcePageWrites = false;
        }
        WALInsertLockRelease();
 
-       if (exclusive)
-       {
-               /*
-                * Read the existing label file into memory.
-                */
-               struct stat statbuf;
-               int                     r;
-
-               if (stat(BACKUP_LABEL_FILE, &statbuf))
-               {
-                       if (errno != ENOENT)
-                               ereport(ERROR,
-                                               (errcode_for_file_access(),
-                                                errmsg("could not stat file \"%s\": %m",
-                                                               BACKUP_LABEL_FILE)));
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                        errmsg("a backup is not in progress")));
-               }
-
-               lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
-               if (!lfp)
-               {
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not read file \"%s\": %m",
-                                                       BACKUP_LABEL_FILE)));
-               }
-               labelfile = palloc(statbuf.st_size + 1);
-               r = fread(labelfile, statbuf.st_size, 1, lfp);
-               labelfile[statbuf.st_size] = '\0';
-
-               /*
-                * Close and remove the backup label file
-                */
-               if (r != 1 || ferror(lfp) || FreeFile(lfp))
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not read file \"%s\": %m",
-                                                       BACKUP_LABEL_FILE)));
-               if (unlink(BACKUP_LABEL_FILE) != 0)
-                       ereport(ERROR,
-                                       (errcode_for_file_access(),
-                                        errmsg("could not remove file \"%s\": %m",
-                                                       BACKUP_LABEL_FILE)));
-
-               /*
-                * Remove tablespace_map file if present, it is created only if there
-                * are tablespaces.
-                */
-               unlink(TABLESPACE_MAP);
-       }
-
        /*
         * Read and parse the START WAL LOCATION line (this code is pretty crude,
         * but we are not expecting any variability in the file format).
@@ -10584,7 +10671,7 @@ do_pg_abort_backup(void)
        Assert(XLogCtl->Insert.nonExclusiveBackups > 0);
        XLogCtl->Insert.nonExclusiveBackups--;
 
-       if (!XLogCtl->Insert.exclusiveBackup &&
+       if (XLogCtl->Insert.exclusiveBackupState == EXCLUSIVE_BACKUP_NONE &&
                XLogCtl->Insert.nonExclusiveBackups == 0)
        {
                XLogCtl->Insert.forcePageWrites = false;