action_at_recovery_target recovery config option
author Simon Riggs <[email protected]>
Tue, 25 Nov 2014 20:13:30 +0000 (20:13 +0000)
committer Simon Riggs <[email protected]>
Tue, 25 Nov 2014 20:13:30 +0000 (20:13 +0000)
action_at_recovery_target = pause | promote | shutdown

Petr Jelinek

Reviewed by Muhammad Asif Naeem, Fujii Masao and
Simon Riggs

doc/src/sgml/recovery-config.sgml
src/backend/access/transam/xlog.c
src/backend/postmaster/postmaster.c
src/include/access/xlog_internal.h

index 0f1ff343a6c8da2bc17ac1cc415683ae48f7b16b..a145a3fee2b8f0998416611bfcab8e674d8966ca 100644 (file)
@@ -289,12 +289,39 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
       </term>
       <listitem>
        <para>
-        Specifies whether recovery should pause when the recovery target
-        is reached. The default is true.
-        This is intended to allow queries to be executed against the
-        database to check if this recovery target is the most desirable
-        point for recovery. The paused state can be resumed by using
-        <function>pg_xlog_replay_resume()</> (See
+        Alias for action_at_recovery_target: <literal>true</> is the same as
+        action_at_recovery_target = <literal>pause</>, and <literal>false</>
+        is the same as action_at_recovery_target = <literal>promote</>.
+       </para>
+       <para>
+        This setting has no effect if <xref linkend="guc-hot-standby"> is not
+        enabled, or if no recovery target is set.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     </variablelist>
+
+     <varlistentry id="action-at-recovery-target"
+                   xreflabel="action_at_recovery_target">
+      <term><varname>action_at_recovery_target</varname> (<type>enum</type>)
+      <indexterm>
+        <primary><varname>action_at_recovery_target</> recovery parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies what action the server should take once the recovery target is
+        reached. The default is <literal>pause</>, which means recovery will
+        be paused. <literal>promote</> means the recovery process will finish and
+        the server will start to accept connections.
+        Finally <literal>shutdown</> will stop the server after reaching the
+        recovery target.
+       </para>
+       <para>The intended use of the <literal>pause</> setting is to allow queries to be
+        executed against the database to check if this recovery target is the
+        most desirable point for recovery. The paused state can be resumed by
+        using <function>pg_xlog_replay_resume()</> (See
         <xref linkend="functions-recovery-control-table">), which then
         causes recovery to end. If this recovery target is not the
         desired stopping point, then shutdown the server, change the
@@ -302,8 +329,23 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
         continue recovery.
        </para>
        <para>
-        This setting has no effect if <xref linkend="guc-hot-standby"> is not
-        enabled, or if no recovery target is set.
+        The <literal>shutdown</> setting is useful to have the instance ready at
+        the exact replay point desired.
+        The instance will still be able to replay more WAL records (and in fact
+        will have to replay WAL records since last checkpoint next time it is
+        started).
+       </para>
+       <para>
+        Note that because <filename>recovery.conf</> will not be renamed when
+        <varname>action_at_recovery_target</> is set to <literal>shutdown</>,
+        any subsequent start will end with immediate shutdown unless the
+        configuration is changed or the <filename>recovery.conf</> is removed
+        manually.
+       </para>
+       <para>
+        This setting has no effect if no recovery target is set.
+        If <xref linkend="guc-hot-standby"> is not enabled, a setting of
+        <literal>pause</> will act the same as <literal>shutdown</>.
        </para>
       </listitem>
      </varlistentry>
index 8e712b793f33db6591ef36c79fd10ea8785d69b1..0f661f5010c7685282afb892ee17b300c5e9b4bc 100644 (file)
@@ -228,7 +228,7 @@ static char *recoveryEndCommand = NULL;
 static char *archiveCleanupCommand = NULL;
 static RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
 static bool recoveryTargetInclusive = true;
-static bool recoveryPauseAtTarget = true;
+static RecoveryTargetAction actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE;
 static TransactionId recoveryTargetXid;
 static TimestampTz recoveryTargetTime;
 static char *recoveryTargetName;
@@ -4647,6 +4647,9 @@ readRecoveryCommandFile(void)
    ConfigVariable *item,
               *head = NULL,
               *tail = NULL;
+   bool        recoveryPauseAtTargetSet = false;
+   bool        actionAtRecoveryTargetSet = false;
+
 
    fd = AllocateFile(RECOVERY_COMMAND_FILE, "r");
    if (fd == NULL)
@@ -4692,13 +4695,43 @@ readRecoveryCommandFile(void)
        }
        else if (strcmp(item->name, "pause_at_recovery_target") == 0)
        {
+           bool recoveryPauseAtTarget;
+
            if (!parse_bool(item->value, &recoveryPauseAtTarget))
                ereport(ERROR,
                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                         errmsg("parameter \"%s\" requires a Boolean value", "pause_at_recovery_target")));
+
            ereport(DEBUG2,
                    (errmsg_internal("pause_at_recovery_target = '%s'",
                                     item->value)));
+
+           actionAtRecoveryTarget = recoveryPauseAtTarget ?
+                                    RECOVERY_TARGET_ACTION_PAUSE :
+                                    RECOVERY_TARGET_ACTION_PROMOTE;
+
+           recoveryPauseAtTargetSet = true;
+       }
+       else if (strcmp(item->name, "action_at_recovery_target") == 0)
+       {
+           if (strcmp(item->value, "pause") == 0)
+               actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PAUSE;
+           else if (strcmp(item->value, "promote") == 0)
+               actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_PROMOTE;
+           else if (strcmp(item->value, "shutdown") == 0)
+               actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN;
+           else
+               ereport(ERROR,
+                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("invalid value for recovery parameter \"%s\"",
+                               "action_at_recovery_target"),
+                        errhint("The allowed values are \"pause\", \"promote\" and \"shutdown\".")));
+
+           ereport(DEBUG2,
+                   (errmsg_internal("action_at_recovery_target = '%s'",
+                                    item->value)));
+
+           actionAtRecoveryTargetSet = true;
        }
        else if (strcmp(item->name, "recovery_target_timeline") == 0)
        {
@@ -4863,6 +4896,28 @@ readRecoveryCommandFile(void)
                            RECOVERY_COMMAND_FILE)));
    }
 
+   /*
+    * Check for mutually exclusive parameters
+    */
+   if (recoveryPauseAtTargetSet && actionAtRecoveryTargetSet)
+       ereport(ERROR,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("cannot set both \"%s\" and \"%s\" recovery parameters",
+                       "pause_at_recovery_target",
+                       "action_at_recovery_target"),
+                errhint("The \"pause_at_recovery_target\" is deprecated.")));
+
+
+   /*
+    * Override any inconsistent requests. Note that this is a change
+    * of behaviour in 9.5; prior to this we simply ignored a request
+    * to pause if hot_standby = off, which was surprising behaviour.
+    */
+   if (actionAtRecoveryTarget == RECOVERY_TARGET_ACTION_PAUSE &&
+       actionAtRecoveryTargetSet &&
+       standbyState == STANDBY_DISABLED)
+           actionAtRecoveryTarget = RECOVERY_TARGET_ACTION_SHUTDOWN;
+
    /* Enable fetching from archive recovery area */
    ArchiveRecoveryRequested = true;
 
@@ -6415,10 +6470,37 @@ StartupXLOG(void)
             * end of main redo apply loop
             */
 
-           if (recoveryPauseAtTarget && reachedStopPoint)
+           if (reachedStopPoint)
            {
-               SetRecoveryPause(true);
-               recoveryPausesHere();
+               if (!reachedConsistency)
+                   ereport(FATAL,
+                       (errmsg("requested recovery stop point is before consistent recovery point")));
+
+               /*
+                * This is the last point where we can restart recovery with a
+                * new recovery target, if we shutdown and begin again. After
+                * this, Resource Managers may choose to do permanent corrective
+                * actions at end of recovery.
+                */
+               switch (actionAtRecoveryTarget)
+               {
+                   case RECOVERY_TARGET_ACTION_SHUTDOWN:
+                           /*
+                            * exit with special return code to request shutdown
+                            * of postmaster.  Log messages issued from
+                            * postmaster.
+                            */
+                           proc_exit(3);
+
+                   case RECOVERY_TARGET_ACTION_PAUSE:
+                           SetRecoveryPause(true);
+                           recoveryPausesHere();
+
+                           /* drop into promote */
+
+                   case RECOVERY_TARGET_ACTION_PROMOTE:
+                           break;
+               }
            }
 
            /* Allow resource managers to do any required cleanup. */
@@ -6436,6 +6518,7 @@ StartupXLOG(void)
                ereport(LOG,
                     (errmsg("last completed transaction was at log time %s",
                             timestamptz_to_str(xtime))));
+
            InRedo = false;
        }
        else
@@ -6496,13 +6579,6 @@ StartupXLOG(void)
        (EndOfLog < minRecoveryPoint ||
         !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
    {
-       if (reachedStopPoint)
-       {
-           /* stopped because of stop request */
-           ereport(FATAL,
-                   (errmsg("requested recovery stop point is before consistent recovery point")));
-       }
-
        /*
         * Ran off end of WAL before reaching end-of-backup WAL record, or
         * minRecoveryPoint. That's usually a bad sign, indicating that you
index 6220a8e6e471e6d7f39331e2b3b42cc2acaa8001..5106f52e0e0d49e56d9cc0050df72e5c7fef8a61 100644 (file)
@@ -509,6 +509,7 @@ static void ShmemBackendArrayRemove(Backend *bn);
 /* Macros to check exit status of a child process */
 #define EXIT_STATUS_0(st)  ((st) == 0)
 #define EXIT_STATUS_1(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 1)
+#define EXIT_STATUS_3(st)  (WIFEXITED(st) && WEXITSTATUS(st) == 3)
 
 #ifndef WIN32
 /*
@@ -2555,6 +2556,17 @@ reaper(SIGNAL_ARGS)
                continue;
            }
 
+           if (EXIT_STATUS_3(exitstatus))
+           {
+               ereport(LOG,
+                   (errmsg("shutdown at recovery target")));
+               Shutdown = SmartShutdown;
+               TerminateChildren(SIGTERM);
+               pmState = PM_WAIT_BACKENDS;
+               /* PostmasterStateMachine logic does the rest */
+               continue;
+           }
+
            /*
             * Unexpected exit of startup process (including FATAL exit)
             * during PM_STARTUP is treated as catastrophic. There are no
index 423ef4d7fa0d63414cd8122b009677ca504cf5da..85b3fe76bb67f272a9559423ce9dbcab2bac78ba 100644 (file)
@@ -214,6 +214,16 @@ typedef struct XLogRecData
    uint32      len;            /* length of rmgr data to include */
 } XLogRecData;
 
+/*
+ * Recovery target action: what to do once the recovery target is reached.
+ */
+typedef enum
+{
+   RECOVERY_TARGET_ACTION_PAUSE,       /* pause recovery at the target (the default) */
+   RECOVERY_TARGET_ACTION_PROMOTE,     /* finish recovery and start accepting connections */
+   RECOVERY_TARGET_ACTION_SHUTDOWN,    /* stop the server after reaching the target */
+} RecoveryTargetAction;
+
 /*
  * Method table for resource managers.
  *