Split xlog.c into xlog.c and xlogrecovery.c.

author Heikki Linnakangas <[email protected]>
Wed, 16 Feb 2022 07:30:38 +0000 (09:30 +0200)
committer Heikki Linnakangas <[email protected]>
Wed, 16 Feb 2022 07:30:38 +0000 (09:30 +0200)
diff --git a/contrib/pg_prewarm/autoprewarm.c b/contrib/pg_prewarm/autoprewarm.c

index 5d40fb502095b9a1ecdcac43e3caf7a7988e7868..1d4d74b171f16debc76e615fcffba18b73c23b62 100644 (file)
--- a/contrib/pg_prewarm/autoprewarm.c
+++ b/contrib/pg_prewarm/autoprewarm.c
@@ -38,6 +38,7 @@
  #include "postmaster/interrupt.h"
  #include "storage/buf_internals.h"
  #include "storage/dsm.h"
+#include "storage/fd.h"
  #include "storage/ipc.h"
  #include "storage/latch.h"
  #include "storage/lwlock.h"
diff --git a/src/backend/access/transam/Makefile b/src/backend/access/transam/Makefile

index 595e02de722cf5382d92558dfc7c68f52018e20a..79314c69abc01e4b1dc0f9e1525dc38b183fc2ab 100644 (file)
--- a/src/backend/access/transam/Makefile
+++ b/src/backend/access/transam/Makefile
@@ -32,6 +32,7 @@ OBJS = \
     xlogfuncs.o \
     xloginsert.o \
     xlogreader.o \
+   xlogrecovery.o \
     xlogutils.o
  
  include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c

index c9516e03faee70171b5f718bd5b7d4c5e291ae4f..bb1f1069463119ade9290c4dae12da7b0467ea47 100644 (file)
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -29,6 +29,7 @@
  #include "access/xact.h"
  #include "access/xlog.h"
  #include "access/xloginsert.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "catalog/index.h"
  #include "catalog/namespace.h"
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c

index 859eb32c48cc877e08c4c47499e25cec4e64cfe9..eb3c516058ff4555eb46654dc78a2b1d90be483a 100644 (file)
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3,6 +3,30 @@
   * xlog.c
   *     PostgreSQL write-ahead log manager
   *
+ * The Write-Ahead Log (WAL) functionality is split into several source
+ * files, in addition to this one:
+ *
+ * xloginsert.c - Functions for constructing WAL records
+ * xlogrecovery.c - WAL recovery and standby code
+ * xlogreader.c - Facility for reading WAL files and parsing WAL records
+ * xlogutils.c - Helper functions for WAL redo routines
+ *
+ * This file contains functions for coordinating database startup and
+ * checkpointing, and managing the write-ahead log buffers when the
+ * system is running.
+ *
+ * StartupXLOG() is the main entry point of the startup process.  It
+ * coordinates database startup, performing WAL recovery, and the
+ * transition from WAL recovery into normal operations.
+ *
+ * XLogInsertRecord() inserts a WAL record into the WAL buffers.  Most
+ * callers should not call this directly, but use the functions in
+ * xloginsert.c to construct the WAL record.  XLogFlush() can be used
+ * to force the WAL to disk.
+ *
+ * In addition to those, there are many other functions for interrogating
+ * the current system state, and for starting/stopping backups.
+ *
   *
   * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
@@ -36,12 +60,11 @@
  #include "access/xlogarchive.h"
  #include "access/xloginsert.h"
  #include "access/xlogreader.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "catalog/catversion.h"
  #include "catalog/pg_control.h"
  #include "catalog/pg_database.h"
-#include "commands/progress.h"
-#include "commands/tablespace.h"
  #include "common/controldata_utils.h"
  #include "executor/instrument.h"
  #include "miscadmin.h"
@@ -72,7 +95,6 @@
  #include "storage/smgr.h"
  #include "storage/spin.h"
  #include "storage/sync.h"
-#include "utils/builtins.h"
  #include "utils/guc.h"
  #include "utils/memutils.h"
  #include "utils/ps_status.h"
@@ -84,10 +106,6 @@
  
  extern uint32 bootstrap_data_checksum_version;
  
-/* Unsupported old recovery command file names (relative to $PGDATA) */
-#define RECOVERY_COMMAND_FILE  "recovery.conf"
-#define RECOVERY_COMMAND_DONE  "recovery.done"
-
  /* timeline ID to be used when bootstrapping */
  #define BootstrapTimeLineID        1
  
@@ -177,13 +195,6 @@ const struct config_enum_entry archive_mode_options[] = {
     {NULL, 0, false}
  };
  
-const struct config_enum_entry recovery_target_action_options[] = {
-   {"pause", RECOVERY_TARGET_ACTION_PAUSE, false},
-   {"promote", RECOVERY_TARGET_ACTION_PROMOTE, false},
-   {"shutdown", RECOVERY_TARGET_ACTION_SHUTDOWN, false},
-   {NULL, 0, false}
-};
-
  /*
   * Statistics for current checkpoint are collected in this global struct.
   * Because only the checkpointer or a stand-alone backend can perform
@@ -191,19 +202,6 @@ const struct config_enum_entry recovery_target_action_options[] = {
   */
  CheckpointStatsData CheckpointStats;
  
-/* Local copy of WalRcv->flushedUpto */
-static XLogRecPtr flushedUpto = 0;
-static TimeLineID receiveTLI = 0;
-
-/*
- * abortedRecPtr is the start pointer of a broken record at end of WAL when
- * recovery completes; missingContrecPtr is the location of the first
- * contrecord that went missing.  See CreateOverwriteContrecordRecord for
- * details.
- */
-static XLogRecPtr abortedRecPtr;
-static XLogRecPtr missingContrecPtr;
-
  /*
   * During recovery, lastFullPageWrites keeps track of full_page_writes that
   * the replayed WAL records indicate. It's initialized with full_page_writes
@@ -219,18 +217,6 @@ static bool lastFullPageWrites;
   */
  static bool LocalRecoveryInProgress = true;
  
-/*
- * Local copy of SharedHotStandbyActive variable. False actually means "not
- * known, need to check the shared state".
- */
-static bool LocalHotStandbyActive = false;
-
-/*
- * Local copy of SharedPromoteIsTriggered variable. False actually means "not
- * known, need to check the shared state".
- */
-static bool LocalPromoteIsTriggered = false;
-
  /*
   * Local state for XLogInsertAllowed():
   *     1: unconditionally allowed to insert XLOG
@@ -243,87 +229,6 @@ static bool LocalPromoteIsTriggered = false;
   */
  static int LocalXLogInsertAllowed = -1;
  
-/*
- * When ArchiveRecoveryRequested is set, archive recovery was requested,
- * ie. signal files were present. When InArchiveRecovery is set, we are
- * currently recovering using offline XLOG archives. These variables are only
- * valid in the startup process.
- *
- * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
- * currently performing crash recovery using only XLOG files in pg_wal, but
- * will switch to using offline XLOG archives as soon as we reach the end of
- * WAL in pg_wal.
-*/
-bool       ArchiveRecoveryRequested = false;
-bool       InArchiveRecovery = false;
-
-static bool standby_signal_file_found = false;
-static bool recovery_signal_file_found = false;
-
-/* Buffers dedicated to consistency checks of size BLCKSZ */
-static char *replay_image_masked = NULL;
-static char *primary_image_masked = NULL;
-
-/* options formerly taken from recovery.conf for archive recovery */
-char      *recoveryRestoreCommand = NULL;
-char      *recoveryEndCommand = NULL;
-char      *archiveCleanupCommand = NULL;
-RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
-bool       recoveryTargetInclusive = true;
-int            recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
-TransactionId recoveryTargetXid;
-char      *recovery_target_time_string;
-static TimestampTz recoveryTargetTime;
-const char *recoveryTargetName;
-XLogRecPtr recoveryTargetLSN;
-int            recovery_min_apply_delay = 0;
-
-/* options formerly taken from recovery.conf for XLOG streaming */
-bool       StandbyModeRequested = false;
-char      *PrimaryConnInfo = NULL;
-char      *PrimarySlotName = NULL;
-char      *PromoteTriggerFile = NULL;
-bool       wal_receiver_create_temp_slot = false;
-
-/* are we currently in standby mode? */
-bool       StandbyMode = false;
-
-/*
- * if recoveryStopsBefore/After returns true, it saves information of the stop
- * point here
- */
-static TransactionId recoveryStopXid;
-static TimestampTz recoveryStopTime;
-static XLogRecPtr recoveryStopLSN;
-static char recoveryStopName[MAXFNAMELEN];
-static bool recoveryStopAfter;
-
-/*
- * recoveryTargetTimeLineGoal: what the user requested, if any
- *
- * recoveryTargetTLIRequested: numeric value of requested timeline, if constant
- *
- * recoveryTargetTLI: the currently understood target timeline; changes
- *
- * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and the timelines of
- * its known parents, newest first (so recoveryTargetTLI is always the
- * first list member).  Only these TLIs are expected to be seen in the WAL
- * segments we read, and indeed only these TLIs will be considered as
- * candidate WAL files to open at all.
- *
- * curFileTLI: the TLI appearing in the name of the current input WAL file.
- * (This is not necessarily the same as the timeline from which we are
- * replaying WAL, which StartupXLOG calls replayTLI, because we could be
- * scanning data that was copied from an ancestor timeline when the current
- * file was created.)  During a sequential scan we do not allow this value
- * to decrease.
- */
-RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST;
-TimeLineID recoveryTargetTLIRequested = 0;
-TimeLineID recoveryTargetTLI = 0;
-static List *expectedTLEs;
-static TimeLineID curFileTLI;
-
  /*
   * ProcLastRecPtr points to the start of the last XLOG record inserted by the
   * current backend.  It is updated for all inserts.  XactLastRecEnd points to
@@ -374,21 +279,6 @@ static XLogRecPtr RedoRecPtr;
   */
  static bool doPageWrites;
  
-/* Has the recovery code requested a walreceiver wakeup? */
-static bool doRequestWalReceiverReply;
-
-/*
- * RedoStartLSN points to the checkpoint's REDO location which is specified
- * in a backup label file, backup history file or control file. In standby
- * mode, XLOG streaming usually starts from the position where an invalid
- * record was found. But if we fail to read even the initial checkpoint
- * record, we use the REDO location instead of the checkpoint location as
- * the start position of XLOG streaming. Otherwise we would have to jump
- * backwards to the REDO location after reading the checkpoint record,
- * because the REDO record can precede the checkpoint record.
- */
-static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
-
  /*----------
   * Shared-memory data structures for XLOG control
   *
@@ -650,12 +540,6 @@ typedef struct XLogCtlData
      */
     RecoveryState SharedRecoveryState;
  
-   /*
-    * SharedHotStandbyActive indicates if we allow hot standby queries to be
-    * run.  Protected by info_lck.
-    */
-   bool        SharedHotStandbyActive;
-
     /*
      * InstallXLogFileSegmentActive indicates whether the checkpointer should
      * arrange for future segments by recycling and/or PreallocXlogFiles().
@@ -666,12 +550,6 @@ typedef struct XLogCtlData
      */
     bool        InstallXLogFileSegmentActive;
  
-   /*
-    * SharedPromoteIsTriggered indicates if a standby promotion has been
-    * triggered.  Protected by info_lck.
-    */
-   bool        SharedPromoteIsTriggered;
-
     /*
      * WalWriterSleeping indicates whether the WAL writer is currently in
      * low-power mode (and hence should be nudged if an async commit occurs).
@@ -679,23 +557,6 @@ typedef struct XLogCtlData
      */
     bool        WalWriterSleeping;
  
-   /*
-    * recoveryWakeupLatch is used to wake up the startup process to continue
-    * WAL replay, if it is waiting for WAL to arrive or failover trigger file
-    * to appear.
-    *
-    * Note that the startup process also uses another latch, its procLatch,
-    * to wait for recovery conflict. If we get rid of recoveryWakeupLatch for
-    * signaling the startup process in favor of using its procLatch, which
-    * comports better with possible generic signal handlers using that latch.
-    * But we should not do that because the startup process doesn't assume
-    * that it's waken up by walreceiver process or SIGHUP signal handler
-    * while it's waiting for recovery conflict. The separate latches,
-    * recoveryWakeupLatch and procLatch, should be used for inter-process
-    * communication for WAL replay and recovery conflict, respectively.
-    */
-   Latch       recoveryWakeupLatch;
-
     /*
      * During recovery, we keep a copy of the latest checkpoint record here.
      * lastCheckPointRecPtr points to start of checkpoint record and
@@ -708,28 +569,6 @@ typedef struct XLogCtlData
     XLogRecPtr  lastCheckPointEndPtr;
     CheckPoint  lastCheckPoint;
  
-   /*
-    * lastReplayedEndRecPtr points to end+1 of the last record successfully
-    * replayed. When we're currently replaying a record, ie. in a redo
-    * function, replayEndRecPtr points to the end+1 of the record being
-    * replayed, otherwise it's equal to lastReplayedEndRecPtr.
-    */
-   XLogRecPtr  lastReplayedEndRecPtr;
-   TimeLineID  lastReplayedTLI;
-   XLogRecPtr  replayEndRecPtr;
-   TimeLineID  replayEndTLI;
-   /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
-   TimestampTz recoveryLastXTime;
-
-   /*
-    * timestamp of when we started replaying the current chunk of WAL data,
-    * only relevant for replication or archive recovery
-    */
-   TimestampTz currentChunkStartTime;
-   /* Recovery pause state */
-   RecoveryPauseState recoveryPauseState;
-   ConditionVariable recoveryNotPausedCV;
-
     /*
      * lastFpwDisableRecPtr points to the start of the last replayed
      * XLOG_FPW_CHANGE record that instructs full_page_writes is disabled.
@@ -787,21 +626,6 @@ static int UsableBytesInSegment;
   */
  static XLogwrtResult LogwrtResult = {0, 0};
  
-/*
- * Codes indicating where we got a WAL file from during recovery, or where
- * to attempt to get one.
- */
-typedef enum
-{
-   XLOG_FROM_ANY = 0,          /* request to read WAL from any source */
-   XLOG_FROM_ARCHIVE,          /* restored using restore_command */
-   XLOG_FROM_PG_WAL,           /* existing file in pg_wal */
-   XLOG_FROM_STREAM            /* streamed from primary */
-} XLogSource;
-
-/* human-readable names for XLogSources, for debugging output */
-static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
-
  /*
   * openLogFile is -1 or a kernel FD for an open log file segment.
   * openLogSegNo identifies the segment, and openLogTLI the corresponding TLI.
@@ -814,74 +638,17 @@ static int    openLogFile = -1;
  static XLogSegNo openLogSegNo = 0;
  static TimeLineID openLogTLI = 0;
  
-/*
- * These variables are used similarly to the ones above, but for reading
- * the XLOG.  readOff is the offset of the page just read, readLen
- * indicates how much of it has been read into readBuf, and readSource
- * indicates where we got the currently open file from.
- * Note: we could use Reserve/ReleaseExternalFD to track consumption of
- * this FD too; but it doesn't currently seem worthwhile, since the XLOG is
- * not read by general-purpose sessions.
- */
-static int readFile = -1;
-static XLogSegNo readSegNo = 0;
-static uint32 readOff = 0;
-static uint32 readLen = 0;
-static XLogSource readSource = XLOG_FROM_ANY;
-
-/*
- * Keeps track of which source we're currently reading from. This is
- * different from readSource in that this is always set, even when we don't
- * currently have a WAL file open. If lastSourceFailed is set, our last
- * attempt to read from currentSource failed, and we should try another source
- * next.
- *
- * pendingWalRcvRestart is set when a config change occurs that requires a
- * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
- */
-static XLogSource currentSource = XLOG_FROM_ANY;
-static bool lastSourceFailed = false;
-static bool pendingWalRcvRestart = false;
-
-typedef struct XLogPageReadPrivate
-{
-   int         emode;
-   bool        fetching_ckpt;  /* are we fetching a checkpoint record? */
-   bool        randAccess;
-   TimeLineID  replayTLI;
-} XLogPageReadPrivate;
-
-/*
- * These variables track when we last obtained some WAL data to process,
- * and where we got it from.  (XLogReceiptSource is initially the same as
- * readSource, but readSource gets reset to zero when we don't have data
- * to process right now.  It is also different from currentSource, which
- * also changes when we try to read from a source and fail, while
- * XLogReceiptSource tracks where we last successfully read some WAL.)
- */
-static TimestampTz XLogReceiptTime = 0;
-static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
-
  /*
   * Local copies of equivalent fields in the control file.  When running
- * crash recovery, minRecoveryPoint is set to InvalidXLogRecPtr as we
+ * crash recovery, LocalMinRecoveryPoint is set to InvalidXLogRecPtr as we
   * expect to replay all the WAL available, and updateMinRecoveryPoint is
   * switched to false to prevent any updates while replaying records.
   * Those values are kept consistent as long as crash recovery runs.
   */
-static XLogRecPtr minRecoveryPoint;
-static TimeLineID minRecoveryPointTLI;
+static XLogRecPtr LocalMinRecoveryPoint;
+static TimeLineID LocalMinRecoveryPointTLI;
  static bool updateMinRecoveryPoint = true;
  
-/*
- * Have we reached a consistent database state? In crash recovery, we have
- * to replay all the WAL, so reachedConsistency is never set. During archive
- * recovery, the database is consistent once minRecoveryPoint is reached.
- */
-bool       reachedConsistency = false;
-
-static bool InRedo = false;
-
  /* For WALInsertLockAcquire/Release functions */
  static int MyLockNo = 0;
  static bool holdingAllLocks = false;
@@ -890,27 +657,11 @@ static bool holdingAllLocks = false;
  static MemoryContext walDebugCxt = NULL;
  #endif
  
-static void readRecoverySignalFile(void);
-static void validateRecoveryParameters(void);
-static void XLogInitNewTimeline(TimeLineID endTLI, XLogRecPtr endOfLog,
-                               TimeLineID newTLI);
  static void CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI,
                                         XLogRecPtr EndOfLog,
                                         TimeLineID newTLI);
-static bool recoveryStopsBefore(XLogReaderState *record);
-static bool recoveryStopsAfter(XLogReaderState *record);
-static char *getRecoveryStopReason(void);
-static void ConfirmRecoveryPaused(void);
-static void recoveryPausesHere(bool endOfRecovery);
-static bool recoveryApplyDelay(XLogReaderState *record);
-static void SetLatestXTime(TimestampTz xtime);
-static void SetCurrentChunkStartTime(TimestampTz xtime);
  static void CheckRequiredParameterValues(void);
  static void XLogReportParameters(void);
-static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
-                               TimeLineID prevTLI, TimeLineID replayTLI);
-static void VerifyOverwriteContrecord(xl_overwrite_contrecord *xlrec,
-                                     XLogReaderState *state);
  static int LocalSetXLogInsertAllowed(void);
  static void CreateEndOfRecoveryRecord(void);
  static XLogRecPtr CreateOverwriteContrecordRecord(XLogRecPtr aborted_lsn,
@@ -922,22 +673,10 @@ static XLogRecPtr XLogGetReplicationSlotMinimumLSN(void);
  
  static void AdvanceXLInsertBuffer(XLogRecPtr upto, TimeLineID tli,
                                   bool opportunistic);
-static bool XLogCheckpointNeeded(XLogSegNo new_segno);
  static void XLogWrite(XLogwrtRqst WriteRqst, TimeLineID tli, bool flexible);
  static bool InstallXLogFileSegment(XLogSegNo *segno, char *tmppath,
                                    bool find_free, XLogSegNo max_segno,
                                    TimeLineID tli);
-static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
-                        XLogSource source, bool notfoundOk);
-static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source);
-static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
-                        int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
-static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
-                                       bool fetching_ckpt, XLogRecPtr tliRecPtr,
-                                       TimeLineID replayTLI,
-                                       XLogRecPtr replayLSN);
-static void XLogShutdownWalRcv(void);
-static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
  static void XLogFileClose(void);
  static void PreallocXlogFiles(XLogRecPtr endptr, TimeLineID tli);
  static void RemoveTempXlogFiles(void);
@@ -949,36 +688,16 @@ static void UpdateLastRemovedPtr(char *filename);
  static void ValidateXLOGDirectoryStructure(void);
  static void CleanupBackupHistory(void);
  static void UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force);
-static XLogRecord *ReadRecord(XLogReaderState *xlogreader,
-                             int emode, bool fetching_ckpt,
-                             TimeLineID replayTLI);
-static void CheckRecoveryConsistency(void);
  static bool PerformRecoveryXLogAction(void);
-static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader,
-                                       XLogRecPtr RecPtr, int whichChkpt, bool report,
-                                       TimeLineID replayTLI);
-static bool rescanLatestTimeLine(TimeLineID replayTLI,
-                                XLogRecPtr replayLSN);
  static void InitControlFile(uint64 sysidentifier);
  static void WriteControlFile(void);
  static void ReadControlFile(void);
+static void UpdateControlFile(void);
  static char *str_time(pg_time_t tnow);
-static void SetPromoteIsTriggered(void);
-static bool CheckForStandbyTrigger(void);
  
-#ifdef WAL_DEBUG
-static void xlog_outrec(StringInfo buf, XLogReaderState *record);
-#endif
-static void xlog_block_info(StringInfo buf, XLogReaderState *record);
-static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
  static void pg_start_backup_callback(int code, Datum arg);
  static void pg_stop_backup_callback(int code, Datum arg);
-static bool read_backup_label(XLogRecPtr *checkPointLoc,
-                             TimeLineID *backupLabelTLI,
-                             bool *backupEndRequired, bool *backupFromStandby);
-static bool read_tablespace_map(List **tablespaces);
  
-static void rm_redo_error_callback(void *arg);
  static int get_sync_bit(int method);
  
  static void CopyXLogRecordToWAL(int write_len, bool isLogSwitch,
@@ -994,7 +713,6 @@ static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli);
  static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
  static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
  static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
-static void checkXLogConsistency(XLogReaderState *record);
  
  static void WALInsertLockAcquire(void);
  static void WALInsertLockAcquireExclusive(void);
@@ -1442,114 +1160,6 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
     return true;
  }
  
-/*
- * Checks whether the current buffer page and backup page stored in the
- * WAL record are consistent or not. Before comparing the two pages, a
- * masking can be applied to the pages to ignore certain areas like hint bits,
- * unused space between pd_lower and pd_upper among other things. This
- * function should be called once WAL replay has been completed for a
- * given record.
- */
-static void
-checkXLogConsistency(XLogReaderState *record)
-{
-   RmgrId      rmid = XLogRecGetRmid(record);
-   RelFileNode rnode;
-   ForkNumber  forknum;
-   BlockNumber blkno;
-   int         block_id;
-
-   /* Records with no backup blocks have no need for consistency checks. */
-   if (!XLogRecHasAnyBlockRefs(record))
-       return;
-
-   Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
-
-   for (block_id = 0; block_id <= record->max_block_id; block_id++)
-   {
-       Buffer      buf;
-       Page        page;
-
-       if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
-       {
-           /*
-            * WAL record doesn't contain a block reference with the given id.
-            * Do nothing.
-            */
-           continue;
-       }
-
-       Assert(XLogRecHasBlockImage(record, block_id));
-
-       if (XLogRecBlockImageApply(record, block_id))
-       {
-           /*
-            * WAL record has already applied the page, so bypass the
-            * consistency check as that would result in comparing the full
-            * page stored in the record with itself.
-            */
-           continue;
-       }
-
-       /*
-        * Read the contents from the current buffer and store it in a
-        * temporary page.
-        */
-       buf = XLogReadBufferExtended(rnode, forknum, blkno,
-                                    RBM_NORMAL_NO_LOG);
-       if (!BufferIsValid(buf))
-           continue;
-
-       LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
-       page = BufferGetPage(buf);
-
-       /*
-        * Take a copy of the local page where WAL has been applied to have a
-        * comparison base before masking it...
-        */
-       memcpy(replay_image_masked, page, BLCKSZ);
-
-       /* No need for this page anymore now that a copy is in. */
-       UnlockReleaseBuffer(buf);
-
-       /*
-        * If the block LSN is already ahead of this WAL record, we can't
-        * expect contents to match.  This can happen if recovery is
-        * restarted.
-        */
-       if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
-           continue;
-
-       /*
-        * Read the contents from the backup copy, stored in WAL record and
-        * store it in a temporary page. There is no need to allocate a new
-        * page here, a local buffer is fine to hold its contents and a mask
-        * can be directly applied on it.
-        */
-       if (!RestoreBlockImage(record, block_id, primary_image_masked))
-           elog(ERROR, "failed to restore block image");
-
-       /*
-        * If masking function is defined, mask both the primary and replay
-        * images
-        */
-       if (RmgrTable[rmid].rm_mask != NULL)
-       {
-           RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
-           RmgrTable[rmid].rm_mask(primary_image_masked, blkno);
-       }
-
-       /* Time to compare the primary and replay images. */
-       if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
-       {
-           elog(FATAL,
-                "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
-                rnode.spcNode, rnode.dbNode, rnode.relNode,
-                forknum, blkno);
-       }
-   }
-}
-
  /*
   * Subroutine of XLogInsertRecord.  Copies a WAL record to an already-reserved
   * area in the WAL.
@@ -2435,7 +2045,7 @@ XLOGfileslop(XLogRecPtr lastredoptr)
   *
   * Note: it is caller's responsibility that RedoRecPtr is up-to-date.
   */
-static bool
+bool
  XLogCheckpointNeeded(XLogSegNo new_segno)
  {
     XLogSegNo   old_segno;
@@ -2829,7 +2439,7 @@ static void
  UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
  {
     /* Quick check using our local copy of the variable */
-   if (!updateMinRecoveryPoint || (!force && lsn <= minRecoveryPoint))
+   if (!updateMinRecoveryPoint || (!force && lsn <= LocalMinRecoveryPoint))
         return;
  
     /*
@@ -2843,7 +2453,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
      * available is replayed in this case.  This also saves from extra locks
      * taken on the control file from the startup process.
      */
-   if (XLogRecPtrIsInvalid(minRecoveryPoint) && InRecovery)
+   if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
     {
         updateMinRecoveryPoint = false;
         return;
@@ -2852,12 +2462,12 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
     LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  
     /* update local copy */
-   minRecoveryPoint = ControlFile->minRecoveryPoint;
-   minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+   LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+   LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
  
-   if (XLogRecPtrIsInvalid(minRecoveryPoint))
+   if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
         updateMinRecoveryPoint = false;
-   else if (force || minRecoveryPoint < lsn)
+   else if (force || LocalMinRecoveryPoint < lsn)
     {
         XLogRecPtr  newMinRecoveryPoint;
         TimeLineID  newMinRecoveryPointTLI;
@@ -2875,11 +2485,7 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
          * all.  Instead, we just log a warning and continue with recovery.
          * (See also the comments about corrupt LSNs in XLogFlush.)
          */
-       SpinLockAcquire(&XLogCtl->info_lck);
-       newMinRecoveryPoint = XLogCtl->replayEndRecPtr;
-       newMinRecoveryPointTLI = XLogCtl->replayEndTLI;
-       SpinLockRelease(&XLogCtl->info_lck);
-
+       newMinRecoveryPoint = GetCurrentReplayRecPtr(&newMinRecoveryPointTLI);
         if (!force && newMinRecoveryPoint < lsn)
             elog(WARNING,
                  "xlog min recovery request %X/%X is past current point %X/%X",
@@ -2891,12 +2497,12 @@ UpdateMinRecoveryPoint(XLogRecPtr lsn, bool force)
             ControlFile->minRecoveryPoint = newMinRecoveryPoint;
             ControlFile->minRecoveryPointTLI = newMinRecoveryPointTLI;
             UpdateControlFile();
-           minRecoveryPoint = newMinRecoveryPoint;
-           minRecoveryPointTLI = newMinRecoveryPointTLI;
+           LocalMinRecoveryPoint = newMinRecoveryPoint;
+           LocalMinRecoveryPointTLI = newMinRecoveryPointTLI;
  
             ereport(DEBUG2,
                     (errmsg_internal("updated min recovery point to %X/%X on timeline %u",
-                                    LSN_FORMAT_ARGS(minRecoveryPoint),
+                                    LSN_FORMAT_ARGS(newMinRecoveryPoint),
                                      newMinRecoveryPointTLI)));
         }
     }
@@ -3256,11 +2862,11 @@ XLogNeedsFlush(XLogRecPtr record)
          * which cannot update its local copy of minRecoveryPoint as long as
          * it has not replayed all WAL available when doing crash recovery.
          */
-       if (XLogRecPtrIsInvalid(minRecoveryPoint) && InRecovery)
+       if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint) && InRecovery)
             updateMinRecoveryPoint = false;
  
         /* Quick exit if already known to be updated or cannot be updated */
-       if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
+       if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
             return false;
  
         /*
@@ -3269,8 +2875,8 @@ XLogNeedsFlush(XLogRecPtr record)
          */
         if (!LWLockConditionalAcquire(ControlFileLock, LW_SHARED))
             return true;
-       minRecoveryPoint = ControlFile->minRecoveryPoint;
-       minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+       LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+       LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
         LWLockRelease(ControlFileLock);
  
         /*
@@ -3278,11 +2884,11 @@ XLogNeedsFlush(XLogRecPtr record)
          * process doing crash recovery, which should not update the control
          * file value if crash recovery is still running.
          */
-       if (XLogRecPtrIsInvalid(minRecoveryPoint))
+       if (XLogRecPtrIsInvalid(LocalMinRecoveryPoint))
             updateMinRecoveryPoint = false;
  
         /* check again */
-       if (record <= minRecoveryPoint || !updateMinRecoveryPoint)
+       if (record <= LocalMinRecoveryPoint || !updateMinRecoveryPoint)
             return false;
         else
             return true;
@@ -3763,192 +3369,6 @@ XLogFileOpen(XLogSegNo segno, TimeLineID tli)
     return fd;
  }
  
-/*
- * Open a logfile segment for reading (during recovery).
- *
- * If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
- * Otherwise, it's assumed to be already available in pg_wal.
- */
-static int
-XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
-            XLogSource source, bool notfoundOk)
-{
-   char        xlogfname[MAXFNAMELEN];
-   char        activitymsg[MAXFNAMELEN + 16];
-   char        path[MAXPGPATH];
-   int         fd;
-
-   XLogFileName(xlogfname, tli, segno, wal_segment_size);
-
-   switch (source)
-   {
-       case XLOG_FROM_ARCHIVE:
-           /* Report recovery progress in PS display */
-           snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
-                    xlogfname);
-           set_ps_display(activitymsg);
-
-           if (!RestoreArchivedFile(path, xlogfname,
-                                    "RECOVERYXLOG",
-                                    wal_segment_size,
-                                    InRedo))
-               return -1;
-           break;
-
-       case XLOG_FROM_PG_WAL:
-       case XLOG_FROM_STREAM:
-           XLogFilePath(path, tli, segno, wal_segment_size);
-           break;
-
-       default:
-           elog(ERROR, "invalid XLogFileRead source %d", source);
-   }
-
-   /*
-    * If the segment was fetched from archival storage, replace the existing
-    * xlog segment (if any) with the archival version.
-    */
-   if (source == XLOG_FROM_ARCHIVE)
-   {
-       Assert(!XLogCtl->InstallXLogFileSegmentActive);
-       KeepFileRestoredFromArchive(path, xlogfname);
-
-       /*
-        * Set path to point at the new file in pg_wal.
-        */
-       snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
-   }
-
-   fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
-   if (fd >= 0)
-   {
-       /* Success! */
-       curFileTLI = tli;
-
-       /* Report recovery progress in PS display */
-       snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
-                xlogfname);
-       set_ps_display(activitymsg);
-
-       /* Track source of data in assorted state variables */
-       readSource = source;
-       XLogReceiptSource = source;
-       /* In FROM_STREAM case, caller tracks receipt time, not me */
-       if (source != XLOG_FROM_STREAM)
-           XLogReceiptTime = GetCurrentTimestamp();
-
-       return fd;
-   }
-   if (errno != ENOENT || !notfoundOk) /* unexpected failure? */
-       ereport(PANIC,
-               (errcode_for_file_access(),
-                errmsg("could not open file \"%s\": %m", path)));
-   return -1;
-}
-
-/*
- * Open a logfile segment for reading (during recovery).
- *
- * This version searches for the segment with any TLI listed in expectedTLEs.
- */
-static int
-XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source)
-{
-   char        path[MAXPGPATH];
-   ListCell   *cell;
-   int         fd;
-   List       *tles;
-
-   /*
-    * Loop looking for a suitable timeline ID: we might need to read any of
-    * the timelines listed in expectedTLEs.
-    *
-    * We expect curFileTLI on entry to be the TLI of the preceding file in
-    * sequence, or 0 if there was no predecessor.  We do not allow curFileTLI
-    * to go backwards; this prevents us from picking up the wrong file when a
-    * parent timeline extends to higher segment numbers than the child we
-    * want to read.
-    *
-    * If we haven't read the timeline history file yet, read it now, so that
-    * we know which TLIs to scan.  We don't save the list in expectedTLEs,
-    * however, unless we actually find a valid segment.  That way if there is
-    * neither a timeline history file nor a WAL segment in the archive, and
-    * streaming replication is set up, we'll read the timeline history file
-    * streamed from the primary when we start streaming, instead of
-    * recovering with a dummy history generated here.
-    */
-   if (expectedTLEs)
-       tles = expectedTLEs;
-   else
-       tles = readTimeLineHistory(recoveryTargetTLI);
-
-   foreach(cell, tles)
-   {
-       TimeLineHistoryEntry *hent = (TimeLineHistoryEntry *) lfirst(cell);
-       TimeLineID  tli = hent->tli;
-
-       if (tli < curFileTLI)
-           break;              /* don't bother looking at too-old TLIs */
-
-       /*
-        * Skip scanning the timeline ID that the logfile segment to read
-        * doesn't belong to
-        */
-       if (hent->begin != InvalidXLogRecPtr)
-       {
-           XLogSegNo   beginseg = 0;
-
-           XLByteToSeg(hent->begin, beginseg, wal_segment_size);
-
-           /*
-            * The logfile segment that doesn't belong to the timeline is
-            * older or newer than the segment that the timeline started or
-            * ended at, respectively. It's sufficient to check only the
-            * starting segment of the timeline here. Since the timelines are
-            * scanned in descending order in this loop, any segments newer
-            * than the ending segment should belong to newer timeline and
-            * have already been read before. So it's not necessary to check
-            * the ending segment of the timeline here.
-            */
-           if (segno < beginseg)
-               continue;
-       }
-
-       if (source == XLOG_FROM_ANY || source == XLOG_FROM_ARCHIVE)
-       {
-           fd = XLogFileRead(segno, emode, tli,
-                             XLOG_FROM_ARCHIVE, true);
-           if (fd != -1)
-           {
-               elog(DEBUG1, "got WAL segment from archive");
-               if (!expectedTLEs)
-                   expectedTLEs = tles;
-               return fd;
-           }
-       }
-
-       if (source == XLOG_FROM_ANY || source == XLOG_FROM_PG_WAL)
-       {
-           fd = XLogFileRead(segno, emode, tli,
-                             XLOG_FROM_PG_WAL, true);
-           if (fd != -1)
-           {
-               if (!expectedTLEs)
-                   expectedTLEs = tles;
-               return fd;
-           }
-       }
-   }
-
-   /* Couldn't find it.  For simplicity, complain about front timeline */
-   XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size);
-   errno = ENOENT;
-   ereport(emode,
-           (errcode_for_file_access(),
-            errmsg("could not open file \"%s\": %m", path)));
-   return -1;
-}
-
  /*
   * Close the current logfile segment for writing.
   */
@@ -4216,7 +3636,7 @@ RemoveOldXlogFiles(XLogSegNo segno, XLogRecPtr lastredoptr, XLogRecPtr endptr,
   * 'switchpoint' is the current point in WAL where we switch to new timeline,
   * and 'newTLI' is the new timeline we switch to.
   */
-static void
+void
  RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI)
  {
     DIR        *xldir;
@@ -4442,298 +3862,43 @@ CleanupBackupHistory(void)
  }
  
  /*
- * Attempt to read the next XLOG record.
+ * I/O routines for pg_control
   *
- * Before first call, the reader needs to be positioned to the first record
- * by calling XLogBeginRead().
+ * *ControlFile is a buffer in shared memory that holds an image of the
+ * contents of pg_control.  WriteControlFile() initializes pg_control
+ * given a preloaded buffer, ReadControlFile() loads the buffer from
+ * the pg_control file (during postmaster or standalone-backend startup),
+ * and UpdateControlFile() rewrites pg_control after we modify xlog state.
+ * InitControlFile() fills the buffer with initial values.
   *
- * If no valid record is available, returns NULL, or fails if emode is PANIC.
- * (emode must be either PANIC, LOG). In standby mode, retries until a valid
- * record is available.
+ * For simplicity, WriteControlFile() initializes the fields of pg_control
+ * that are related to checking backend/database compatibility, and
+ * ReadControlFile() verifies they are correct.  We could split out the
+ * I/O and compatibility-check functions, but there seems no need currently.
   */
-static XLogRecord *
-ReadRecord(XLogReaderState *xlogreader, int emode,
-          bool fetching_ckpt, TimeLineID replayTLI)
-{
-   XLogRecord *record;
-   XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
  
-   /* Pass through parameters to XLogPageRead */
-   private->fetching_ckpt = fetching_ckpt;
-   private->emode = emode;
-   private->randAccess = (xlogreader->ReadRecPtr == InvalidXLogRecPtr);
-   private->replayTLI = replayTLI;
+static void
+InitControlFile(uint64 sysidentifier)
+{
+   char        mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
  
-   /* This is the first attempt to read this page. */
-   lastSourceFailed = false;
+   /*
+    * Generate a random nonce. This is used for authentication requests that
+    * will fail because the user does not exist. The nonce is used to create
+    * a genuine-looking password challenge for the non-existent user, in lieu
+    * of an actual stored password.
+    */
+   if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
+       ereport(PANIC,
+               (errcode(ERRCODE_INTERNAL_ERROR),
+                errmsg("could not generate secret authorization token")));
  
-   for (;;)
-   {
-       char       *errormsg;
-
-       record = XLogReadRecord(xlogreader, &errormsg);
-       if (record == NULL)
-       {
-           /*
-            * When not in standby mode we find that WAL ends in an incomplete
-            * record, keep track of that record.  After recovery is done,
-            * we'll write a record to indicate downstream WAL readers that
-            * that portion is to be ignored.
-            */
-           if (!StandbyMode &&
-               !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr))
-           {
-               abortedRecPtr = xlogreader->abortedRecPtr;
-               missingContrecPtr = xlogreader->missingContrecPtr;
-           }
-
-           if (readFile >= 0)
-           {
-               close(readFile);
-               readFile = -1;
-           }
-
-           /*
-            * We only end up here without a message when XLogPageRead()
-            * failed - in that case we already logged something. In
-            * StandbyMode that only happens if we have been triggered, so we
-            * shouldn't loop anymore in that case.
-            */
-           if (errormsg)
-               ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
-                       (errmsg_internal("%s", errormsg) /* already translated */ ));
-       }
-
-       /*
-        * Check page TLI is one of the expected values.
-        */
-       else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
-       {
-           char        fname[MAXFNAMELEN];
-           XLogSegNo   segno;
-           int32       offset;
-
-           XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
-           offset = XLogSegmentOffset(xlogreader->latestPagePtr,
-                                      wal_segment_size);
-           XLogFileName(fname, xlogreader->seg.ws_tli, segno,
-                        wal_segment_size);
-           ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
-                   (errmsg("unexpected timeline ID %u in log segment %s, offset %u",
-                           xlogreader->latestPageTLI,
-                           fname,
-                           offset)));
-           record = NULL;
-       }
-
-       if (record)
-       {
-           /* Great, got a record */
-           return record;
-       }
-       else
-       {
-           /* No valid record available from this source */
-           lastSourceFailed = true;
-
-           /*
-            * If archive recovery was requested, but we were still doing
-            * crash recovery, switch to archive recovery and retry using the
-            * offline archive. We have now replayed all the valid WAL in
-            * pg_wal, so we are presumably now consistent.
-            *
-            * We require that there's at least some valid WAL present in
-            * pg_wal, however (!fetching_ckpt).  We could recover using the
-            * WAL from the archive, even if pg_wal is completely empty, but
-            * we'd have no idea how far we'd have to replay to reach
-            * consistency.  So err on the safe side and give up.
-            */
-           if (!InArchiveRecovery && ArchiveRecoveryRequested &&
-               !fetching_ckpt)
-           {
-               ereport(DEBUG1,
-                       (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
-               InArchiveRecovery = true;
-               if (StandbyModeRequested)
-                   StandbyMode = true;
-
-               /* initialize minRecoveryPoint to this record */
-               LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-               ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
-               if (ControlFile->minRecoveryPoint < xlogreader->EndRecPtr)
-               {
-                   ControlFile->minRecoveryPoint = xlogreader->EndRecPtr;
-                   ControlFile->minRecoveryPointTLI = replayTLI;
-               }
-               /* update local copy */
-               minRecoveryPoint = ControlFile->minRecoveryPoint;
-               minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
-
-               /*
-                * The startup process can update its local copy of
-                * minRecoveryPoint from this point.
-                */
-               updateMinRecoveryPoint = true;
-
-               UpdateControlFile();
-
-               /*
-                * We update SharedRecoveryState while holding the lock on
-                * ControlFileLock so both states are consistent in shared
-                * memory.
-                */
-               SpinLockAcquire(&XLogCtl->info_lck);
-               XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
-               SpinLockRelease(&XLogCtl->info_lck);
-
-               LWLockRelease(ControlFileLock);
-
-               CheckRecoveryConsistency();
-
-               /*
-                * Before we retry, reset lastSourceFailed and currentSource
-                * so that we will check the archive next.
-                */
-               lastSourceFailed = false;
-               currentSource = XLOG_FROM_ANY;
-
-               continue;
-           }
-
-           /* In standby mode, loop back to retry. Otherwise, give up. */
-           if (StandbyMode && !CheckForStandbyTrigger())
-               continue;
-           else
-               return NULL;
-       }
-   }
-}
-
-/*
- * Scan for new timelines that might have appeared in the archive since we
- * started recovery.
- *
- * If there are any, the function changes recovery target TLI to the latest
- * one and returns 'true'.
- */
-static bool
-rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
-{
-   List       *newExpectedTLEs;
-   bool        found;
-   ListCell   *cell;
-   TimeLineID  newtarget;
-   TimeLineID  oldtarget = recoveryTargetTLI;
-   TimeLineHistoryEntry *currentTle = NULL;
-
-   newtarget = findNewestTimeLine(recoveryTargetTLI);
-   if (newtarget == recoveryTargetTLI)
-   {
-       /* No new timelines found */
-       return false;
-   }
-
-   /*
-    * Determine the list of expected TLIs for the new TLI
-    */
-
-   newExpectedTLEs = readTimeLineHistory(newtarget);
-
-   /*
-    * If the current timeline is not part of the history of the new timeline,
-    * we cannot proceed to it.
-    */
-   found = false;
-   foreach(cell, newExpectedTLEs)
-   {
-       currentTle = (TimeLineHistoryEntry *) lfirst(cell);
-
-       if (currentTle->tli == recoveryTargetTLI)
-       {
-           found = true;
-           break;
-       }
-   }
-   if (!found)
-   {
-       ereport(LOG,
-               (errmsg("new timeline %u is not a child of database system timeline %u",
-                       newtarget,
-                       replayTLI)));
-       return false;
-   }
-
-   /*
-    * The current timeline was found in the history file, but check that the
-    * next timeline was forked off from it *after* the current recovery
-    * location.
-    */
-   if (currentTle->end < replayLSN)
-   {
-       ereport(LOG,
-               (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
-                       newtarget,
-                       replayTLI,
-                       LSN_FORMAT_ARGS(replayLSN))));
-       return false;
-   }
-
-   /* The new timeline history seems valid. Switch target */
-   recoveryTargetTLI = newtarget;
-   list_free_deep(expectedTLEs);
-   expectedTLEs = newExpectedTLEs;
-
-   /*
-    * As in StartupXLOG(), try to ensure we have all the history files
-    * between the old target and new target in pg_wal.
-    */
-   restoreTimeLineHistoryFiles(oldtarget + 1, newtarget);
-
-   ereport(LOG,
-           (errmsg("new target timeline is %u",
-                   recoveryTargetTLI)));
-
-   return true;
-}
-
-/*
- * I/O routines for pg_control
- *
- * *ControlFile is a buffer in shared memory that holds an image of the
- * contents of pg_control.  WriteControlFile() initializes pg_control
- * given a preloaded buffer, ReadControlFile() loads the buffer from
- * the pg_control file (during postmaster or standalone-backend startup),
- * and UpdateControlFile() rewrites pg_control after we modify xlog state.
- * InitControlFile() fills the buffer with initial values.
- *
- * For simplicity, WriteControlFile() initializes the fields of pg_control
- * that are related to checking backend/database compatibility, and
- * ReadControlFile() verifies they are correct.  We could split out the
- * I/O and compatibility-check functions, but there seems no need currently.
- */
-
-static void
-InitControlFile(uint64 sysidentifier)
-{
-   char        mock_auth_nonce[MOCK_AUTH_NONCE_LEN];
-
-   /*
-    * Generate a random nonce. This is used for authentication requests that
-    * will fail because the user does not exist. The nonce is used to create
-    * a genuine-looking password challenge for the non-existent user, in lieu
-    * of an actual stored password.
-    */
-   if (!pg_strong_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
-       ereport(PANIC,
-               (errcode(ERRCODE_INTERNAL_ERROR),
-                errmsg("could not generate secret authorization token")));
-
-   memset(ControlFile, 0, sizeof(ControlFileData));
-   /* Initialize pg_control status fields */
-   ControlFile->system_identifier = sysidentifier;
-   memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
-   ControlFile->state = DB_SHUTDOWNED;
-   ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
+   memset(ControlFile, 0, sizeof(ControlFileData));
+   /* Initialize pg_control status fields */
+   ControlFile->system_identifier = sysidentifier;
+   memcpy(ControlFile->mock_authentication_nonce, mock_auth_nonce, MOCK_AUTH_NONCE_LEN);
+   ControlFile->state = DB_SHUTDOWNED;
+   ControlFile->unloggedLSN = FirstNormalUnloggedLSN;
  
     /* Set important parameter values for use when replaying WAL */
     ControlFile->MaxConnections = MaxConnections;
@@ -5038,7 +4203,7 @@ ReadControlFile(void)
   * Utility wrapper to update the control file.  Note that the control
   * file gets flushed.
   */
-void
+static void
  UpdateControlFile(void)
  {
     update_controlfile(DataDir, ControlFile, true);
@@ -5316,16 +4481,12 @@ XLOGShmemInit(void)
      */
     XLogCtl->XLogCacheBlck = XLOGbuffers - 1;
     XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH;
-   XLogCtl->SharedHotStandbyActive = false;
     XLogCtl->InstallXLogFileSegmentActive = false;
-   XLogCtl->SharedPromoteIsTriggered = false;
     XLogCtl->WalWriterSleeping = false;
  
     SpinLockInit(&XLogCtl->Insert.insertpos_lck);
     SpinLockInit(&XLogCtl->info_lck);
     SpinLockInit(&XLogCtl->ulsn_lck);
-   InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
-   ConditionVariableInit(&XLogCtl->recoveryNotPausedCV);
  }
  
  /*
@@ -5511,175 +4672,6 @@ str_time(pg_time_t tnow)
     return buf;
  }
  
-/*
- * See if there are any recovery signal files and if so, set state for
- * recovery.
- *
- * See if there is a recovery command file (recovery.conf), and if so
- * throw an ERROR since as of PG12 we no longer recognize that.
- */
-static void
-readRecoverySignalFile(void)
-{
-   struct stat stat_buf;
-
-   if (IsBootstrapProcessingMode())
-       return;
-
-   /*
-    * Check for old recovery API file: recovery.conf
-    */
-   if (stat(RECOVERY_COMMAND_FILE, &stat_buf) == 0)
-       ereport(FATAL,
-               (errcode_for_file_access(),
-                errmsg("using recovery command file \"%s\" is not supported",
-                       RECOVERY_COMMAND_FILE)));
-
-   /*
-    * Remove unused .done file, if present. Ignore if absent.
-    */
-   unlink(RECOVERY_COMMAND_DONE);
-
-   /*
-    * Check for recovery signal files and if found, fsync them since they
-    * represent server state information.  We don't sweat too much about the
-    * possibility of fsync failure, however.
-    *
-    * If present, standby signal file takes precedence. If neither is present
-    * then we won't enter archive recovery.
-    */
-   if (stat(STANDBY_SIGNAL_FILE, &stat_buf) == 0)
-   {
-       int         fd;
-
-       fd = BasicOpenFilePerm(STANDBY_SIGNAL_FILE, O_RDWR | PG_BINARY,
-                              S_IRUSR | S_IWUSR);
-       if (fd >= 0)
-       {
-           (void) pg_fsync(fd);
-           close(fd);
-       }
-       standby_signal_file_found = true;
-   }
-   else if (stat(RECOVERY_SIGNAL_FILE, &stat_buf) == 0)
-   {
-       int         fd;
-
-       fd = BasicOpenFilePerm(RECOVERY_SIGNAL_FILE, O_RDWR | PG_BINARY,
-                              S_IRUSR | S_IWUSR);
-       if (fd >= 0)
-       {
-           (void) pg_fsync(fd);
-           close(fd);
-       }
-       recovery_signal_file_found = true;
-   }
-
-   StandbyModeRequested = false;
-   ArchiveRecoveryRequested = false;
-   if (standby_signal_file_found)
-   {
-       StandbyModeRequested = true;
-       ArchiveRecoveryRequested = true;
-   }
-   else if (recovery_signal_file_found)
-   {
-       StandbyModeRequested = false;
-       ArchiveRecoveryRequested = true;
-   }
-   else
-       return;
-
-   /*
-    * We don't support standby mode in standalone backends; that requires
-    * other processes such as the WAL receiver to be alive.
-    */
-   if (StandbyModeRequested && !IsUnderPostmaster)
-       ereport(FATAL,
-               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                errmsg("standby mode is not supported by single-user servers")));
-}
-
-static void
-validateRecoveryParameters(void)
-{
-   if (!ArchiveRecoveryRequested)
-       return;
-
-   /*
-    * Check for compulsory parameters
-    */
-   if (StandbyModeRequested)
-   {
-       if ((PrimaryConnInfo == NULL || strcmp(PrimaryConnInfo, "") == 0) &&
-           (recoveryRestoreCommand == NULL || strcmp(recoveryRestoreCommand, "") == 0))
-           ereport(WARNING,
-                   (errmsg("specified neither primary_conninfo nor restore_command"),
-                    errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
-   }
-   else
-   {
-       if (recoveryRestoreCommand == NULL ||
-           strcmp(recoveryRestoreCommand, "") == 0)
-           ereport(FATAL,
-                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                    errmsg("must specify restore_command when standby mode is not enabled")));
-   }
-
-   /*
-    * Override any inconsistent requests. Note that this is a change of
-    * behaviour in 9.5; prior to this we simply ignored a request to pause if
-    * hot_standby = off, which was surprising behaviour.
-    */
-   if (recoveryTargetAction == RECOVERY_TARGET_ACTION_PAUSE &&
-       !EnableHotStandby)
-       recoveryTargetAction = RECOVERY_TARGET_ACTION_SHUTDOWN;
-
-   /*
-    * Final parsing of recovery_target_time string; see also
-    * check_recovery_target_time().
-    */
-   if (recoveryTarget == RECOVERY_TARGET_TIME)
-   {
-       recoveryTargetTime = DatumGetTimestampTz(DirectFunctionCall3(timestamptz_in,
-                                                                    CStringGetDatum(recovery_target_time_string),
-                                                                    ObjectIdGetDatum(InvalidOid),
-                                                                    Int32GetDatum(-1)));
-   }
-
-   /*
-    * If user specified recovery_target_timeline, validate it or compute the
-    * "latest" value.  We can't do this until after we've gotten the restore
-    * command and set InArchiveRecovery, because we need to fetch timeline
-    * history files from the archive.
-    */
-   if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_NUMERIC)
-   {
-       TimeLineID  rtli = recoveryTargetTLIRequested;
-
-       /* Timeline 1 does not have a history file, all else should */
-       if (rtli != 1 && !existsTimeLineHistory(rtli))
-           ereport(FATAL,
-                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                    errmsg("recovery target timeline %u does not exist",
-                           rtli)));
-       recoveryTargetTLI = rtli;
-   }
-   else if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
-   {
-       /* We start the "latest" search from pg_control's timeline */
-       recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
-   }
-   else
-   {
-       /*
-        * else we just use the recoveryTargetTLI as already read from
-        * ControlFile
-        */
-       Assert(recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_CONTROLFILE);
-   }
-}
-
  /*
   * Initialize the first WAL segment on new timeline.
   */
@@ -5841,777 +4833,31 @@ CleanupAfterArchiveRecovery(TimeLineID EndOfLogTLI, XLogRecPtr EndOfLog,
  }
  
  /*
- * Extract timestamp from WAL record.
+ * Check to see if required parameters are set high enough on this server
+ * for various aspects of recovery operation.
   *
- * If the record contains a timestamp, returns true, and saves the timestamp
- * in *recordXtime. If the record type has no timestamp, returns false.
- * Currently, only transaction commit/abort records and restore points contain
- * timestamps.
+ * Note that all the parameters which this function tests need to be
+ * listed in Administrator's Overview section in high-availability.sgml.
+ * If you change them, don't forget to update the list.
   */
-static bool
-getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
+static void
+CheckRequiredParameterValues(void)
  {
-   uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
-   uint8       xact_info = info & XLOG_XACT_OPMASK;
-   uint8       rmid = XLogRecGetRmid(record);
-
-   if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
-   {
-       *recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
-       return true;
-   }
-   if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_COMMIT ||
-                              xact_info == XLOG_XACT_COMMIT_PREPARED))
-   {
-       *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
-       return true;
-   }
-   if (rmid == RM_XACT_ID && (xact_info == XLOG_XACT_ABORT ||
-                              xact_info == XLOG_XACT_ABORT_PREPARED))
+   /*
+    * For archive recovery, the WAL must be generated with at least 'replica'
+    * wal_level.
+    */
+   if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
     {
-       *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
-       return true;
+       ereport(FATAL,
+               (errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
+                errdetail("This happens if you temporarily set wal_level=minimal on the server."),
+                errhint("Use a backup taken after setting wal_level to higher than minimal.")));
     }
-   return false;
-}
-
-/*
- * For point-in-time recovery, this function decides whether we want to
- * stop applying the XLOG before the current record.
- *
- * Returns true if we are stopping, false otherwise. If stopping, some
- * information is saved in recoveryStopXid et al for use in annotating the
- * new timeline's history file.
- */
-static bool
-recoveryStopsBefore(XLogReaderState *record)
-{
-   bool        stopsHere = false;
-   uint8       xact_info;
-   bool        isCommit;
-   TimestampTz recordXtime = 0;
-   TransactionId recordXid;
  
     /*
-    * Ignore recovery target settings when not in archive recovery (meaning
-    * we are in crash recovery).
-    */
-   if (!ArchiveRecoveryRequested)
-       return false;
-
-   /* Check if we should stop as soon as reaching consistency */
-   if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
-   {
-       ereport(LOG,
-               (errmsg("recovery stopping after reaching consistency")));
-
-       recoveryStopAfter = false;
-       recoveryStopXid = InvalidTransactionId;
-       recoveryStopLSN = InvalidXLogRecPtr;
-       recoveryStopTime = 0;
-       recoveryStopName[0] = '\0';
-       return true;
-   }
-
-   /* Check if target LSN has been reached */
-   if (recoveryTarget == RECOVERY_TARGET_LSN &&
-       !recoveryTargetInclusive &&
-       record->ReadRecPtr >= recoveryTargetLSN)
-   {
-       recoveryStopAfter = false;
-       recoveryStopXid = InvalidTransactionId;
-       recoveryStopLSN = record->ReadRecPtr;
-       recoveryStopTime = 0;
-       recoveryStopName[0] = '\0';
-       ereport(LOG,
-               (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
-                       LSN_FORMAT_ARGS(recoveryStopLSN))));
-       return true;
-   }
-
-   /* Otherwise we only consider stopping before COMMIT or ABORT records. */
-   if (XLogRecGetRmid(record) != RM_XACT_ID)
-       return false;
-
-   xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
-
-   if (xact_info == XLOG_XACT_COMMIT)
-   {
-       isCommit = true;
-       recordXid = XLogRecGetXid(record);
-   }
-   else if (xact_info == XLOG_XACT_COMMIT_PREPARED)
-   {
-       xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
-       xl_xact_parsed_commit parsed;
-
-       isCommit = true;
-       ParseCommitRecord(XLogRecGetInfo(record),
-                         xlrec,
-                         &parsed);
-       recordXid = parsed.twophase_xid;
-   }
-   else if (xact_info == XLOG_XACT_ABORT)
-   {
-       isCommit = false;
-       recordXid = XLogRecGetXid(record);
-   }
-   else if (xact_info == XLOG_XACT_ABORT_PREPARED)
-   {
-       xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
-       xl_xact_parsed_abort parsed;
-
-       isCommit = false;
-       ParseAbortRecord(XLogRecGetInfo(record),
-                        xlrec,
-                        &parsed);
-       recordXid = parsed.twophase_xid;
-   }
-   else
-       return false;
-
-   if (recoveryTarget == RECOVERY_TARGET_XID && !recoveryTargetInclusive)
-   {
-       /*
-        * There can be only one transaction end record with this exact
-        * transactionid
-        *
-        * when testing for an xid, we MUST test for equality only, since
-        * transactions are numbered in the order they start, not the order
-        * they complete. A higher numbered xid will complete before you about
-        * 50% of the time...
-        */
-       stopsHere = (recordXid == recoveryTargetXid);
-   }
-
-   if (recoveryTarget == RECOVERY_TARGET_TIME &&
-       getRecordTimestamp(record, &recordXtime))
-   {
-       /*
-        * There can be many transactions that share the same commit time, so
-        * we stop after the last one, if we are inclusive, or stop at the
-        * first one if we are exclusive
-        */
-       if (recoveryTargetInclusive)
-           stopsHere = (recordXtime > recoveryTargetTime);
-       else
-           stopsHere = (recordXtime >= recoveryTargetTime);
-   }
-
-   if (stopsHere)
-   {
-       recoveryStopAfter = false;
-       recoveryStopXid = recordXid;
-       recoveryStopTime = recordXtime;
-       recoveryStopLSN = InvalidXLogRecPtr;
-       recoveryStopName[0] = '\0';
-
-       if (isCommit)
-       {
-           ereport(LOG,
-                   (errmsg("recovery stopping before commit of transaction %u, time %s",
-                           recoveryStopXid,
-                           timestamptz_to_str(recoveryStopTime))));
-       }
-       else
-       {
-           ereport(LOG,
-                   (errmsg("recovery stopping before abort of transaction %u, time %s",
-                           recoveryStopXid,
-                           timestamptz_to_str(recoveryStopTime))));
-       }
-   }
-
-   return stopsHere;
-}
-
-/*
- * Same as recoveryStopsBefore, but called after applying the record.
- *
- * We also track the timestamp of the latest applied COMMIT/ABORT
- * record in XLogCtl->recoveryLastXTime.
- */
-static bool
-recoveryStopsAfter(XLogReaderState *record)
-{
-   uint8       info;
-   uint8       xact_info;
-   uint8       rmid;
-   TimestampTz recordXtime;
-
-   /*
-    * Ignore recovery target settings when not in archive recovery (meaning
-    * we are in crash recovery).
-    */
-   if (!ArchiveRecoveryRequested)
-       return false;
-
-   info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
-   rmid = XLogRecGetRmid(record);
-
-   /*
-    * There can be many restore points that share the same name; we stop at
-    * the first one.
-    */
-   if (recoveryTarget == RECOVERY_TARGET_NAME &&
-       rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
-   {
-       xl_restore_point *recordRestorePointData;
-
-       recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
-
-       if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0)
-       {
-           recoveryStopAfter = true;
-           recoveryStopXid = InvalidTransactionId;
-           recoveryStopLSN = InvalidXLogRecPtr;
-           (void) getRecordTimestamp(record, &recoveryStopTime);
-           strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
-
-           ereport(LOG,
-                   (errmsg("recovery stopping at restore point \"%s\", time %s",
-                           recoveryStopName,
-                           timestamptz_to_str(recoveryStopTime))));
-           return true;
-       }
-   }
-
-   /* Check if the target LSN has been reached */
-   if (recoveryTarget == RECOVERY_TARGET_LSN &&
-       recoveryTargetInclusive &&
-       record->ReadRecPtr >= recoveryTargetLSN)
-   {
-       recoveryStopAfter = true;
-       recoveryStopXid = InvalidTransactionId;
-       recoveryStopLSN = record->ReadRecPtr;
-       recoveryStopTime = 0;
-       recoveryStopName[0] = '\0';
-       ereport(LOG,
-               (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
-                       LSN_FORMAT_ARGS(recoveryStopLSN))));
-       return true;
-   }
-
-   if (rmid != RM_XACT_ID)
-       return false;
-
-   xact_info = info & XLOG_XACT_OPMASK;
-
-   if (xact_info == XLOG_XACT_COMMIT ||
-       xact_info == XLOG_XACT_COMMIT_PREPARED ||
-       xact_info == XLOG_XACT_ABORT ||
-       xact_info == XLOG_XACT_ABORT_PREPARED)
-   {
-       TransactionId recordXid;
-
-       /* Update the last applied transaction timestamp */
-       if (getRecordTimestamp(record, &recordXtime))
-           SetLatestXTime(recordXtime);
-
-       /* Extract the XID of the committed/aborted transaction */
-       if (xact_info == XLOG_XACT_COMMIT_PREPARED)
-       {
-           xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
-           xl_xact_parsed_commit parsed;
-
-           ParseCommitRecord(XLogRecGetInfo(record),
-                             xlrec,
-                             &parsed);
-           recordXid = parsed.twophase_xid;
-       }
-       else if (xact_info == XLOG_XACT_ABORT_PREPARED)
-       {
-           xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
-           xl_xact_parsed_abort parsed;
-
-           ParseAbortRecord(XLogRecGetInfo(record),
-                            xlrec,
-                            &parsed);
-           recordXid = parsed.twophase_xid;
-       }
-       else
-           recordXid = XLogRecGetXid(record);
-
-       /*
-        * There can be only one transaction end record with this exact
-        * transactionid
-        *
-        * when testing for an xid, we MUST test for equality only, since
-        * transactions are numbered in the order they start, not the order
-        * they complete. A higher numbered xid will complete before you about
-        * 50% of the time...
-        */
-       if (recoveryTarget == RECOVERY_TARGET_XID && recoveryTargetInclusive &&
-           recordXid == recoveryTargetXid)
-       {
-           recoveryStopAfter = true;
-           recoveryStopXid = recordXid;
-           recoveryStopTime = recordXtime;
-           recoveryStopLSN = InvalidXLogRecPtr;
-           recoveryStopName[0] = '\0';
-
-           if (xact_info == XLOG_XACT_COMMIT ||
-               xact_info == XLOG_XACT_COMMIT_PREPARED)
-           {
-               ereport(LOG,
-                       (errmsg("recovery stopping after commit of transaction %u, time %s",
-                               recoveryStopXid,
-                               timestamptz_to_str(recoveryStopTime))));
-           }
-           else if (xact_info == XLOG_XACT_ABORT ||
-                    xact_info == XLOG_XACT_ABORT_PREPARED)
-           {
-               ereport(LOG,
-                       (errmsg("recovery stopping after abort of transaction %u, time %s",
-                               recoveryStopXid,
-                               timestamptz_to_str(recoveryStopTime))));
-           }
-           return true;
-       }
-   }
-
-   /* Check if we should stop as soon as reaching consistency */
-   if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
-   {
-       ereport(LOG,
-               (errmsg("recovery stopping after reaching consistency")));
-
-       recoveryStopAfter = true;
-       recoveryStopXid = InvalidTransactionId;
-       recoveryStopTime = 0;
-       recoveryStopLSN = InvalidXLogRecPtr;
-       recoveryStopName[0] = '\0';
-       return true;
-   }
-
-   return false;
-}
-
-/*
- * Create a comment for the history file to explain why and where
- * timeline changed.
- */
-static char *
-getRecoveryStopReason(void)
-{
-   char        reason[200];
-
-   if (recoveryTarget == RECOVERY_TARGET_XID)
-       snprintf(reason, sizeof(reason),
-                "%s transaction %u",
-                recoveryStopAfter ? "after" : "before",
-                recoveryStopXid);
-   else if (recoveryTarget == RECOVERY_TARGET_TIME)
-       snprintf(reason, sizeof(reason),
-                "%s %s\n",
-                recoveryStopAfter ? "after" : "before",
-                timestamptz_to_str(recoveryStopTime));
-   else if (recoveryTarget == RECOVERY_TARGET_LSN)
-       snprintf(reason, sizeof(reason),
-                "%s LSN %X/%X\n",
-                recoveryStopAfter ? "after" : "before",
-                LSN_FORMAT_ARGS(recoveryStopLSN));
-   else if (recoveryTarget == RECOVERY_TARGET_NAME)
-       snprintf(reason, sizeof(reason),
-                "at restore point \"%s\"",
-                recoveryStopName);
-   else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
-       snprintf(reason, sizeof(reason), "reached consistency");
-   else
-       snprintf(reason, sizeof(reason), "no recovery target specified");
-
-   return pstrdup(reason);
-}
-
-/*
- * Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
- *
- * endOfRecovery is true if the recovery target is reached and
- * the paused state starts at the end of recovery because of
- * recovery_target_action=pause, and false otherwise.
- */
-static void
-recoveryPausesHere(bool endOfRecovery)
-{
-   /* Don't pause unless users can connect! */
-   if (!LocalHotStandbyActive)
-       return;
-
-   /* Don't pause after standby promotion has been triggered */
-   if (LocalPromoteIsTriggered)
-       return;
-
-   if (endOfRecovery)
-       ereport(LOG,
-               (errmsg("pausing at the end of recovery"),
-                errhint("Execute pg_wal_replay_resume() to promote.")));
-   else
-       ereport(LOG,
-               (errmsg("recovery has paused"),
-                errhint("Execute pg_wal_replay_resume() to continue.")));
-
-   /* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
-   while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
-   {
-       HandleStartupProcInterrupts();
-       if (CheckForStandbyTrigger())
-           return;
-
-       /*
-        * If recovery pause is requested then set it paused.  While we are in
-        * the loop, user might resume and pause again so set this every time.
-        */
-       ConfirmRecoveryPaused();
-
-       /*
-        * We wait on a condition variable that will wake us as soon as the
-        * pause ends, but we use a timeout so we can check the above exit
-        * condition periodically too.
-        */
-       ConditionVariableTimedSleep(&XLogCtl->recoveryNotPausedCV, 1000,
-                                   WAIT_EVENT_RECOVERY_PAUSE);
-   }
-   ConditionVariableCancelSleep();
-}
-
-/*
- * Get the current state of the recovery pause request.
- */
-RecoveryPauseState
-GetRecoveryPauseState(void)
-{
-   RecoveryPauseState state;
-
-   SpinLockAcquire(&XLogCtl->info_lck);
-   state = XLogCtl->recoveryPauseState;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   return state;
-}
-
-/*
- * Set the recovery pause state.
- *
- * If recovery pause is requested then sets the recovery pause state to
- * 'pause requested' if it is not already 'paused'.  Otherwise, sets it
- * to 'not paused' to resume the recovery.  The recovery pause will be
- * confirmed by the ConfirmRecoveryPaused.
- */
-void
-SetRecoveryPause(bool recoveryPause)
-{
-   SpinLockAcquire(&XLogCtl->info_lck);
-
-   if (!recoveryPause)
-       XLogCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
-   else if (XLogCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
-       XLogCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
-
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   if (!recoveryPause)
-       ConditionVariableBroadcast(&XLogCtl->recoveryNotPausedCV);
-}
-
-/*
- * Confirm the recovery pause by setting the recovery pause state to
- * RECOVERY_PAUSED.
- */
-static void
-ConfirmRecoveryPaused(void)
-{
-   /* If recovery pause is requested then set it paused */
-   SpinLockAcquire(&XLogCtl->info_lck);
-   if (XLogCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
-       XLogCtl->recoveryPauseState = RECOVERY_PAUSED;
-   SpinLockRelease(&XLogCtl->info_lck);
-}
-
-/*
- * When recovery_min_apply_delay is set, we wait long enough to make sure
- * certain record types are applied at least that interval behind the primary.
- *
- * Returns true if we waited.
- *
- * Note that the delay is calculated between the WAL record log time and
- * the current time on standby. We would prefer to keep track of when this
- * standby received each WAL record, which would allow a more consistent
- * approach and one not affected by time synchronisation issues, but that
- * is significantly more effort and complexity for little actual gain in
- * usability.
- */
-static bool
-recoveryApplyDelay(XLogReaderState *record)
-{
-   uint8       xact_info;
-   TimestampTz xtime;
-   TimestampTz delayUntil;
-   long        msecs;
-
-   /* nothing to do if no delay configured */
-   if (recovery_min_apply_delay <= 0)
-       return false;
-
-   /* no delay is applied on a database not yet consistent */
-   if (!reachedConsistency)
-       return false;
-
-   /* nothing to do if crash recovery is requested */
-   if (!ArchiveRecoveryRequested)
-       return false;
-
-   /*
-    * Is it a COMMIT record?
-    *
-    * We deliberately choose not to delay aborts since they have no effect on
-    * MVCC. We already allow replay of records that don't have a timestamp,
-    * so there is already opportunity for issues caused by early conflicts on
-    * standbys.
-    */
-   if (XLogRecGetRmid(record) != RM_XACT_ID)
-       return false;
-
-   xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
-
-   if (xact_info != XLOG_XACT_COMMIT &&
-       xact_info != XLOG_XACT_COMMIT_PREPARED)
-       return false;
-
-   if (!getRecordTimestamp(record, &xtime))
-       return false;
-
-   delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
-
-   /*
-    * Exit without arming the latch if it's already past time to apply this
-    * record
-    */
-   msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), delayUntil);
-   if (msecs <= 0)
-       return false;
-
-   while (true)
-   {
-       ResetLatch(&XLogCtl->recoveryWakeupLatch);
-
-       /*
-        * This might change recovery_min_apply_delay or the trigger file's
-        * location.
-        */
-       HandleStartupProcInterrupts();
-
-       if (CheckForStandbyTrigger())
-           break;
-
-       /*
-        * Recalculate delayUntil as recovery_min_apply_delay could have
-        * changed while waiting in this loop.
-        */
-       delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
-
-       /*
-        * Wait for difference between GetCurrentTimestamp() and delayUntil.
-        */
-       msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
-                                               delayUntil);
-
-       if (msecs <= 0)
-           break;
-
-       elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
-
-       (void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
-                        WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
-                        msecs,
-                        WAIT_EVENT_RECOVERY_APPLY_DELAY);
-   }
-   return true;
-}
-
-/*
- * Save timestamp of latest processed commit/abort record.
- *
- * We keep this in XLogCtl, not a simple static variable, so that it can be
- * seen by processes other than the startup process.  Note in particular
- * that CreateRestartPoint is executed in the checkpointer.
- */
-static void
-SetLatestXTime(TimestampTz xtime)
-{
-   SpinLockAcquire(&XLogCtl->info_lck);
-   XLogCtl->recoveryLastXTime = xtime;
-   SpinLockRelease(&XLogCtl->info_lck);
-}
-
-/*
- * Fetch timestamp of latest processed commit/abort record.
- */
-TimestampTz
-GetLatestXTime(void)
-{
-   TimestampTz xtime;
-
-   SpinLockAcquire(&XLogCtl->info_lck);
-   xtime = XLogCtl->recoveryLastXTime;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   return xtime;
-}
-
-/*
- * Save timestamp of the next chunk of WAL records to apply.
- *
- * We keep this in XLogCtl, not a simple static variable, so that it can be
- * seen by all backends.
- */
-static void
-SetCurrentChunkStartTime(TimestampTz xtime)
-{
-   SpinLockAcquire(&XLogCtl->info_lck);
-   XLogCtl->currentChunkStartTime = xtime;
-   SpinLockRelease(&XLogCtl->info_lck);
-}
-
-/*
- * Fetch timestamp of latest processed commit/abort record.
- * Startup process maintains an accurate local copy in XLogReceiptTime
- */
-TimestampTz
-GetCurrentChunkReplayStartTime(void)
-{
-   TimestampTz xtime;
-
-   SpinLockAcquire(&XLogCtl->info_lck);
-   xtime = XLogCtl->currentChunkStartTime;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   return xtime;
-}
-
-/*
- * Returns time of receipt of current chunk of XLOG data, as well as
- * whether it was received from streaming replication or from archives.
- */
-void
-GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
-{
-   /*
-    * This must be executed in the startup process, since we don't export the
-    * relevant state to shared memory.
-    */
-   Assert(InRecovery);
-
-   *rtime = XLogReceiptTime;
-   *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
-}
-
-/*
- * Note that text field supplied is a parameter name and does not require
- * translation
- */
-static void
-RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
-{
-   if (currValue < minValue)
-   {
-       if (LocalHotStandbyActive)
-       {
-           bool        warned_for_promote = false;
-
-           ereport(WARNING,
-                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                    errmsg("hot standby is not possible because of insufficient parameter settings"),
-                    errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
-                              param_name,
-                              currValue,
-                              minValue)));
-
-           SetRecoveryPause(true);
-
-           ereport(LOG,
-                   (errmsg("recovery has paused"),
-                    errdetail("If recovery is unpaused, the server will shut down."),
-                    errhint("You can then restart the server after making the necessary configuration changes.")));
-
-           while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
-           {
-               HandleStartupProcInterrupts();
-
-               if (CheckForStandbyTrigger())
-               {
-                   if (!warned_for_promote)
-                       ereport(WARNING,
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("promotion is not possible because of insufficient parameter settings"),
-
-                       /*
-                        * Repeat the detail from above so it's easy to find
-                        * in the log.
-                        */
-                                errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
-                                          param_name,
-                                          currValue,
-                                          minValue),
-                                errhint("Restart the server after making the necessary configuration changes.")));
-                   warned_for_promote = true;
-               }
-
-               /*
-                * If recovery pause is requested then set it paused.  While
-                * we are in the loop, user might resume and pause again so
-                * set this every time.
-                */
-               ConfirmRecoveryPaused();
-
-               /*
-                * We wait on a condition variable that will wake us as soon
-                * as the pause ends, but we use a timeout so we can check the
-                * above conditions periodically too.
-                */
-               ConditionVariableTimedSleep(&XLogCtl->recoveryNotPausedCV, 1000,
-                                           WAIT_EVENT_RECOVERY_PAUSE);
-           }
-           ConditionVariableCancelSleep();
-       }
-
-       ereport(FATAL,
-               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                errmsg("recovery aborted because of insufficient parameter settings"),
-       /* Repeat the detail from above so it's easy to find in the log. */
-                errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
-                          param_name,
-                          currValue,
-                          minValue),
-                errhint("You can restart the server after making the necessary configuration changes.")));
-   }
-}
-
-/*
- * Check to see if required parameters are set high enough on this server
- * for various aspects of recovery operation.
- *
- * Note that all the parameters which this function tests need to be
- * listed in Administrator's Overview section in high-availability.sgml.
- * If you change them, don't forget to update the list.
- */
-static void
-CheckRequiredParameterValues(void)
-{
-   /*
-    * For archive recovery, the WAL must be generated with at least 'replica'
-    * wal_level.
-    */
-   if (ArchiveRecoveryRequested && ControlFile->wal_level == WAL_LEVEL_MINIMAL)
-   {
-       ereport(FATAL,
-               (errmsg("WAL was generated with wal_level=minimal, cannot continue recovering"),
-                errdetail("This happens if you temporarily set wal_level=minimal on the server."),
-                errhint("Use a backup taken after setting wal_level to higher than minimal.")));
-   }
-
-   /*
-    * For Hot Standby, the WAL must be generated with 'replica' mode, and we
-    * must have at least as many backend slots as the primary.
+    * For Hot Standby, the WAL must be generated with 'replica' mode, and we
+    * must have at least as many backend slots as the primary.
      */
     if (ArchiveRecoveryRequested && EnableHotStandby)
     {
@@ -6643,26 +4889,17 @@ StartupXLOG(void)
     XLogCtlInsert *Insert;
     CheckPoint  checkPoint;
     bool        wasShutdown;
-   bool        reachedRecoveryTarget = false;
-   bool        haveBackupLabel = false;
-   bool        haveTblspcMap = false;
-   XLogRecPtr  RecPtr,
-               LastRec,
-               checkPointLoc,
-               EndOfLog;
+   bool        haveTblspcMap;
+   bool        haveBackupLabel;
+   XLogRecPtr  EndOfLog;
     TimeLineID  EndOfLogTLI;
-   TimeLineID  replayTLI,
-               newTLI;
+   TimeLineID  newTLI;
     bool        performedWalRecovery;
-   char       *recoveryStopReason;
-   XLogRecord *record;
+   EndOfWalRecoveryInfo *endOfRecoveryInfo;
+   XLogRecPtr  abortedRecPtr;
+   XLogRecPtr  missingContrecPtr;
     TransactionId oldestActiveXID;
-   bool        backupEndRequired = false;
-   bool        backupFromStandby = false;
-   XLogReaderState *xlogreader;
-   XLogPageReadPrivate private;
     bool        promoted = false;
-   struct stat st;
  
     /*
      * We should have an aux process resource owner to use, and we should not
@@ -6759,444 +4996,29 @@ StartupXLOG(void)
      *   this temporary data.
      *
      * - There might be data which we had written, intending to fsync it, but
-    *   which we had not actually fsync'd yet.  Therefore, a power failure in
-    *   the near future might cause earlier unflushed writes to be lost, even
-    *   though more recent data written to disk from here on would be
-    *   persisted.  To avoid that, fsync the entire data directory.
-    */
-   if (ControlFile->state != DB_SHUTDOWNED &&
-       ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
-   {
-       RemoveTempXlogFiles();
-       SyncDataDirectory();
-   }
-
-   /*---- BEGIN InitWalRecovery ----*/
-
-   /*
-    * Initialize on the assumption we want to recover to the latest timeline
-    * that's active according to pg_control.
-    */
-   if (ControlFile->minRecoveryPointTLI >
-       ControlFile->checkPointCopy.ThisTimeLineID)
-       recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
-   else
-       recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
-
-   /*
-    * Check for signal files, and if so set up state for offline recovery
-    */
-   readRecoverySignalFile();
-   validateRecoveryParameters();
-
-   if (ArchiveRecoveryRequested)
-   {
-       if (StandbyModeRequested)
-           ereport(LOG,
-                   (errmsg("entering standby mode")));
-       else if (recoveryTarget == RECOVERY_TARGET_XID)
-           ereport(LOG,
-                   (errmsg("starting point-in-time recovery to XID %u",
-                           recoveryTargetXid)));
-       else if (recoveryTarget == RECOVERY_TARGET_TIME)
-           ereport(LOG,
-                   (errmsg("starting point-in-time recovery to %s",
-                           timestamptz_to_str(recoveryTargetTime))));
-       else if (recoveryTarget == RECOVERY_TARGET_NAME)
-           ereport(LOG,
-                   (errmsg("starting point-in-time recovery to \"%s\"",
-                           recoveryTargetName)));
-       else if (recoveryTarget == RECOVERY_TARGET_LSN)
-           ereport(LOG,
-                   (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
-                           LSN_FORMAT_ARGS(recoveryTargetLSN))));
-       else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
-           ereport(LOG,
-                   (errmsg("starting point-in-time recovery to earliest consistent point")));
-       else
-           ereport(LOG,
-                   (errmsg("starting archive recovery")));
-   }
-
-   /*
-    * Take ownership of the wakeup latch if we're going to sleep during
-    * recovery.
-    */
-   if (ArchiveRecoveryRequested)
-       OwnLatch(&XLogCtl->recoveryWakeupLatch);
-
-   /* Set up XLOG reader facility */
-   MemSet(&private, 0, sizeof(XLogPageReadPrivate));
-   xlogreader =
-       XLogReaderAllocate(wal_segment_size, NULL,
-                          XL_ROUTINE(.page_read = &XLogPageRead,
-                                     .segment_open = NULL,
-                                     .segment_close = wal_segment_close),
-                          &private);
-   if (!xlogreader)
-       ereport(ERROR,
-               (errcode(ERRCODE_OUT_OF_MEMORY),
-                errmsg("out of memory"),
-                errdetail("Failed while allocating a WAL reading processor.")));
-   xlogreader->system_identifier = ControlFile->system_identifier;
-
-   /*
-    * Allocate two page buffers dedicated to WAL consistency checks.  We do
-    * it this way, rather than just making static arrays, for two reasons:
-    * (1) no need to waste the storage in most instantiations of the backend;
-    * (2) a static char array isn't guaranteed to have any particular
-    * alignment, whereas palloc() will provide MAXALIGN'd storage.
-    */
-   replay_image_masked = (char *) palloc(BLCKSZ);
-   primary_image_masked = (char *) palloc(BLCKSZ);
-
-   if (read_backup_label(&checkPointLoc, &replayTLI, &backupEndRequired,
-                         &backupFromStandby))
-   {
-       List       *tablespaces = NIL;
-
-       /*
-        * Archive recovery was requested, and thanks to the backup label
-        * file, we know how far we need to replay to reach consistency. Enter
-        * archive recovery directly.
-        */
-       InArchiveRecovery = true;
-       if (StandbyModeRequested)
-           StandbyMode = true;
-
-       /*
-        * When a backup_label file is present, we want to roll forward from
-        * the checkpoint it identifies, rather than using pg_control.
-        */
-       record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true,
-                                     replayTLI);
-       if (record != NULL)
-       {
-           memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
-           wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
-           ereport(DEBUG1,
-                   (errmsg_internal("checkpoint record is at %X/%X",
-                                    LSN_FORMAT_ARGS(checkPointLoc))));
-           InRecovery = true;  /* force recovery even if SHUTDOWNED */
-
-           /*
-            * Make sure that REDO location exists. This may not be the case
-            * if there was a crash during an online backup, which left a
-            * backup_label around that references a WAL segment that's
-            * already been archived.
-            */
-           if (checkPoint.redo < checkPointLoc)
-           {
-               XLogBeginRead(xlogreader, checkPoint.redo);
-               if (!ReadRecord(xlogreader, LOG, false,
-                               checkPoint.ThisTimeLineID))
-                   ereport(FATAL,
-                           (errmsg("could not find redo location referenced by checkpoint record"),
-                            errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
-                                    "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
-                                    "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-                                    DataDir, DataDir, DataDir)));
-           }
-       }
-       else
-       {
-           ereport(FATAL,
-                   (errmsg("could not locate required checkpoint record"),
-                    errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
-                            "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
-                            "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
-                            DataDir, DataDir, DataDir)));
-           wasShutdown = false;    /* keep compiler quiet */
-       }
-
-       /* read the tablespace_map file if present and create symlinks. */
-       if (read_tablespace_map(&tablespaces))
-       {
-           ListCell   *lc;
-
-           foreach(lc, tablespaces)
-           {
-               tablespaceinfo *ti = lfirst(lc);
-               char       *linkloc;
-
-               linkloc = psprintf("pg_tblspc/%s", ti->oid);
-
-               /*
-                * Remove the existing symlink if any and Create the symlink
-                * under PGDATA.
-                */
-               remove_tablespace_symlink(linkloc);
-
-               if (symlink(ti->path, linkloc) < 0)
-                   ereport(ERROR,
-                           (errcode_for_file_access(),
-                            errmsg("could not create symbolic link \"%s\": %m",
-                                   linkloc)));
-
-               pfree(ti->oid);
-               pfree(ti->path);
-               pfree(ti);
-           }
-
-           /* set flag to delete it later */
-           haveTblspcMap = true;
-       }
-
-       /* set flag to delete it later */
-       haveBackupLabel = true;
-   }
-   else
-   {
-       /*
-        * If tablespace_map file is present without backup_label file, there
-        * is no use of such file.  There is no harm in retaining it, but it
-        * is better to get rid of the map file so that we don't have any
-        * redundant file in data directory and it will avoid any sort of
-        * confusion.  It seems prudent though to just rename the file out of
-        * the way rather than delete it completely, also we ignore any error
-        * that occurs in rename operation as even if map file is present
-        * without backup_label file, it is harmless.
-        */
-       if (stat(TABLESPACE_MAP, &st) == 0)
-       {
-           unlink(TABLESPACE_MAP_OLD);
-           if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
-               ereport(LOG,
-                       (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
-                               TABLESPACE_MAP, BACKUP_LABEL_FILE),
-                        errdetail("File \"%s\" was renamed to \"%s\".",
-                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
-           else
-               ereport(LOG,
-                       (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
-                               TABLESPACE_MAP, BACKUP_LABEL_FILE),
-                        errdetail("Could not rename file \"%s\" to \"%s\": %m.",
-                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
-       }
-
-       /*
-        * It's possible that archive recovery was requested, but we don't
-        * know how far we need to replay the WAL before we reach consistency.
-        * This can happen for example if a base backup is taken from a
-        * running server using an atomic filesystem snapshot, without calling
-        * pg_start/stop_backup. Or if you just kill a running primary server
-        * and put it into archive recovery by creating a recovery signal
-        * file.
-        *
-        * Our strategy in that case is to perform crash recovery first,
-        * replaying all the WAL present in pg_wal, and only enter archive
-        * recovery after that.
-        *
-        * But usually we already know how far we need to replay the WAL (up
-        * to minRecoveryPoint, up to backupEndPoint, or until we see an
-        * end-of-backup record), and we can enter archive recovery directly.
-        */
-       if (ArchiveRecoveryRequested &&
-           (ControlFile->minRecoveryPoint != InvalidXLogRecPtr ||
-            ControlFile->backupEndRequired ||
-            ControlFile->backupEndPoint != InvalidXLogRecPtr ||
-            ControlFile->state == DB_SHUTDOWNED))
-       {
-           InArchiveRecovery = true;
-           if (StandbyModeRequested)
-               StandbyMode = true;
-       }
-
-       /* Get the last valid checkpoint record. */
-       checkPointLoc = ControlFile->checkPoint;
-       RedoStartLSN = ControlFile->checkPointCopy.redo;
-       replayTLI = ControlFile->checkPointCopy.ThisTimeLineID;
-       record = ReadCheckpointRecord(xlogreader, checkPointLoc, 1, true,
-                                     replayTLI);
-       if (record != NULL)
-       {
-           ereport(DEBUG1,
-                   (errmsg_internal("checkpoint record is at %X/%X",
-                                    LSN_FORMAT_ARGS(checkPointLoc))));
-       }
-       else
-       {
-           /*
-            * We used to attempt to go back to a secondary checkpoint record
-            * here, but only when not in standby mode. We now just fail if we
-            * can't read the last checkpoint because this allows us to
-            * simplify processing around checkpoints.
-            */
-           ereport(PANIC,
-                   (errmsg("could not locate a valid checkpoint record")));
-       }
-       memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
-       wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
-   }
-
-   /*
-    * If the location of the checkpoint record is not on the expected
-    * timeline in the history of the requested timeline, we cannot proceed:
-    * the backup is not part of the history of the requested timeline.
-    */
-   Assert(expectedTLEs);       /* was initialized by reading checkpoint
-                                * record */
-   if (tliOfPointInHistory(checkPointLoc, expectedTLEs) !=
-       checkPoint.ThisTimeLineID)
-   {
-       XLogRecPtr  switchpoint;
-
-       /*
-        * tliSwitchPoint will throw an error if the checkpoint's timeline is
-        * not in expectedTLEs at all.
-        */
-       switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
-       ereport(FATAL,
-               (errmsg("requested timeline %u is not a child of this server's history",
-                       recoveryTargetTLI),
-                errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
-                          LSN_FORMAT_ARGS(ControlFile->checkPoint),
-                          ControlFile->checkPointCopy.ThisTimeLineID,
-                          LSN_FORMAT_ARGS(switchpoint))));
-   }
-
-   /*
-    * The min recovery point should be part of the requested timeline's
-    * history, too.
-    */
-   if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
-       tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
-       ControlFile->minRecoveryPointTLI)
-       ereport(FATAL,
-               (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
-                       recoveryTargetTLI,
-                       LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
-                       ControlFile->minRecoveryPointTLI)));
-
-   LastRec = RecPtr = checkPointLoc;
-
-   ereport(DEBUG1,
-           (errmsg_internal("redo record is at %X/%X; shutdown %s",
-                            LSN_FORMAT_ARGS(checkPoint.redo),
-                            wasShutdown ? "true" : "false")));
-   ereport(DEBUG1,
-           (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
-                            U64FromFullTransactionId(checkPoint.nextXid),
-                            checkPoint.nextOid)));
-   ereport(DEBUG1,
-           (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
-                            checkPoint.nextMulti, checkPoint.nextMultiOffset)));
-   ereport(DEBUG1,
-           (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
-                            checkPoint.oldestXid, checkPoint.oldestXidDB)));
-   ereport(DEBUG1,
-           (errmsg_internal("oldest MultiXactId: %u, in database %u",
-                            checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
-   ereport(DEBUG1,
-           (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
-                            checkPoint.oldestCommitTsXid,
-                            checkPoint.newestCommitTsXid)));
-
-   /* sanity checks on the checkpoint record */
-   if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid)))
-       ereport(PANIC,
-               (errmsg("invalid next transaction ID")));
-   if (checkPoint.redo > checkPointLoc)
-       ereport(PANIC,
-               (errmsg("invalid redo in checkpoint record")));
-
-   /*
-    * Check whether we need to force recovery from WAL.  If it appears to
-    * have been a clean shutdown and we did not have a recovery signal file,
-    * then assume no recovery needed.
-    */
-   if (checkPoint.redo < checkPointLoc)
-   {
-       if (wasShutdown)
-           ereport(PANIC,
-                   (errmsg("invalid redo record in shutdown checkpoint")));
-       InRecovery = true;
-   }
-   else if (ControlFile->state != DB_SHUTDOWNED)
-       InRecovery = true;
-   else if (ArchiveRecoveryRequested)
-   {
-       /* force recovery due to presence of recovery signal file */
-       InRecovery = true;
-   }
-
-   /*
-    * If recovery is needed, update our in-memory copy of pg_control to show
-    * that we are recovering and to show the selected checkpoint as the place
-    * we are starting from. We also mark pg_control with any minimum recovery
-    * stop point obtained from a backup history file.
-    *
-    * We don't write the changes to disk yet, though. Only do that after
-    * initializing various subsystems.
-    */
-   if (InRecovery)
-   {
-       DBState     dbstate_at_startup;
-
-       dbstate_at_startup = ControlFile->state;
-       if (InArchiveRecovery)
-       {
-           ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
-       }
-       else
-       {
-           ereport(LOG,
-                   (errmsg("database system was not properly shut down; "
-                           "automatic recovery in progress")));
-           if (recoveryTargetTLI > ControlFile->checkPointCopy.ThisTimeLineID)
-               ereport(LOG,
-                       (errmsg("crash recovery starts in timeline %u "
-                               "and has target timeline %u",
-                               ControlFile->checkPointCopy.ThisTimeLineID,
-                               recoveryTargetTLI)));
-           ControlFile->state = DB_IN_CRASH_RECOVERY;
-       }
-       ControlFile->checkPoint = checkPointLoc;
-       ControlFile->checkPointCopy = checkPoint;
-       if (InArchiveRecovery)
-       {
-           /* initialize minRecoveryPoint if not set yet */
-           if (ControlFile->minRecoveryPoint < checkPoint.redo)
-           {
-               ControlFile->minRecoveryPoint = checkPoint.redo;
-               ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
-           }
-       }
-
-       /*
-        * Set backupStartPoint if we're starting recovery from a base backup.
-        *
-        * Also set backupEndPoint and use minRecoveryPoint as the backup end
-        * location if we're starting recovery from a base backup which was
-        * taken from a standby. In this case, the database system status in
-        * pg_control must indicate that the database was already in recovery.
-        * Usually that will be DB_IN_ARCHIVE_RECOVERY but also can be
-        * DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
-        * before reaching this point; e.g. because restore_command or
-        * primary_conninfo were faulty.
-        *
-        * Any other state indicates that the backup somehow became corrupted
-        * and we can't sensibly continue with recovery.
-        */
-       if (haveBackupLabel)
-       {
-           ControlFile->backupStartPoint = checkPoint.redo;
-           ControlFile->backupEndRequired = backupEndRequired;
-
-           if (backupFromStandby)
-           {
-               if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
-                   dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
-                   ereport(FATAL,
-                           (errmsg("backup_label contains data inconsistent with control file"),
-                            errhint("This means that the backup is corrupted and you will "
-                                    "have to use another backup for recovery.")));
-               ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
-           }
-       }
+    *   which we had not actually fsync'd yet.  Therefore, a power failure in
+    *   the near future might cause earlier unflushed writes to be lost, even
+    *   though more recent data written to disk from here on would be
+    *   persisted.  To avoid that, fsync the entire data directory.
+    */
+   if (ControlFile->state != DB_SHUTDOWNED &&
+       ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
+   {
+       RemoveTempXlogFiles();
+       SyncDataDirectory();
     }
  
-   /*---- END InitWalRecovery ----*/
+   /*
+    * Prepare for WAL recovery if needed.
+    *
+    * InitWalRecovery analyzes the control file and the backup label file, if
+    * any.  It updates the in-memory ControlFile buffer according to the
+    * starting checkpoint, and sets InRecovery and ArchiveRecoveryRequested.
+    * It also applies the tablespace map file, if any.
+    */
+   InitWalRecovery(ControlFile, &wasShutdown,
+                   &haveBackupLabel, &haveTblspcMap);
+   checkPoint = ControlFile->checkPointCopy;
  
     /* initialize shared memory variables from the checkpoint record */
     ShmemVariableCache->nextXid = checkPoint.nextXid;
@@ -7272,13 +5094,6 @@ StartupXLOG(void)
     else
         XLogCtl->unloggedLSN = FirstNormalUnloggedLSN;
  
-   /*
-    * We must replay WAL entries using the same TimeLineID they were created
-    * under, so temporarily adopt the TLI indicated by the checkpoint (see
-    * also xlog_redo()).
-    */
-   replayTLI = checkPoint.ThisTimeLineID;
-
     /*
      * Copy any missing timeline history files between 'now' and the recovery
      * target timeline from archive to pg_wal. While we don't need those files
@@ -7291,7 +5106,7 @@ StartupXLOG(void)
      * are small, so it's better to copy them unnecessarily than not copy them
      * and regret later.
      */
-   restoreTimeLineHistoryFiles(replayTLI, recoveryTargetTLI);
+   restoreTimeLineHistoryFiles(checkPoint.ThisTimeLineID, recoveryTargetTLI);
  
     /*
      * Before running in recovery, scan pg_twophase and fill in its status to
@@ -7308,17 +5123,9 @@ StartupXLOG(void)
     RedoRecPtr = XLogCtl->RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
     doPageWrites = lastFullPageWrites;
  
-   /*
-    * Start recovery assuming that the final record isn't lost.
-    */
-   abortedRecPtr = InvalidXLogRecPtr;
-   missingContrecPtr = InvalidXLogRecPtr;
-
     /* REDO */
     if (InRecovery)
     {
-       int         rmid;
-
         /* Initialize state for RecoveryInProgress() */
         SpinLockAcquire(&XLogCtl->info_lck);
         if (InArchiveRecovery)
@@ -7376,13 +5183,13 @@ StartupXLOG(void)
          */
         if (InArchiveRecovery)
         {
-           minRecoveryPoint = ControlFile->minRecoveryPoint;
-           minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+           LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+           LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
         }
         else
         {
-           minRecoveryPoint = InvalidXLogRecPtr;
-           minRecoveryPointTLI = 0;
+           LocalMinRecoveryPoint = InvalidXLogRecPtr;
+           LocalMinRecoveryPointTLI = 0;
         }
  
         /*
@@ -7473,460 +5280,31 @@ StartupXLOG(void)
             }
         }
  
-       /*---- BEGIN PerformWalRecovery ----*/
-
-       /*
-        * Initialize shared variables for tracking progress of WAL replay, as
-        * if we had just replayed the record before the REDO location (or the
-        * checkpoint record itself, if it's a shutdown checkpoint).
-        */
-       SpinLockAcquire(&XLogCtl->info_lck);
-       if (checkPoint.redo < checkPointLoc)
-           XLogCtl->replayEndRecPtr = checkPoint.redo;
-       else
-           XLogCtl->replayEndRecPtr = xlogreader->EndRecPtr;
-       XLogCtl->replayEndTLI = replayTLI;
-       XLogCtl->lastReplayedEndRecPtr = XLogCtl->replayEndRecPtr;
-       XLogCtl->lastReplayedTLI = XLogCtl->replayEndTLI;
-       XLogCtl->recoveryLastXTime = 0;
-       XLogCtl->currentChunkStartTime = 0;
-       XLogCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
-       SpinLockRelease(&XLogCtl->info_lck);
-
-       /* Also ensure XLogReceiptTime has a sane value */
-       XLogReceiptTime = GetCurrentTimestamp();
-
-       /*
-        * Let postmaster know we've started redo now, so that it can launch
-        * the archiver if necessary.
-        */
-       if (IsUnderPostmaster)
-           SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
-
-       /*
-        * Allow read-only connections immediately if we're consistent
-        * already.
-        */
-       CheckRecoveryConsistency();
-
-       /*
-        * Find the first record that logically follows the checkpoint --- it
-        * might physically precede it, though.
-        */
-       if (checkPoint.redo < checkPointLoc)
-       {
-           /* back up to find the record */
-           XLogBeginRead(xlogreader, checkPoint.redo);
-           record = ReadRecord(xlogreader, PANIC, false, replayTLI);
-       }
-       else
-       {
-           /* just have to read next record after CheckPoint */
-           Assert(RecPtr == checkPointLoc);
-           record = ReadRecord(xlogreader, LOG, false, replayTLI);
-       }
-
-       if (record != NULL)
-       {
-           ErrorContextCallback errcallback;
-           TimestampTz xtime;
-           PGRUsage    ru0;
-
-           pg_rusage_init(&ru0);
-
-           InRedo = true;
-
-           /* Initialize resource managers */
-           for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
-           {
-               if (RmgrTable[rmid].rm_startup != NULL)
-                   RmgrTable[rmid].rm_startup();
-           }
-
-           ereport(LOG,
-                   (errmsg("redo starts at %X/%X",
-                           LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
-
-           /* Prepare to report progress of the redo phase. */
-           if (!StandbyMode)
-               begin_startup_progress_phase();
-
-           /*
-            * main redo apply loop
-            */
-           do
-           {
-               bool        switchedTLI = false;
-
-               if (!StandbyMode)
-                   ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
-                                            LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
-
-#ifdef WAL_DEBUG
-               if (XLOG_DEBUG ||
-                   (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
-                   (rmid != RM_XACT_ID && trace_recovery_messages <= DEBUG3))
-               {
-                   StringInfoData buf;
-
-                   initStringInfo(&buf);
-                   appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
-                                    LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
-                                    LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
-                   xlog_outrec(&buf, xlogreader);
-                   appendStringInfoString(&buf, " - ");
-                   xlog_outdesc(&buf, xlogreader);
-                   elog(LOG, "%s", buf.data);
-                   pfree(buf.data);
-               }
-#endif
-
-               /* Handle interrupt signals of startup process */
-               HandleStartupProcInterrupts();
-
-               /*
-                * Pause WAL replay, if requested by a hot-standby session via
-                * SetRecoveryPause().
-                *
-                * Note that we intentionally don't take the info_lck spinlock
-                * here.  We might therefore read a slightly stale value of
-                * the recoveryPause flag, but it can't be very stale (no
-                * worse than the last spinlock we did acquire).  Since a
-                * pause request is a pretty asynchronous thing anyway,
-                * possibly responding to it one WAL record later than we
-                * otherwise would is a minor issue, so it doesn't seem worth
-                * adding another spinlock cycle to prevent that.
-                */
-               if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
-                   RECOVERY_NOT_PAUSED)
-                   recoveryPausesHere(false);
-
-               /*
-                * Have we reached our recovery target?
-                */
-               if (recoveryStopsBefore(xlogreader))
-               {
-                   reachedRecoveryTarget = true;
-                   break;
-               }
-
-               /*
-                * If we've been asked to lag the primary, wait on latch until
-                * enough time has passed.
-                */
-               if (recoveryApplyDelay(xlogreader))
-               {
-                   /*
-                    * We test for paused recovery again here. If user sets
-                    * delayed apply, it may be because they expect to pause
-                    * recovery in case of problems, so we must test again
-                    * here otherwise pausing during the delay-wait wouldn't
-                    * work.
-                    */
-                   if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
-                       RECOVERY_NOT_PAUSED)
-                       recoveryPausesHere(false);
-               }
-
-               /* Setup error traceback support for ereport() */
-               errcallback.callback = rm_redo_error_callback;
-               errcallback.arg = (void *) xlogreader;
-               errcallback.previous = error_context_stack;
-               error_context_stack = &errcallback;
-
-               /*
-                * ShmemVariableCache->nextXid must be beyond record's xid.
-                */
-               AdvanceNextFullTransactionIdPastXid(record->xl_xid);
-
-               /*
-                * Before replaying this record, check if this record causes
-                * the current timeline to change. The record is already
-                * considered to be part of the new timeline, so we update
-                * replayTLI before replaying it. That's important so that
-                * replayEndTLI, which is recorded as the minimum recovery
-                * point's TLI if recovery stops after this record, is set
-                * correctly.
-                */
-               if (record->xl_rmid == RM_XLOG_ID)
-               {
-                   TimeLineID  newReplayTLI = replayTLI;
-                   TimeLineID  prevReplayTLI = replayTLI;
-                   uint8       info = record->xl_info & ~XLR_INFO_MASK;
-
-                   if (info == XLOG_CHECKPOINT_SHUTDOWN)
-                   {
-                       CheckPoint  checkPoint;
-
-                       memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
-                       newReplayTLI = checkPoint.ThisTimeLineID;
-                       prevReplayTLI = checkPoint.PrevTimeLineID;
-                   }
-                   else if (info == XLOG_END_OF_RECOVERY)
-                   {
-                       xl_end_of_recovery xlrec;
-
-                       memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
-                       newReplayTLI = xlrec.ThisTimeLineID;
-                       prevReplayTLI = xlrec.PrevTimeLineID;
-                   }
-
-                   if (newReplayTLI != replayTLI)
-                   {
-                       /* Check that it's OK to switch to this TLI */
-                       checkTimeLineSwitch(xlogreader->EndRecPtr,
-                                           newReplayTLI,
-                                           prevReplayTLI, replayTLI);
-
-                       /* Following WAL records should be run with new TLI */
-                       replayTLI = newReplayTLI;
-                       switchedTLI = true;
-                   }
-               }
-
-               /*
-                * Update shared replayEndRecPtr before replaying this record,
-                * so that XLogFlush will update minRecoveryPoint correctly.
-                */
-               SpinLockAcquire(&XLogCtl->info_lck);
-               XLogCtl->replayEndRecPtr = xlogreader->EndRecPtr;
-               XLogCtl->replayEndTLI = replayTLI;
-               SpinLockRelease(&XLogCtl->info_lck);
-
-               /*
-                * If we are attempting to enter Hot Standby mode, process
-                * XIDs we see
-                */
-               if (standbyState >= STANDBY_INITIALIZED &&
-                   TransactionIdIsValid(record->xl_xid))
-                   RecordKnownAssignedTransactionIds(record->xl_xid);
-
-               /* Now apply the WAL record itself */
-               RmgrTable[record->xl_rmid].rm_redo(xlogreader);
-
-               /*
-                * After redo, check whether the backup pages associated with
-                * the WAL record are consistent with the existing pages. This
-                * check is done only if consistency check is enabled for this
-                * record.
-                */
-               if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
-                   checkXLogConsistency(xlogreader);
-
-               /* Pop the error context stack */
-               error_context_stack = errcallback.previous;
-
-               /*
-                * Update lastReplayedEndRecPtr after this record has been
-                * successfully replayed.
-                */
-               SpinLockAcquire(&XLogCtl->info_lck);
-               XLogCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
-               XLogCtl->lastReplayedTLI = replayTLI;
-               SpinLockRelease(&XLogCtl->info_lck);
-
-               /*
-                * If rm_redo called XLogRequestWalReceiverReply, then we wake
-                * up the receiver so that it notices the updated
-                * lastReplayedEndRecPtr and sends a reply to the primary.
-                */
-               if (doRequestWalReceiverReply)
-               {
-                   doRequestWalReceiverReply = false;
-                   WalRcvForceReply();
-               }
-
-               /* Remember this record as the last-applied one */
-               LastRec = xlogreader->ReadRecPtr;
-
-               /* Allow read-only connections if we're consistent now */
-               CheckRecoveryConsistency();
-
-               /* Is this a timeline switch? */
-               if (switchedTLI)
-               {
-                   /*
-                    * Before we continue on the new timeline, clean up any
-                    * (possibly bogus) future WAL segments on the old
-                    * timeline.
-                    */
-                   RemoveNonParentXlogFiles(xlogreader->EndRecPtr, replayTLI);
-
-                   /*
-                    * Wake up any walsenders to notice that we are on a new
-                    * timeline.
-                    */
-                   if (AllowCascadeReplication())
-                       WalSndWakeup();
-               }
-
-               /* Exit loop if we reached inclusive recovery target */
-               if (recoveryStopsAfter(xlogreader))
-               {
-                   reachedRecoveryTarget = true;
-                   break;
-               }
-
-               /* Else, try to fetch the next WAL record */
-               record = ReadRecord(xlogreader, LOG, false, replayTLI);
-           } while (record != NULL);
-
-           /*
-            * end of main redo apply loop
-            */
-
-           if (reachedRecoveryTarget)
-           {
-               if (!reachedConsistency)
-                   ereport(FATAL,
-                           (errmsg("requested recovery stop point is before consistent recovery point")));
-
-               /*
-                * This is the last point where we can restart recovery with a
-                * new recovery target, if we shutdown and begin again. After
-                * this, Resource Managers may choose to do permanent
-                * corrective actions at end of recovery.
-                */
-               switch (recoveryTargetAction)
-               {
-                   case RECOVERY_TARGET_ACTION_SHUTDOWN:
-
-                       /*
-                        * exit with special return code to request shutdown
-                        * of postmaster.  Log messages issued from
-                        * postmaster.
-                        */
-                       proc_exit(3);
-
-                   case RECOVERY_TARGET_ACTION_PAUSE:
-                       SetRecoveryPause(true);
-                       recoveryPausesHere(true);
-
-                       /* drop into promote */
-
-                   case RECOVERY_TARGET_ACTION_PROMOTE:
-                       break;
-               }
-           }
-
-           /* Allow resource managers to do any required cleanup. */
-           for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
-           {
-               if (RmgrTable[rmid].rm_cleanup != NULL)
-                   RmgrTable[rmid].rm_cleanup();
-           }
-
-           ereport(LOG,
-                   (errmsg("redo done at %X/%X system usage: %s",
-                           LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
-                           pg_rusage_show(&ru0))));
-           xtime = GetLatestXTime();
-           if (xtime)
-               ereport(LOG,
-                       (errmsg("last completed transaction was at log time %s",
-                               timestamptz_to_str(xtime))));
-
-           InRedo = false;
-       }
-       else
-       {
-           /* there are no WAL records following the checkpoint */
-           ereport(LOG,
-                   (errmsg("redo is not required")));
-
-       }
-
         /*
-        * This check is intentionally after the above log messages that
-        * indicate how far recovery went.
+        * We're all set for replaying the WAL now. Do it.
          */
-       if (ArchiveRecoveryRequested &&
-           recoveryTarget != RECOVERY_TARGET_UNSET &&
-           !reachedRecoveryTarget)
-           ereport(FATAL,
-                   (errmsg("recovery ended before configured recovery target was reached")));
-
-       /*---- END PerformWalRecovery ----*/
+       PerformWalRecovery();
         performedWalRecovery = true;
     }
  
-   /*---- BEGIN FinishWalRecovery ----*/
-
-   /*
-    * Kill WAL receiver, if it's still running, before we continue to write
-    * the startup checkpoint and aborted-contrecord records. It will trump
-    * over these records and subsequent ones if it's still alive when we
-    * start writing WAL.
-    */
-   XLogShutdownWalRcv();
-
-   /*
-    * We are now done reading the xlog from stream. Turn off streaming
-    * recovery to force fetching the files (which would be required at end of
-    * recovery, e.g., timeline history file) from archive or pg_wal.
-    *
-    * Note that standby mode must be turned off after killing WAL receiver,
-    * i.e., calling XLogShutdownWalRcv().
-    */
-   Assert(!WalRcvStreaming());
-   StandbyMode = false;
-
-   /*
-    * Determine where to start writing WAL next.
-    *
-    * When recovery ended in an incomplete record, write a WAL record about
-    * that and continue after it.  In all other cases, re-fetch the last
-    * valid or last applied record, so we can identify the exact endpoint of
-    * what we consider the valid portion of WAL.
-    */
-   XLogBeginRead(xlogreader, LastRec);
-   record = ReadRecord(xlogreader, PANIC, false, replayTLI);
-   EndOfLog = xlogreader->EndRecPtr;
-
     /*
-    * EndOfLogTLI is the TLI in the filename of the XLOG segment containing
-    * the end-of-log. It could be different from the timeline that EndOfLog
-    * nominally belongs to, if there was a timeline switch in that segment,
-    * and we were reading the old WAL from a segment belonging to a higher
-    * timeline.
+    * Finish WAL recovery.
      */
-   EndOfLogTLI = xlogreader->seg.ws_tli;
-
-   if (ArchiveRecoveryRequested)
-   {
-       /*
-        * We are no longer in archive recovery state.
-        *
-        * We are now done reading the old WAL.  Turn off archive fetching if
-        * it was active.
-        */
-       Assert(InArchiveRecovery);
-       InArchiveRecovery = false;
-
-       /*
-        * If the ending log segment is still open, close it (to avoid
-        * problems on Windows with trying to rename or delete an open file).
-        */
-       if (readFile >= 0)
-       {
-           close(readFile);
-           readFile = -1;
-       }
-   }
-
-   recoveryStopReason = getRecoveryStopReason();
-
-   /*---- END FinishWalRecovery ----*/
+   endOfRecoveryInfo = FinishWalRecovery();
+   EndOfLog = endOfRecoveryInfo->endOfLog;
+   EndOfLogTLI = endOfRecoveryInfo->endOfLogTLI;
+   abortedRecPtr = endOfRecoveryInfo->abortedRecPtr;
+   missingContrecPtr = endOfRecoveryInfo->missingContrecPtr;
  
     /*
      * Complain if we did not roll forward far enough to render the backup
      * dump consistent.  Note: it is indeed okay to look at the local variable
-    * minRecoveryPoint here, even though ControlFile->minRecoveryPoint might
-    * be further ahead --- ControlFile->minRecoveryPoint cannot have been
-    * advanced beyond the WAL we processed.
+    * LocalMinRecoveryPoint here, even though ControlFile->minRecoveryPoint
+    * might be further ahead --- ControlFile->minRecoveryPoint cannot have
+    * been advanced beyond the WAL we processed.
      */
     if (InRecovery &&
-       (EndOfLog < minRecoveryPoint ||
+       (EndOfLog < LocalMinRecoveryPoint ||
          !XLogRecPtrIsInvalid(ControlFile->backupStartPoint)))
     {
         /*
@@ -7993,7 +5371,7 @@ StartupXLOG(void)
      *
      * In a normal crash recovery, we can just extend the timeline we were in.
      */
-   newTLI = replayTLI;
+   newTLI = endOfRecoveryInfo->lastRecTLI;
     if (ArchiveRecoveryRequested)
     {
         newTLI = findNewestTimeLine(recoveryTargetTLI) + 1;
@@ -8002,8 +5380,8 @@ StartupXLOG(void)
  
         /*
          * Make a writable copy of the last WAL segment.  (Note that we also
-        * have a copy of the last block of the old WAL in readBuf; we will
-        * use that below.)
+        * have a copy of the last block of the old WAL in
+        * endOfRecoveryInfo->lastPage; we will use that below.)
          */
         XLogInitNewTimeline(EndOfLogTLI, EndOfLog, newTLI);
  
@@ -8011,10 +5389,10 @@ StartupXLOG(void)
          * Remove the signal files out of the way, so that we don't
          * accidentally re-enter archive recovery mode in a subsequent crash.
          */
-       if (standby_signal_file_found)
+       if (endOfRecoveryInfo->standby_signal_file_found)
             durable_unlink(STANDBY_SIGNAL_FILE, FATAL);
  
-       if (recovery_signal_file_found)
+       if (endOfRecoveryInfo->recovery_signal_file_found)
             durable_unlink(RECOVERY_SIGNAL_FILE, FATAL);
  
         /*
@@ -8028,7 +5406,7 @@ StartupXLOG(void)
          * between here and writing the end-of-recovery record.
          */
         writeTimeLineHistory(newTLI, recoveryTargetTLI,
-                            EndOfLog, recoveryStopReason);
+                            EndOfLog, endOfRecoveryInfo->recoveryStopReason);
  
         ereport(LOG,
                 (errmsg("archive recovery complete")));
@@ -8036,7 +5414,7 @@ StartupXLOG(void)
  
     /* Save the selected TimeLineID in shared memory, too */
     XLogCtl->InsertTimeLineID = newTLI;
-   XLogCtl->PrevTimeLineID = replayTLI;
+   XLogCtl->PrevTimeLineID = endOfRecoveryInfo->lastRecTLI;
  
     /*
      * Actually, if WAL ended in an incomplete record, skip the parts that
@@ -8056,11 +5434,11 @@ StartupXLOG(void)
      * previous incarnation.
      */
     Insert = &XLogCtl->Insert;
-   Insert->PrevBytePos = XLogRecPtrToBytePos(LastRec);
+   Insert->PrevBytePos = XLogRecPtrToBytePos(endOfRecoveryInfo->lastRec);
     Insert->CurrBytePos = XLogRecPtrToBytePos(EndOfLog);
  
     /*
-    * Tricky point here: readBuf contains the *last* block that the LastRec
+    * Tricky point here: lastPage contains the *last* block that the lastRec
      * record spans, not the one it starts in.  The last block is indeed the
      * one we want to use.
      */
@@ -8069,21 +5447,18 @@ StartupXLOG(void)
         char       *page;
         int         len;
         int         firstIdx;
-       XLogRecPtr  pageBeginPtr;
-
-       pageBeginPtr = EndOfLog - (EndOfLog % XLOG_BLCKSZ);
-       Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size));
  
         firstIdx = XLogRecPtrToBufIdx(EndOfLog);
+       len = EndOfLog - endOfRecoveryInfo->lastPageBeginPtr;
+       Assert(len < XLOG_BLCKSZ);
  
         /* Copy the valid part of the last block, and zero the rest */
         page = &XLogCtl->pages[firstIdx * XLOG_BLCKSZ];
-       len = EndOfLog % XLOG_BLCKSZ;
-       memcpy(page, xlogreader->readBuf, len);
+       memcpy(page, endOfRecoveryInfo->lastPage, len);
         memset(page + len, 0, XLOG_BLCKSZ - len);
  
-       XLogCtl->xlblocks[firstIdx] = pageBeginPtr + XLOG_BLCKSZ;
-       XLogCtl->InitializedUpTo = pageBeginPtr + XLOG_BLCKSZ;
+       XLogCtl->xlblocks[firstIdx] = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
+       XLogCtl->InitializedUpTo = endOfRecoveryInfo->lastPageBeginPtr + XLOG_BLCKSZ;
     }
     else
     {
@@ -8138,40 +5513,8 @@ StartupXLOG(void)
     /* Reload shared-memory state for prepared transactions */
     RecoverPreparedTransactions();
  
-   /*---- BEGIN ShutdownWalRecovery ----*/
-
     /* Shut down xlogreader */
-   if (readFile >= 0)
-   {
-       close(readFile);
-       readFile = -1;
-   }
-   XLogReaderFree(xlogreader);
-
-   if (ArchiveRecoveryRequested)
-   {
-       char        recoveryPath[MAXPGPATH];
-
-       /*
-        * Since there might be a partial WAL segment named RECOVERYXLOG, get
-        * rid of it.
-        */
-       snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYXLOG");
-       unlink(recoveryPath);   /* ignore any error */
-
-       /* Get rid of any remaining recovered timeline-history file, too */
-       snprintf(recoveryPath, MAXPGPATH, XLOGDIR "/RECOVERYHISTORY");
-       unlink(recoveryPath);   /* ignore any error */
-   }
-
-   /*
-    * We don't need the latch anymore. It's not strictly necessary to disown
-    * it, but let's do it for the sake of tidiness.
-    */
-   if (ArchiveRecoveryRequested)
-       DisownLatch(&XLogCtl->recoveryWakeupLatch);
-
-   /*---- END ShutdownWalRecovery ----*/
+   ShutdownWalRecovery();
  
     /* Enable WAL writes for this backend only. */
     LocalSetXLogInsertAllowed();
@@ -8181,8 +5524,6 @@ StartupXLOG(void)
     {
         Assert(!XLogRecPtrIsInvalid(missingContrecPtr));
         CreateOverwriteContrecordRecord(abortedRecPtr, missingContrecPtr, newTLI);
-       abortedRecPtr = InvalidXLogRecPtr;
-       missingContrecPtr = InvalidXLogRecPtr;
     }
  
     /*
@@ -8269,99 +5610,72 @@ StartupXLOG(void)
  }
  
  /*
- * Checks if recovery has reached a consistent state. When consistency is
- * reached and we have a valid starting standby snapshot, tell postmaster
- * that it can start accepting read-only connections.
+ * Callback from PerformWalRecovery(), called when we switch from crash
+ * recovery to archive recovery mode.  Updates the control file accordingly.
   */
-static void
-CheckRecoveryConsistency(void)
+void
+SwitchIntoArchiveRecovery(XLogRecPtr EndRecPtr, TimeLineID replayTLI)
  {
-   XLogRecPtr  lastReplayedEndRecPtr;
+   /* initialize minRecoveryPoint to this record */
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
+   if (ControlFile->minRecoveryPoint < EndRecPtr)
+   {
+       ControlFile->minRecoveryPoint = EndRecPtr;
+       ControlFile->minRecoveryPointTLI = replayTLI;
+   }
+   /* update local copy */
+   LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+   LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
  
     /*
-    * During crash recovery, we don't reach a consistent state until we've
-    * replayed all the WAL.
+    * The startup process can update its local copy of minRecoveryPoint from
+    * this point.
      */
-   if (XLogRecPtrIsInvalid(minRecoveryPoint))
-       return;
-
-   Assert(InArchiveRecovery);
+   updateMinRecoveryPoint = true;
  
-   /*
-    * assume that we are called in the startup process, and hence don't need
-    * a lock to read lastReplayedEndRecPtr
-    */
-   lastReplayedEndRecPtr = XLogCtl->lastReplayedEndRecPtr;
+   UpdateControlFile();
  
     /*
-    * Have we reached the point where our base backup was completed?
+    * We update SharedRecoveryState while holding the lock on ControlFileLock
+    * so both states are consistent in shared memory.
      */
-   if (!XLogRecPtrIsInvalid(ControlFile->backupEndPoint) &&
-       ControlFile->backupEndPoint <= lastReplayedEndRecPtr)
-   {
-       /*
-        * We have reached the end of base backup, as indicated by pg_control.
-        * The data on disk is now consistent. Reset backupStartPoint and
-        * backupEndPoint, and update minRecoveryPoint to make sure we don't
-        * allow starting up at an earlier point even if recovery is stopped
-        * and restarted soon after this.
-        */
-       elog(DEBUG1, "end of backup reached");
-
-       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-
-       if (ControlFile->minRecoveryPoint < lastReplayedEndRecPtr)
-           ControlFile->minRecoveryPoint = lastReplayedEndRecPtr;
-
-       ControlFile->backupStartPoint = InvalidXLogRecPtr;
-       ControlFile->backupEndPoint = InvalidXLogRecPtr;
-       ControlFile->backupEndRequired = false;
-       UpdateControlFile();
+   SpinLockAcquire(&XLogCtl->info_lck);
+   XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE;
+   SpinLockRelease(&XLogCtl->info_lck);
  
-       LWLockRelease(ControlFileLock);
-   }
+   LWLockRelease(ControlFileLock);
+}
  
+/*
+ * Callback from PerformWalRecovery(), called when we reach the end of backup.
+ * Updates the control file accordingly.
+ */
+void
+ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli)
+{
     /*
-    * Have we passed our safe starting point? Note that minRecoveryPoint is
-    * known to be incorrectly set if ControlFile->backupEndRequired, until
-    * the XLOG_BACKUP_END arrives to advise us of the correct
-    * minRecoveryPoint. All we know prior to that is that we're not
-    * consistent yet.
+    * We have reached the end of base backup, as indicated by pg_control. The
+    * data on disk is now consistent (unless minRecoveryPoint is further
+    * ahead, which can happen if we crashed during previous recovery).  Reset
+    * backupStartPoint and backupEndPoint, and update minRecoveryPoint to
+    * make sure we don't allow starting up at an earlier point even if
+    * recovery is stopped and restarted soon after this.
      */
-   if (!reachedConsistency && !ControlFile->backupEndRequired &&
-       minRecoveryPoint <= lastReplayedEndRecPtr &&
-       XLogRecPtrIsInvalid(ControlFile->backupStartPoint))
-   {
-       /*
-        * Check to see if the XLOG sequence contained any unresolved
-        * references to uninitialized pages.
-        */
-       XLogCheckInvalidPages();
-
-       reachedConsistency = true;
-       ereport(LOG,
-               (errmsg("consistent recovery state reached at %X/%X",
-                       LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
-   }
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
  
-   /*
-    * Have we got a valid starting snapshot that will allow queries to be
-    * run? If so, we can tell postmaster that the database is consistent now,
-    * enabling connections.
-    */
-   if (standbyState == STANDBY_SNAPSHOT_READY &&
-       !LocalHotStandbyActive &&
-       reachedConsistency &&
-       IsUnderPostmaster)
+   if (ControlFile->minRecoveryPoint < EndRecPtr)
     {
-       SpinLockAcquire(&XLogCtl->info_lck);
-       XLogCtl->SharedHotStandbyActive = true;
-       SpinLockRelease(&XLogCtl->info_lck);
+       ControlFile->minRecoveryPoint = EndRecPtr;
+       ControlFile->minRecoveryPointTLI = tli;
+   }
  
-       LocalHotStandbyActive = true;
+   ControlFile->backupStartPoint = InvalidXLogRecPtr;
+   ControlFile->backupEndPoint = InvalidXLogRecPtr;
+   ControlFile->backupEndRequired = false;
+   UpdateControlFile();
  
-       SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
-   }
+   LWLockRelease(ControlFileLock);
  }
  
  /*
@@ -8393,7 +5707,7 @@ PerformRecoveryXLogAction(void)
      * fully out of recovery mode and already accepting queries.
      */
     if (ArchiveRecoveryRequested && IsUnderPostmaster &&
-       LocalPromoteIsTriggered)
+       PromoteIsTriggered())
     {
         promoted = true;
  
@@ -8472,47 +5786,6 @@ GetRecoveryState(void)
     return retval;
  }
  
-/*
- * Is HotStandby active yet? This is only important in special backends
- * since normal backends won't ever be able to connect until this returns
- * true. Postmaster knows this by way of signal, not via shared memory.
- *
- * Unlike testing standbyState, this works in any process that's connected to
- * shared memory.  (And note that standbyState alone doesn't tell the truth
- * anyway.)
- */
-bool
-HotStandbyActive(void)
-{
-   /*
-    * We check shared state each time only until Hot Standby is active. We
-    * can't de-activate Hot Standby, so there's no need to keep checking
-    * after the shared variable has once been seen true.
-    */
-   if (LocalHotStandbyActive)
-       return true;
-   else
-   {
-       /* spinlock is essential on machines with weak memory ordering! */
-       SpinLockAcquire(&XLogCtl->info_lck);
-       LocalHotStandbyActive = XLogCtl->SharedHotStandbyActive;
-       SpinLockRelease(&XLogCtl->info_lck);
-
-       return LocalHotStandbyActive;
-   }
-}
-
-/*
- * Like HotStandbyActive(), but to be used only in WAL replay code,
- * where we don't need to ask any other process what the state is.
- */
-bool
-HotStandbyActiveInReplay(void)
-{
-   Assert(AmStartupProcess() || !IsPostmasterEnvironment);
-   return LocalHotStandbyActive;
-}
-
  /*
   * Is this process allowed to insert new WAL records?
   *
@@ -8563,109 +5836,6 @@ LocalSetXLogInsertAllowed(void)
     return oldXLogAllowed;
  }
  
-/*
- * Subroutine to try to fetch and validate a prior checkpoint record.
- *
- * whichChkpt identifies the checkpoint (merely for reporting purposes).
- * 1 for "primary", 0 for "other" (backup_label)
- */
-static XLogRecord *
-ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
-                    int whichChkpt, bool report, TimeLineID replayTLI)
-{
-   XLogRecord *record;
-   uint8       info;
-
-   if (!XRecOffIsValid(RecPtr))
-   {
-       if (!report)
-           return NULL;
-
-       switch (whichChkpt)
-       {
-           case 1:
-               ereport(LOG,
-                       (errmsg("invalid primary checkpoint link in control file")));
-               break;
-           default:
-               ereport(LOG,
-                       (errmsg("invalid checkpoint link in backup_label file")));
-               break;
-       }
-       return NULL;
-   }
-
-   XLogBeginRead(xlogreader, RecPtr);
-   record = ReadRecord(xlogreader, LOG, true, replayTLI);
-
-   if (record == NULL)
-   {
-       if (!report)
-           return NULL;
-
-       switch (whichChkpt)
-       {
-           case 1:
-               ereport(LOG,
-                       (errmsg("invalid primary checkpoint record")));
-               break;
-           default:
-               ereport(LOG,
-                       (errmsg("invalid checkpoint record")));
-               break;
-       }
-       return NULL;
-   }
-   if (record->xl_rmid != RM_XLOG_ID)
-   {
-       switch (whichChkpt)
-       {
-           case 1:
-               ereport(LOG,
-                       (errmsg("invalid resource manager ID in primary checkpoint record")));
-               break;
-           default:
-               ereport(LOG,
-                       (errmsg("invalid resource manager ID in checkpoint record")));
-               break;
-       }
-       return NULL;
-   }
-   info = record->xl_info & ~XLR_INFO_MASK;
-   if (info != XLOG_CHECKPOINT_SHUTDOWN &&
-       info != XLOG_CHECKPOINT_ONLINE)
-   {
-       switch (whichChkpt)
-       {
-           case 1:
-               ereport(LOG,
-                       (errmsg("invalid xl_info in primary checkpoint record")));
-               break;
-           default:
-               ereport(LOG,
-                       (errmsg("invalid xl_info in checkpoint record")));
-               break;
-       }
-       return NULL;
-   }
-   if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
-   {
-       switch (whichChkpt)
-       {
-           case 1:
-               ereport(LOG,
-                       (errmsg("invalid length of primary checkpoint record")));
-               break;
-           default:
-               ereport(LOG,
-                       (errmsg("invalid length of checkpoint record")));
-               break;
-       }
-       return NULL;
-   }
-   return record;
-}
-
  /*
   * Return the current Redo pointer from shared memory.
   *
@@ -9849,8 +7019,8 @@ CreateRestartPoint(int flags)
             ControlFile->minRecoveryPointTLI = lastCheckPoint.ThisTimeLineID;
  
             /* update local copy */
-           minRecoveryPoint = ControlFile->minRecoveryPoint;
-           minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+           LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+           LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
         }
         if (flags & CHECKPOINT_IS_SHUTDOWN)
             ControlFile->state = DB_SHUTDOWNED_IN_RECOVERY;
@@ -10313,67 +7483,20 @@ UpdateFullPageWrites(void)
     END_CRIT_SECTION();
  }
  
-/*
- * Check that it's OK to switch to new timeline during recovery.
- *
- * 'lsn' is the address of the shutdown checkpoint record we're about to
- * replay. (Currently, timeline can only change at a shutdown checkpoint).
- */
-static void
-checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
-                   TimeLineID replayTLI)
-{
-   /* Check that the record agrees on what the current (old) timeline is */
-   if (prevTLI != replayTLI)
-       ereport(PANIC,
-               (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
-                       prevTLI, replayTLI)));
-
-   /*
-    * The new timeline better be in the list of timelines we expect to see,
-    * according to the timeline history. It should also not decrease.
-    */
-   if (newTLI < replayTLI || !tliInHistory(newTLI, expectedTLEs))
-       ereport(PANIC,
-               (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
-                       newTLI, replayTLI)));
-
-   /*
-    * If we have not yet reached min recovery point, and we're about to
-    * switch to a timeline greater than the timeline of the min recovery
-    * point: trouble. After switching to the new timeline, we could not
-    * possibly visit the min recovery point on the correct timeline anymore.
-    * This can happen if there is a newer timeline in the archive that
-    * branched before the timeline the min recovery point is on, and you
-    * attempt to do PITR to the new timeline.
-    */
-   if (!XLogRecPtrIsInvalid(minRecoveryPoint) &&
-       lsn < minRecoveryPoint &&
-       newTLI > minRecoveryPointTLI)
-       ereport(PANIC,
-               (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
-                       newTLI,
-                       LSN_FORMAT_ARGS(minRecoveryPoint),
-                       minRecoveryPointTLI)));
-
-   /* Looks good */
-}
-
  /*
   * XLOG resource manager's routines
   *
   * Definitions of info values are in include/catalog/pg_control.h, though
   * not all record types are related to control file updates.
+ *
+ * NOTE: Some XLOG record types that are directly related to WAL recovery
+ * are handled in xlogrecovery_redo().
   */
  void
  xlog_redo(XLogReaderState *record)
  {
     uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
     XLogRecPtr  lsn = record->EndRecPtr;
-   TimeLineID  replayTLI;
-
-   /* No other process can change this, so we can read it without a lock. */
-   replayTLI = XLogCtl->replayEndTLI;
  
     /*
      * In XLOG rmgr, backup blocks are only used by XLOG_FPI and
@@ -10402,6 +7525,7 @@ xlog_redo(XLogReaderState *record)
     else if (info == XLOG_CHECKPOINT_SHUTDOWN)
     {
         CheckPoint  checkPoint;
+       TimeLineID  replayTLI;
  
         memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
         /* In a SHUTDOWN checkpoint, believe the counters exactly */
@@ -10487,6 +7611,7 @@ xlog_redo(XLogReaderState *record)
          * We should've already switched to the new TLI before replaying this
          * record.
          */
+       (void) GetCurrentReplayRecPtr(&replayTLI);
         if (checkPoint.ThisTimeLineID != replayTLI)
             ereport(PANIC,
                     (errmsg("unexpected timeline ID %u (should be %u) in shutdown checkpoint record",
@@ -10497,6 +7622,7 @@ xlog_redo(XLogReaderState *record)
     else if (info == XLOG_CHECKPOINT_ONLINE)
     {
         CheckPoint  checkPoint;
+       TimeLineID  replayTLI;
  
         memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
         /* In an ONLINE checkpoint, treat the XID counter as a minimum */
@@ -10543,6 +7669,7 @@ xlog_redo(XLogReaderState *record)
         SpinLockRelease(&XLogCtl->info_lck);
  
         /* TLI should not change in an on-line checkpoint */
+       (void) GetCurrentReplayRecPtr(&replayTLI);
         if (checkPoint.ThisTimeLineID != replayTLI)
             ereport(PANIC,
                     (errmsg("unexpected timeline ID %u (should be %u) in online checkpoint record",
@@ -10552,14 +7679,12 @@ xlog_redo(XLogReaderState *record)
     }
     else if (info == XLOG_OVERWRITE_CONTRECORD)
     {
-       xl_overwrite_contrecord xlrec;
-
-       memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_overwrite_contrecord));
-       VerifyOverwriteContrecord(&xlrec, record);
+       /* nothing to do here, handled in xlogrecovery_redo() */
     }
     else if (info == XLOG_END_OF_RECOVERY)
     {
         xl_end_of_recovery xlrec;
+       TimeLineID  replayTLI;
  
         memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
  
@@ -10573,6 +7698,7 @@ xlog_redo(XLogReaderState *record)
          * We should've already switched to the new TLI before replaying this
          * record.
          */
+       (void) GetCurrentReplayRecPtr(&replayTLI);
         if (xlrec.ThisTimeLineID != replayTLI)
             ereport(PANIC,
                     (errmsg("unexpected timeline ID %u (should be %u) in end-of-recovery record",
@@ -10588,7 +7714,7 @@ xlog_redo(XLogReaderState *record)
     }
     else if (info == XLOG_RESTORE_POINT)
     {
-       /* nothing to do here */
+       /* nothing to do here, handled in xlogrecovery.c */
     }
     else if (info == XLOG_FPI || info == XLOG_FPI_FOR_HINT)
     {
@@ -10626,34 +7752,7 @@ xlog_redo(XLogReaderState *record)
     }
     else if (info == XLOG_BACKUP_END)
     {
-       XLogRecPtr  startpoint;
-
-       memcpy(&startpoint, XLogRecGetData(record), sizeof(startpoint));
-
-       if (ControlFile->backupStartPoint == startpoint)
-       {
-           /*
-            * We have reached the end of base backup, the point where
-            * pg_stop_backup() was done. The data on disk is now consistent.
-            * Reset backupStartPoint, and update minRecoveryPoint to make
-            * sure we don't allow starting up at an earlier point even if
-            * recovery is stopped and restarted soon after this.
-            */
-           elog(DEBUG1, "end of backup reached");
-
-           LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-
-           if (ControlFile->minRecoveryPoint < lsn)
-           {
-               ControlFile->minRecoveryPoint = lsn;
-               ControlFile->minRecoveryPointTLI = replayTLI;
-           }
-           ControlFile->backupStartPoint = InvalidXLogRecPtr;
-           ControlFile->backupEndRequired = false;
-           UpdateControlFile();
-
-           LWLockRelease(ControlFileLock);
-       }
+       /* nothing to do here, handled in xlogrecovery_redo() */
     }
     else if (info == XLOG_PARAMETER_CHANGE)
     {
@@ -10681,11 +7780,14 @@ xlog_redo(XLogReaderState *record)
          */
         if (InArchiveRecovery)
         {
-           minRecoveryPoint = ControlFile->minRecoveryPoint;
-           minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+           LocalMinRecoveryPoint = ControlFile->minRecoveryPoint;
+           LocalMinRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
         }
-       if (minRecoveryPoint != InvalidXLogRecPtr && minRecoveryPoint < lsn)
+       if (LocalMinRecoveryPoint != InvalidXLogRecPtr && LocalMinRecoveryPoint < lsn)
         {
+           TimeLineID  replayTLI;
+
+           (void) GetCurrentReplayRecPtr(&replayTLI);
             ControlFile->minRecoveryPoint = lsn;
             ControlFile->minRecoveryPointTLI = replayTLI;
         }
@@ -10724,102 +7826,6 @@ xlog_redo(XLogReaderState *record)
     }
  }
  
-/*
- * Verify the payload of a XLOG_OVERWRITE_CONTRECORD record.
- */
-static void
-VerifyOverwriteContrecord(xl_overwrite_contrecord *xlrec, XLogReaderState *state)
-{
-   if (xlrec->overwritten_lsn != state->overwrittenRecPtr)
-       elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
-            LSN_FORMAT_ARGS(xlrec->overwritten_lsn),
-            LSN_FORMAT_ARGS(state->overwrittenRecPtr));
-
-   ereport(LOG,
-           (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
-                   LSN_FORMAT_ARGS(xlrec->overwritten_lsn),
-                   timestamptz_to_str(xlrec->overwrite_time))));
-
-   /* Verifying the record should only happen once */
-   state->overwrittenRecPtr = InvalidXLogRecPtr;
-}
-
-#ifdef WAL_DEBUG
-
-static void
-xlog_outrec(StringInfo buf, XLogReaderState *record)
-{
-   appendStringInfo(buf, "prev %X/%X; xid %u",
-                    LSN_FORMAT_ARGS(XLogRecGetPrev(record)),
-                    XLogRecGetXid(record));
-
-   appendStringInfo(buf, "; len %u",
-                    XLogRecGetDataLen(record));
-
-   xlog_block_info(buf, record);
-}
-#endif                         /* WAL_DEBUG */
-
-/*
- * Returns a string giving information about all the blocks in an
- * XLogRecord.
- */
-static void
-xlog_block_info(StringInfo buf, XLogReaderState *record)
-{
-   int         block_id;
-
-   /* decode block references */
-   for (block_id = 0; block_id <= record->max_block_id; block_id++)
-   {
-       RelFileNode rnode;
-       ForkNumber  forknum;
-       BlockNumber blk;
-
-       if (!XLogRecHasBlockRef(record, block_id))
-           continue;
-
-       XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
-       if (forknum != MAIN_FORKNUM)
-           appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
-                            block_id,
-                            rnode.spcNode, rnode.dbNode, rnode.relNode,
-                            forknum,
-                            blk);
-       else
-           appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
-                            block_id,
-                            rnode.spcNode, rnode.dbNode, rnode.relNode,
-                            blk);
-       if (XLogRecHasBlockImage(record, block_id))
-           appendStringInfoString(buf, " FPW");
-   }
-}
-
-/*
- * Returns a string describing an XLogRecord, consisting of its identity
- * optionally followed by a colon, a space, and a further description.
- */
-static void
-xlog_outdesc(StringInfo buf, XLogReaderState *record)
-{
-   RmgrId      rmid = XLogRecGetRmid(record);
-   uint8       info = XLogRecGetInfo(record);
-   const char *id;
-
-   appendStringInfoString(buf, RmgrTable[rmid].rm_name);
-   appendStringInfoChar(buf, '/');
-
-   id = RmgrTable[rmid].rm_identify(info);
-   if (id == NULL)
-       appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
-   else
-       appendStringInfo(buf, "%s: ", id);
-
-   RmgrTable[rmid].rm_desc(buf, record);
-}
-
-
  /*
   * Return the (possible) sync flag used for opening a file, depending on the
   * value of the GUC wal_sync_method.
@@ -12024,27 +9030,6 @@ register_persistent_abort_backup_handler(void)
     already_done = true;
  }
  
-/*
- * Get latest redo apply position.
- *
- * Exported to allow WALReceiver to read the pointer directly.
- */
-XLogRecPtr
-GetXLogReplayRecPtr(TimeLineID *replayTLI)
-{
-   XLogRecPtr  recptr;
-   TimeLineID  tli;
-
-   SpinLockAcquire(&XLogCtl->info_lck);
-   recptr = XLogCtl->lastReplayedEndRecPtr;
-   tli = XLogCtl->lastReplayedTLI;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   if (replayTLI)
-       *replayTLI = tli;
-   return recptr;
-}
-
  /*
   * Get latest WAL insert pointer
   */
@@ -12064,283 +9049,27 @@ GetXLogInsertRecPtr(void)
  /*
   * Get latest WAL write pointer
   */
-XLogRecPtr
-GetXLogWriteRecPtr(void)
-{
-   SpinLockAcquire(&XLogCtl->info_lck);
-   LogwrtResult = XLogCtl->LogwrtResult;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   return LogwrtResult.Write;
-}
-
-/*
- * Returns the redo pointer of the last checkpoint or restartpoint. This is
- * the oldest point in WAL that we still need, if we have to restart recovery.
- */
-void
-GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
-{
-   LWLockAcquire(ControlFileLock, LW_SHARED);
-   *oldrecptr = ControlFile->checkPointCopy.redo;
-   *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
-   LWLockRelease(ControlFileLock);
-}
-
-/*
- * read_backup_label: check to see if a backup_label file is present
- *
- * If we see a backup_label during recovery, we assume that we are recovering
- * from a backup dump file, and we therefore roll forward from the checkpoint
- * identified by the label file, NOT what pg_control says.  This avoids the
- * problem that pg_control might have been archived one or more checkpoints
- * later than the start of the dump, and so if we rely on it as the start
- * point, we will fail to restore a consistent database state.
- *
- * Returns true if a backup_label was found (and fills the checkpoint
- * location and TLI into *checkPointLoc and *backupLabelTLI, respectively);
- * returns false if not. If this backup_label came from a streamed backup,
- * *backupEndRequired is set to true. If this backup_label was created during
- * recovery, *backupFromStandby is set to true.
- *
- * Also sets the global variable RedoStartLSN with the LSN read from the
- * backup file.
- */
-static bool
-read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
-                 bool *backupEndRequired, bool *backupFromStandby)
-{
-   char        startxlogfilename[MAXFNAMELEN];
-   TimeLineID  tli_from_walseg,
-               tli_from_file;
-   FILE       *lfp;
-   char        ch;
-   char        backuptype[20];
-   char        backupfrom[20];
-   char        backuplabel[MAXPGPATH];
-   char        backuptime[128];
-   uint32      hi,
-               lo;
-
-   /* suppress possible uninitialized-variable warnings */
-   *checkPointLoc = InvalidXLogRecPtr;
-   *backupLabelTLI = 0;
-   *backupEndRequired = false;
-   *backupFromStandby = false;
-
-   /*
-    * See if label file is present
-    */
-   lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
-   if (!lfp)
-   {
-       if (errno != ENOENT)
-           ereport(FATAL,
-                   (errcode_for_file_access(),
-                    errmsg("could not read file \"%s\": %m",
-                           BACKUP_LABEL_FILE)));
-       return false;           /* it's not there, all is fine */
-   }
-
-   /*
-    * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code
-    * is pretty crude, but we are not expecting any variability in the file
-    * format).
-    */
-   if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
-              &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
-       ereport(FATAL,
-               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
-   RedoStartLSN = ((uint64) hi) << 32 | lo;
-   if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
-              &hi, &lo, &ch) != 3 || ch != '\n')
-       ereport(FATAL,
-               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
-   *checkPointLoc = ((uint64) hi) << 32 | lo;
-
-   /*
-    * BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't restore
-    * from an older backup anyway, but since the information on it is not
-    * strictly required, don't error out if it's missing for some reason.
-    */
-   if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
-   {
-       if (strcmp(backuptype, "streamed") == 0)
-           *backupEndRequired = true;
-   }
-
-   if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
-   {
-       if (strcmp(backupfrom, "standby") == 0)
-           *backupFromStandby = true;
-   }
-
-   /*
-    * Parse START TIME and LABEL. Those are not mandatory fields for recovery
-    * but checking for their presence is useful for debugging and the next
-    * sanity checks. Cope also with the fact that the result buffers have a
-    * pre-allocated size, hence if the backup_label file has been generated
-    * with strings longer than the maximum assumed here an incorrect parsing
-    * happens. That's fine as only minor consistency checks are done
-    * afterwards.
-    */
-   if (fscanf(lfp, "START TIME: %127[^\n]\n", backuptime) == 1)
-       ereport(DEBUG1,
-               (errmsg_internal("backup time %s in file \"%s\"",
-                                backuptime, BACKUP_LABEL_FILE)));
-
-   if (fscanf(lfp, "LABEL: %1023[^\n]\n", backuplabel) == 1)
-       ereport(DEBUG1,
-               (errmsg_internal("backup label %s in file \"%s\"",
-                                backuplabel, BACKUP_LABEL_FILE)));
-
-   /*
-    * START TIMELINE is new as of 11. Its parsing is not mandatory, still use
-    * it as a sanity check if present.
-    */
-   if (fscanf(lfp, "START TIMELINE: %u\n", &tli_from_file) == 1)
-   {
-       if (tli_from_walseg != tli_from_file)
-           ereport(FATAL,
-                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                    errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE),
-                    errdetail("Timeline ID parsed is %u, but expected %u.",
-                              tli_from_file, tli_from_walseg)));
-
-       ereport(DEBUG1,
-               (errmsg_internal("backup timeline %u in file \"%s\"",
-                                tli_from_file, BACKUP_LABEL_FILE)));
-   }
-
-   if (ferror(lfp) || FreeFile(lfp))
-       ereport(FATAL,
-               (errcode_for_file_access(),
-                errmsg("could not read file \"%s\": %m",
-                       BACKUP_LABEL_FILE)));
-
-   *backupLabelTLI = tli_from_walseg;
-
-   return true;
-}
-
-/*
- * read_tablespace_map: check to see if a tablespace_map file is present
- *
- * If we see a tablespace_map file during recovery, we assume that we are
- * recovering from a backup dump file, and we therefore need to create symlinks
- * as per the information present in tablespace_map file.
- *
- * Returns true if a tablespace_map file was found (and fills *tablespaces
- * with a tablespaceinfo struct for each tablespace listed in the file);
- * returns false if not.
- */
-static bool
-read_tablespace_map(List **tablespaces)
-{
-   tablespaceinfo *ti;
-   FILE       *lfp;
-   char        str[MAXPGPATH];
-   int         ch,
-               i,
-               n;
-   bool        was_backslash;
-
-   /*
-    * See if tablespace_map file is present
-    */
-   lfp = AllocateFile(TABLESPACE_MAP, "r");
-   if (!lfp)
-   {
-       if (errno != ENOENT)
-           ereport(FATAL,
-                   (errcode_for_file_access(),
-                    errmsg("could not read file \"%s\": %m",
-                           TABLESPACE_MAP)));
-       return false;           /* it's not there, all is fine */
-   }
-
-   /*
-    * Read and parse the link name and path lines from tablespace_map file
-    * (this code is pretty crude, but we are not expecting any variability in
-    * the file format).  De-escape any backslashes that were inserted.
-    */
-   i = 0;
-   was_backslash = false;
-   while ((ch = fgetc(lfp)) != EOF)
-   {
-       if (!was_backslash && (ch == '\n' || ch == '\r'))
-       {
-           if (i == 0)
-               continue;       /* \r immediately followed by \n */
-
-           /*
-            * The de-escaped line should contain an OID followed by exactly
-            * one space followed by a path.  The path might start with
-            * spaces, so don't be too liberal about parsing.
-            */
-           str[i] = '\0';
-           n = 0;
-           while (str[n] && str[n] != ' ')
-               n++;
-           if (n < 1 || n >= i - 1)
-               ereport(FATAL,
-                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                        errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
-           str[n++] = '\0';
-
-           ti = palloc0(sizeof(tablespaceinfo));
-           ti->oid = pstrdup(str);
-           ti->path = pstrdup(str + n);
-           *tablespaces = lappend(*tablespaces, ti);
-
-           i = 0;
-           continue;
-       }
-       else if (!was_backslash && ch == '\\')
-           was_backslash = true;
-       else
-       {
-           if (i < sizeof(str) - 1)
-               str[i++] = ch;
-           was_backslash = false;
-       }
-   }
-
-   if (i != 0 || was_backslash)    /* last line not terminated? */
-       ereport(FATAL,
-               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
-
-   if (ferror(lfp) || FreeFile(lfp))
-       ereport(FATAL,
-               (errcode_for_file_access(),
-                errmsg("could not read file \"%s\": %m",
-                       TABLESPACE_MAP)));
+XLogRecPtr
+GetXLogWriteRecPtr(void)
+{
+   SpinLockAcquire(&XLogCtl->info_lck);
+   LogwrtResult = XLogCtl->LogwrtResult;
+   SpinLockRelease(&XLogCtl->info_lck);
  
-   return true;
+   return LogwrtResult.Write;
  }
  
  /*
- * Error context callback for errors occurring during rm_redo().
+ * Returns the redo pointer of the last checkpoint or restartpoint. This is
+ * the oldest point in WAL that we still need, if we have to restart recovery.
   */
-static void
-rm_redo_error_callback(void *arg)
+void
+GetOldestRestartPoint(XLogRecPtr *oldrecptr, TimeLineID *oldtli)
  {
-   XLogReaderState *record = (XLogReaderState *) arg;
-   StringInfoData buf;
-
-   initStringInfo(&buf);
-   xlog_outdesc(&buf, record);
-   xlog_block_info(&buf, record);
-
-   /* translator: %s is a WAL record description */
-   errcontext("WAL redo at %X/%X for %s",
-              LSN_FORMAT_ARGS(record->ReadRecPtr),
-              buf.data);
-
-   pfree(buf.data);
+   LWLockAcquire(ControlFileLock, LW_SHARED);
+   *oldrecptr = ControlFile->checkPointCopy.redo;
+   *oldtli = ControlFile->checkPointCopy.ThisTimeLineID;
+   LWLockRelease(ControlFileLock);
  }
  
  /*
@@ -12424,715 +9153,8 @@ CancelBackup(void)
     }
  }
  
-/*
- * Read the XLOG page containing RecPtr into readBuf (if not read already).
- * Returns number of bytes read, if the page is read successfully, or -1
- * in case of errors.  When errors occur, they are ereport'ed, but only
- * if they have not been previously reported.
- *
- * This is responsible for restoring files from archive as needed, as well
- * as for waiting for the requested WAL record to arrive in standby mode.
- *
- * 'emode' specifies the log level used for reporting "file not found" or
- * "end of WAL" situations in archive recovery, or in standby mode when a
- * trigger file is found. If set to WARNING or below, XLogPageRead() returns
- * false in those situations, on higher log levels the ereport() won't
- * return.
- *
- * In standby mode, if after a successful return of XLogPageRead() the
- * caller finds the record it's interested in to be broken, it should
- * ereport the error with the level determined by
- * emode_for_corrupt_record(), and then set lastSourceFailed
- * and call XLogPageRead() again with the same arguments. This lets
- * XLogPageRead() to try fetching the record from another source, or to
- * sleep and retry.
- */
-static int
-XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
-            XLogRecPtr targetRecPtr, char *readBuf)
-{
-   XLogPageReadPrivate *private =
-   (XLogPageReadPrivate *) xlogreader->private_data;
-   int         emode = private->emode;
-   uint32      targetPageOff;
-   XLogSegNo   targetSegNo PG_USED_FOR_ASSERTS_ONLY;
-   int         r;
-
-   XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
-   targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
-
-   /*
-    * See if we need to switch to a new segment because the requested record
-    * is not in the currently open one.
-    */
-   if (readFile >= 0 &&
-       !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size))
-   {
-       /*
-        * Request a restartpoint if we've replayed too much xlog since the
-        * last one.
-        */
-       if (ArchiveRecoveryRequested && IsUnderPostmaster)
-       {
-           if (XLogCheckpointNeeded(readSegNo))
-           {
-               (void) GetRedoRecPtr();
-               if (XLogCheckpointNeeded(readSegNo))
-                   RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
-           }
-       }
-
-       close(readFile);
-       readFile = -1;
-       readSource = XLOG_FROM_ANY;
-   }
-
-   XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size);
-
-retry:
-   /* See if we need to retrieve more data */
-   if (readFile < 0 ||
-       (readSource == XLOG_FROM_STREAM &&
-        flushedUpto < targetPagePtr + reqLen))
-   {
-       if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
-                                        private->randAccess,
-                                        private->fetching_ckpt,
-                                        targetRecPtr,
-                                        private->replayTLI,
-                                        xlogreader->EndRecPtr))
-       {
-           if (readFile >= 0)
-               close(readFile);
-           readFile = -1;
-           readLen = 0;
-           readSource = XLOG_FROM_ANY;
-
-           return -1;
-       }
-   }
-
-   /*
-    * At this point, we have the right segment open and if we're streaming we
-    * know the requested record is in it.
-    */
-   Assert(readFile != -1);
-
-   /*
-    * If the current segment is being streamed from the primary, calculate
-    * how much of the current page we have received already. We know the
-    * requested record has been received, but this is for the benefit of
-    * future calls, to allow quick exit at the top of this function.
-    */
-   if (readSource == XLOG_FROM_STREAM)
-   {
-       if (((targetPagePtr) / XLOG_BLCKSZ) != (flushedUpto / XLOG_BLCKSZ))
-           readLen = XLOG_BLCKSZ;
-       else
-           readLen = XLogSegmentOffset(flushedUpto, wal_segment_size) -
-               targetPageOff;
-   }
-   else
-       readLen = XLOG_BLCKSZ;
-
-   /* Read the requested page */
-   readOff = targetPageOff;
-
-   pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
-   r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
-   if (r != XLOG_BLCKSZ)
-   {
-       char        fname[MAXFNAMELEN];
-       int         save_errno = errno;
-
-       pgstat_report_wait_end();
-       XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
-       if (r < 0)
-       {
-           errno = save_errno;
-           ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-                   (errcode_for_file_access(),
-                    errmsg("could not read from log segment %s, offset %u: %m",
-                           fname, readOff)));
-       }
-       else
-           ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
-                   (errcode(ERRCODE_DATA_CORRUPTED),
-                    errmsg("could not read from log segment %s, offset %u: read %d of %zu",
-                           fname, readOff, r, (Size) XLOG_BLCKSZ)));
-       goto next_record_is_invalid;
-   }
-   pgstat_report_wait_end();
-
-   Assert(targetSegNo == readSegNo);
-   Assert(targetPageOff == readOff);
-   Assert(reqLen <= readLen);
-
-   xlogreader->seg.ws_tli = curFileTLI;
-
-   /*
-    * Check the page header immediately, so that we can retry immediately if
-    * it's not valid. This may seem unnecessary, because ReadPageInternal()
-    * validates the page header anyway, and would propagate the failure up to
-    * ReadRecord(), which would retry. However, there's a corner case with
-    * continuation records, if a record is split across two pages such that
-    * we would need to read the two pages from different sources. For
-    * example, imagine a scenario where a streaming replica is started up,
-    * and replay reaches a record that's split across two WAL segments. The
-    * first page is only available locally, in pg_wal, because it's already
-    * been recycled on the primary. The second page, however, is not present
-    * in pg_wal, and we should stream it from the primary. There is a
-    * recycled WAL segment present in pg_wal, with garbage contents, however.
-    * We would read the first page from the local WAL segment, but when
-    * reading the second page, we would read the bogus, recycled, WAL
-    * segment. If we didn't catch that case here, we would never recover,
-    * because ReadRecord() would retry reading the whole record from the
-    * beginning.
-    *
-    * Of course, this only catches errors in the page header, which is what
-    * happens in the case of a recycled WAL segment. Other kinds of errors or
-    * corruption still has the same problem. But this at least fixes the
-    * common case, which can happen as part of normal operation.
-    *
-    * Validating the page header is cheap enough that doing it twice
-    * shouldn't be a big deal from a performance point of view.
-    *
-    * When not in standby mode, an invalid page header should cause recovery
-    * to end, not retry reading the page, so we don't need to validate the
-    * page header here for the retry. Instead, ReadPageInternal() is
-    * responsible for the validation.
-    */
-   if (StandbyMode &&
-       !XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
-   {
-       /*
-        * Emit this error right now then retry this page immediately. Use
-        * errmsg_internal() because the message was already translated.
-        */
-       if (xlogreader->errormsg_buf[0])
-           ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
-                   (errmsg_internal("%s", xlogreader->errormsg_buf)));
-
-       /* reset any error XLogReaderValidatePageHeader() might have set */
-       xlogreader->errormsg_buf[0] = '\0';
-       goto next_record_is_invalid;
-   }
-
-   return readLen;
-
-next_record_is_invalid:
-   lastSourceFailed = true;
-
-   if (readFile >= 0)
-       close(readFile);
-   readFile = -1;
-   readLen = 0;
-   readSource = XLOG_FROM_ANY;
-
-   /* In standby-mode, keep trying */
-   if (StandbyMode)
-       goto retry;
-   else
-       return -1;
-}
-
-/*
- * Open the WAL segment containing WAL location 'RecPtr'.
- *
- * The segment can be fetched via restore_command, or via walreceiver having
- * streamed the record, or it can already be present in pg_wal. Checking
- * pg_wal is mainly for crash recovery, but it will be polled in standby mode
- * too, in case someone copies a new segment directly to pg_wal. That is not
- * documented or recommended, though.
- *
- * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
- * prepare to read WAL starting from RedoStartLSN after this.
- *
- * 'RecPtr' might not point to the beginning of the record we're interested
- * in, it might also point to the page or segment header. In that case,
- * 'tliRecPtr' is the position of the WAL record we're interested in. It is
- * used to decide which timeline to stream the requested WAL from.
- *
- * 'replayLSN' is the current replay LSN, so that if we scan for new
- * timelines, we can reject a switch to a timeline that branched off before
- * this point.
- *
- * If the record is not immediately available, the function returns false
- * if we're not in standby mode. In standby mode, waits for it to become
- * available.
- *
- * When the requested record becomes available, the function opens the file
- * containing it (if not open already), and returns true. When end of standby
- * mode is triggered by the user, and there is no more WAL available, returns
- * false.
- */
-static bool
-WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
-                           bool fetching_ckpt, XLogRecPtr tliRecPtr,
-                           TimeLineID replayTLI, XLogRecPtr replayLSN)
-{
-   static TimestampTz last_fail_time = 0;
-   TimestampTz now;
-   bool        streaming_reply_sent = false;
-
-   /*-------
-    * Standby mode is implemented by a state machine:
-    *
-    * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
-    *    pg_wal (XLOG_FROM_PG_WAL)
-    * 2. Check trigger file
-    * 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
-    * 4. Rescan timelines
-    * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
-    *
-    * Failure to read from the current source advances the state machine to
-    * the next state.
-    *
-    * 'currentSource' indicates the current state. There are no currentSource
-    * values for "check trigger", "rescan timelines", and "sleep" states,
-    * those actions are taken when reading from the previous source fails, as
-    * part of advancing to the next state.
-    *
-    * If standby mode is turned off while reading WAL from stream, we move
-    * to XLOG_FROM_ARCHIVE and reset lastSourceFailed, to force fetching
-    * the files (which would be required at end of recovery, e.g., timeline
-    * history file) from archive or pg_wal. We don't need to kill WAL receiver
-    * here because it's already stopped when standby mode is turned off at
-    * the end of recovery.
-    *-------
-    */
-   if (!InArchiveRecovery)
-       currentSource = XLOG_FROM_PG_WAL;
-   else if (currentSource == XLOG_FROM_ANY ||
-            (!StandbyMode && currentSource == XLOG_FROM_STREAM))
-   {
-       lastSourceFailed = false;
-       currentSource = XLOG_FROM_ARCHIVE;
-   }
-
-   for (;;)
-   {
-       XLogSource  oldSource = currentSource;
-       bool        startWalReceiver = false;
-
-       /*
-        * First check if we failed to read from the current source, and
-        * advance the state machine if so. The failure to read might've
-        * happened outside this function, e.g when a CRC check fails on a
-        * record, or within this loop.
-        */
-       if (lastSourceFailed)
-       {
-           switch (currentSource)
-           {
-               case XLOG_FROM_ARCHIVE:
-               case XLOG_FROM_PG_WAL:
-
-                   /*
-                    * Check to see if the trigger file exists. Note that we
-                    * do this only after failure, so when you create the
-                    * trigger file, we still finish replaying as much as we
-                    * can from archive and pg_wal before failover.
-                    */
-                   if (StandbyMode && CheckForStandbyTrigger())
-                   {
-                       XLogShutdownWalRcv();
-                       return false;
-                   }
-
-                   /*
-                    * Not in standby mode, and we've now tried the archive
-                    * and pg_wal.
-                    */
-                   if (!StandbyMode)
-                       return false;
-
-                   /*
-                    * Move to XLOG_FROM_STREAM state, and set to start a
-                    * walreceiver if necessary.
-                    */
-                   currentSource = XLOG_FROM_STREAM;
-                   startWalReceiver = true;
-                   break;
-
-               case XLOG_FROM_STREAM:
-
-                   /*
-                    * Failure while streaming. Most likely, we got here
-                    * because streaming replication was terminated, or
-                    * promotion was triggered. But we also get here if we
-                    * find an invalid record in the WAL streamed from the
-                    * primary, in which case something is seriously wrong.
-                    * There's little chance that the problem will just go
-                    * away, but PANIC is not good for availability either,
-                    * especially in hot standby mode. So, we treat that the
-                    * same as disconnection, and retry from archive/pg_wal
-                    * again. The WAL in the archive should be identical to
-                    * what was streamed, so it's unlikely that it helps, but
-                    * one can hope...
-                    */
-
-                   /*
-                    * We should be able to move to XLOG_FROM_STREAM only in
-                    * standby mode.
-                    */
-                   Assert(StandbyMode);
-
-                   /*
-                    * Before we leave XLOG_FROM_STREAM state, make sure that
-                    * walreceiver is not active, so that it won't overwrite
-                    * WAL that we restore from archive.
-                    */
-                   if (WalRcvStreaming())
-                       XLogShutdownWalRcv();
-
-                   /*
-                    * Before we sleep, re-scan for possible new timelines if
-                    * we were requested to recover to the latest timeline.
-                    */
-                   if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
-                   {
-                       if (rescanLatestTimeLine(replayTLI, replayLSN))
-                       {
-                           currentSource = XLOG_FROM_ARCHIVE;
-                           break;
-                       }
-                   }
-
-                   /*
-                    * XLOG_FROM_STREAM is the last state in our state
-                    * machine, so we've exhausted all the options for
-                    * obtaining the requested WAL. We're going to loop back
-                    * and retry from the archive, but if it hasn't been long
-                    * since last attempt, sleep wal_retrieve_retry_interval
-                    * milliseconds to avoid busy-waiting.
-                    */
-                   now = GetCurrentTimestamp();
-                   if (!TimestampDifferenceExceeds(last_fail_time, now,
-                                                   wal_retrieve_retry_interval))
-                   {
-                       long        wait_time;
-
-                       wait_time = wal_retrieve_retry_interval -
-                           TimestampDifferenceMilliseconds(last_fail_time, now);
-
-                       (void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
-                                        WL_LATCH_SET | WL_TIMEOUT |
-                                        WL_EXIT_ON_PM_DEATH,
-                                        wait_time,
-                                        WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
-                       ResetLatch(&XLogCtl->recoveryWakeupLatch);
-                       now = GetCurrentTimestamp();
-
-                       /* Handle interrupt signals of startup process */
-                       HandleStartupProcInterrupts();
-                   }
-                   last_fail_time = now;
-                   currentSource = XLOG_FROM_ARCHIVE;
-                   break;
-
-               default:
-                   elog(ERROR, "unexpected WAL source %d", currentSource);
-           }
-       }
-       else if (currentSource == XLOG_FROM_PG_WAL)
-       {
-           /*
-            * We just successfully read a file in pg_wal. We prefer files in
-            * the archive over ones in pg_wal, so try the next file again
-            * from the archive first.
-            */
-           if (InArchiveRecovery)
-               currentSource = XLOG_FROM_ARCHIVE;
-       }
-
-       if (currentSource != oldSource)
-           elog(DEBUG2, "switched WAL source from %s to %s after %s",
-                xlogSourceNames[oldSource], xlogSourceNames[currentSource],
-                lastSourceFailed ? "failure" : "success");
-
-       /*
-        * We've now handled possible failure. Try to read from the chosen
-        * source.
-        */
-       lastSourceFailed = false;
-
-       switch (currentSource)
-       {
-           case XLOG_FROM_ARCHIVE:
-           case XLOG_FROM_PG_WAL:
-
-               /*
-                * WAL receiver must not be running when reading WAL from
-                * archive or pg_wal.
-                */
-               Assert(!WalRcvStreaming());
-
-               /* Close any old file we might have open. */
-               if (readFile >= 0)
-               {
-                   close(readFile);
-                   readFile = -1;
-               }
-               /* Reset curFileTLI if random fetch. */
-               if (randAccess)
-                   curFileTLI = 0;
-
-               /*
-                * Try to restore the file from archive, or read an existing
-                * file from pg_wal.
-                */
-               readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
-                                             currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
-                                             currentSource);
-               if (readFile >= 0)
-                   return true;    /* success! */
-
-               /*
-                * Nope, not found in archive or pg_wal.
-                */
-               lastSourceFailed = true;
-               break;
-
-           case XLOG_FROM_STREAM:
-               {
-                   bool        havedata;
-
-                   /*
-                    * We should be able to move to XLOG_FROM_STREAM only in
-                    * standby mode.
-                    */
-                   Assert(StandbyMode);
-
-                   /*
-                    * First, shutdown walreceiver if its restart has been
-                    * requested -- but no point if we're already slated for
-                    * starting it.
-                    */
-                   if (pendingWalRcvRestart && !startWalReceiver)
-                   {
-                       XLogShutdownWalRcv();
-
-                       /*
-                        * Re-scan for possible new timelines if we were
-                        * requested to recover to the latest timeline.
-                        */
-                       if (recoveryTargetTimeLineGoal ==
-                           RECOVERY_TARGET_TIMELINE_LATEST)
-                           rescanLatestTimeLine(replayTLI, replayLSN);
-
-                       startWalReceiver = true;
-                   }
-                   pendingWalRcvRestart = false;
-
-                   /*
-                    * Launch walreceiver if needed.
-                    *
-                    * If fetching_ckpt is true, RecPtr points to the initial
-                    * checkpoint location. In that case, we use RedoStartLSN
-                    * as the streaming start position instead of RecPtr, so
-                    * that when we later jump backwards to start redo at
-                    * RedoStartLSN, we will have the logs streamed already.
-                    */
-                   if (startWalReceiver &&
-                       PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
-                   {
-                       XLogRecPtr  ptr;
-                       TimeLineID  tli;
-
-                       if (fetching_ckpt)
-                       {
-                           ptr = RedoStartLSN;
-                           tli = ControlFile->checkPointCopy.ThisTimeLineID;
-                       }
-                       else
-                       {
-                           ptr = RecPtr;
-
-                           /*
-                            * Use the record begin position to determine the
-                            * TLI, rather than the position we're reading.
-                            */
-                           tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
-                           if (curFileTLI > 0 && tli < curFileTLI)
-                               elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
-                                    LSN_FORMAT_ARGS(tliRecPtr),
-                                    tli, curFileTLI);
-                       }
-                       curFileTLI = tli;
-                       LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
-                       XLogCtl->InstallXLogFileSegmentActive = true;
-                       LWLockRelease(ControlFileLock);
-                       RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
-                                            PrimarySlotName,
-                                            wal_receiver_create_temp_slot);
-                       flushedUpto = 0;
-                   }
-
-                   /*
-                    * Check if WAL receiver is active or wait to start up.
-                    */
-                   if (!WalRcvStreaming())
-                   {
-                       lastSourceFailed = true;
-                       break;
-                   }
-
-                   /*
-                    * Walreceiver is active, so see if new data has arrived.
-                    *
-                    * We only advance XLogReceiptTime when we obtain fresh
-                    * WAL from walreceiver and observe that we had already
-                    * processed everything before the most recent "chunk"
-                    * that it flushed to disk.  In steady state where we are
-                    * keeping up with the incoming data, XLogReceiptTime will
-                    * be updated on each cycle. When we are behind,
-                    * XLogReceiptTime will not advance, so the grace time
-                    * allotted to conflicting queries will decrease.
-                    */
-                   if (RecPtr < flushedUpto)
-                       havedata = true;
-                   else
-                   {
-                       XLogRecPtr  latestChunkStart;
-
-                       flushedUpto = GetWalRcvFlushRecPtr(&latestChunkStart, &receiveTLI);
-                       if (RecPtr < flushedUpto && receiveTLI == curFileTLI)
-                       {
-                           havedata = true;
-                           if (latestChunkStart <= RecPtr)
-                           {
-                               XLogReceiptTime = GetCurrentTimestamp();
-                               SetCurrentChunkStartTime(XLogReceiptTime);
-                           }
-                       }
-                       else
-                           havedata = false;
-                   }
-                   if (havedata)
-                   {
-                       /*
-                        * Great, streamed far enough.  Open the file if it's
-                        * not open already.  Also read the timeline history
-                        * file if we haven't initialized timeline history
-                        * yet; it should be streamed over and present in
-                        * pg_wal by now.  Use XLOG_FROM_STREAM so that source
-                        * info is set correctly and XLogReceiptTime isn't
-                        * changed.
-                        *
-                        * NB: We must set readTimeLineHistory based on
-                        * recoveryTargetTLI, not receiveTLI. Normally they'll
-                        * be the same, but if recovery_target_timeline is
-                        * 'latest' and archiving is configured, then it's
-                        * possible that we managed to retrieve one or more
-                        * new timeline history files from the archive,
-                        * updating recoveryTargetTLI.
-                        */
-                       if (readFile < 0)
-                       {
-                           if (!expectedTLEs)
-                               expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
-                           readFile = XLogFileRead(readSegNo, PANIC,
-                                                   receiveTLI,
-                                                   XLOG_FROM_STREAM, false);
-                           Assert(readFile >= 0);
-                       }
-                       else
-                       {
-                           /* just make sure source info is correct... */
-                           readSource = XLOG_FROM_STREAM;
-                           XLogReceiptSource = XLOG_FROM_STREAM;
-                           return true;
-                       }
-                       break;
-                   }
-
-                   /*
-                    * Data not here yet. Check for trigger, then wait for
-                    * walreceiver to wake us up when new WAL arrives.
-                    */
-                   if (CheckForStandbyTrigger())
-                   {
-                       /*
-                        * Note that we don't "return false" immediately here.
-                        * After being triggered, we still want to replay all
-                        * the WAL that was already streamed. It's in pg_wal
-                        * now, so we just treat this as a failure, and the
-                        * state machine will move on to replay the streamed
-                        * WAL from pg_wal, and then recheck the trigger and
-                        * exit replay.
-                        */
-                       lastSourceFailed = true;
-                       break;
-                   }
-
-                   /*
-                    * Since we have replayed everything we have received so
-                    * far and are about to start waiting for more WAL, let's
-                    * tell the upstream server our replay location now so
-                    * that pg_stat_replication doesn't show stale
-                    * information.
-                    */
-                   if (!streaming_reply_sent)
-                   {
-                       WalRcvForceReply();
-                       streaming_reply_sent = true;
-                   }
-
-                   /*
-                    * Wait for more WAL to arrive. Time out after 5 seconds
-                    * to react to a trigger file promptly and to check if the
-                    * WAL receiver is still active.
-                    */
-                   (void) WaitLatch(&XLogCtl->recoveryWakeupLatch,
-                                    WL_LATCH_SET | WL_TIMEOUT |
-                                    WL_EXIT_ON_PM_DEATH,
-                                    5000L, WAIT_EVENT_RECOVERY_WAL_STREAM);
-                   ResetLatch(&XLogCtl->recoveryWakeupLatch);
-                   break;
-               }
-
-           default:
-               elog(ERROR, "unexpected WAL source %d", currentSource);
-       }
-
-       /*
-        * Check for recovery pause here so that we can confirm more quickly
-        * that a requested pause has actually taken effect.
-        */
-       if (((volatile XLogCtlData *) XLogCtl)->recoveryPauseState !=
-           RECOVERY_NOT_PAUSED)
-           recoveryPausesHere(false);
-
-       /*
-        * This possibly-long loop needs to handle interrupts of startup
-        * process.
-        */
-       HandleStartupProcInterrupts();
-   }
-
-   return false;               /* not reached */
-}
-
-/*
- * Set flag to signal the walreceiver to restart.  (The startup process calls
- * this on noticing a relevant configuration change.)
- */
-void
-StartupRequestWalReceiverRestart(void)
-{
-   if (currentSource == XLOG_FROM_STREAM && WalRcvRunning())
-   {
-       ereport(LOG,
-               (errmsg("WAL receiver process shutdown requested")));
-
-       pendingWalRcvRestart = true;
-   }
-}
-
  /* Thin wrapper around ShutdownWalRcv(). */
-static void
+void
  XLogShutdownWalRcv(void)
  {
     ShutdownWalRcv();
@@ -13142,153 +9164,25 @@ XLogShutdownWalRcv(void)
     LWLockRelease(ControlFileLock);
  }
  
-/*
- * Determine what log level should be used to report a corrupt WAL record
- * in the current WAL page, previously read by XLogPageRead().
- *
- * 'emode' is the error mode that would be used to report a file-not-found
- * or legitimate end-of-WAL situation.   Generally, we use it as-is, but if
- * we're retrying the exact same record that we've tried previously, only
- * complain the first time to keep the noise down.  However, we only do when
- * reading from pg_wal, because we don't expect any invalid records in archive
- * or in records streamed from the primary. Files in the archive should be complete,
- * and we should never hit the end of WAL because we stop and wait for more WAL
- * to arrive before replaying it.
- *
- * NOTE: This function remembers the RecPtr value it was last called with,
- * to suppress repeated messages about the same record. Only call this when
- * you are about to ereport(), or you might cause a later message to be
- * erroneously suppressed.
- */
-static int
-emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
-{
-   static XLogRecPtr lastComplaint = 0;
-
-   if (readSource == XLOG_FROM_PG_WAL && emode == LOG)
-   {
-       if (RecPtr == lastComplaint)
-           emode = DEBUG1;
-       else
-           lastComplaint = RecPtr;
-   }
-   return emode;
-}
-
-/*
- * Has a standby promotion already been triggered?
- *
- * Unlike CheckForStandbyTrigger(), this works in any process
- * that's connected to shared memory.
- */
-bool
-PromoteIsTriggered(void)
-{
-   /*
-    * We check shared state each time only until a standby promotion is
-    * triggered. We can't trigger a promotion again, so there's no need to
-    * keep checking after the shared variable has once been seen true.
-    */
-   if (LocalPromoteIsTriggered)
-       return true;
-
-   SpinLockAcquire(&XLogCtl->info_lck);
-   LocalPromoteIsTriggered = XLogCtl->SharedPromoteIsTriggered;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   return LocalPromoteIsTriggered;
-}
-
-static void
-SetPromoteIsTriggered(void)
-{
-   SpinLockAcquire(&XLogCtl->info_lck);
-   XLogCtl->SharedPromoteIsTriggered = true;
-   SpinLockRelease(&XLogCtl->info_lck);
-
-   /*
-    * Mark the recovery pause state as 'not paused' because the paused state
-    * ends and promotion continues if a promotion is triggered while recovery
-    * is paused. Otherwise pg_get_wal_replay_pause_state() can mistakenly
-    * return 'paused' while a promotion is ongoing.
-    */
-   SetRecoveryPause(false);
-
-   LocalPromoteIsTriggered = true;
-}
-
-/*
- * Check to see whether the user-specified trigger file exists and whether a
- * promote request has arrived.  If either condition holds, return true.
- */
-static bool
-CheckForStandbyTrigger(void)
-{
-   struct stat stat_buf;
-
-   if (LocalPromoteIsTriggered)
-       return true;
-
-   if (IsPromoteSignaled() && CheckPromoteSignal())
-   {
-       ereport(LOG, (errmsg("received promote request")));
-       RemovePromoteSignalFiles();
-       ResetPromoteSignaled();
-       SetPromoteIsTriggered();
-       return true;
-   }
-
-   if (PromoteTriggerFile == NULL || strcmp(PromoteTriggerFile, "") == 0)
-       return false;
-
-   if (stat(PromoteTriggerFile, &stat_buf) == 0)
-   {
-       ereport(LOG,
-               (errmsg("promote trigger file found: %s", PromoteTriggerFile)));
-       unlink(PromoteTriggerFile);
-       SetPromoteIsTriggered();
-       return true;
-   }
-   else if (errno != ENOENT)
-       ereport(ERROR,
-               (errcode_for_file_access(),
-                errmsg("could not stat promote trigger file \"%s\": %m",
-                       PromoteTriggerFile)));
-
-   return false;
-}
-
-/*
- * Remove the files signaling a standby promotion request.
- */
+/* Enable WAL file recycling and preallocation. */
  void
-RemovePromoteSignalFiles(void)
+SetInstallXLogFileSegmentActive(void)
  {
-   unlink(PROMOTE_SIGNAL_FILE);
+   LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
+   XLogCtl->InstallXLogFileSegmentActive = true;
+   LWLockRelease(ControlFileLock);
  }
  
-/*
- * Check to see if a promote request has arrived.
- */
  bool
-CheckPromoteSignal(void)
+IsInstallXLogFileSegmentActive(void)
  {
-   struct stat stat_buf;
-
-   if (stat(PROMOTE_SIGNAL_FILE, &stat_buf) == 0)
-       return true;
+   bool        result;
  
-   return false;
-}
+   LWLockAcquire(ControlFileLock, LW_SHARED);
+   result = XLogCtl->InstallXLogFileSegmentActive;
+   LWLockRelease(ControlFileLock);
  
-/*
- * Wake up startup process to replay newly arrived WAL, or to notice that
- * failover has been requested.
- */
-void
-WakeupRecovery(void)
-{
-   SetLatch(&XLogCtl->recoveryWakeupLatch);
+   return result;
  }
  
  /*
@@ -13301,12 +9195,3 @@ SetWalWriterSleeping(bool sleeping)
     XLogCtl->WalWriterSleeping = sleeping;
     SpinLockRelease(&XLogCtl->info_lck);
  }
-
-/*
- * Schedule a walreceiver wakeup in the main recovery loop.
- */
-void
-XLogRequestWalReceiverReply(void)
-{
-   doRequestWalReceiverReply = true;
-}
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c

index d8af5aad58b39465c69d7338c41645b4ff0f9ffa..2f900533cdb3e67853c86cab19e3fa637a815daf 100644 (file)
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -19,8 +19,8 @@
  #include <unistd.h>
  
  #include "access/htup_details.h"
-#include "access/xlog.h"
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "catalog/pg_type.h"
  #include "funcapi.h"
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c

new file mode 100644 (file)

index 0000000..d5269ed
--- /dev/null
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -0,0 +1,4537 @@
+/*-------------------------------------------------------------------------
+ *
+ * xlogrecovery.c
+ *     Functions for WAL recovery, standby mode
+ *
+ * This source file contains functions controlling WAL recovery.
+ * InitWalRecovery() initializes the system for crash or archive recovery,
+ * or standby mode, depending on configuration options and the state of
+ * the control file and possible backup label file.  PerformWalRecovery()
+ * performs the actual WAL replay, calling the rmgr-specific redo routines.
+ * EndWalRecovery() performs end-of-recovery checks and cleanup actions,
+ * and prepares information needed to initialize the WAL for writes.  In
+ * addition to these three main functions, there are a bunch of functions
+ * for interrogating recovery state and controlling the recovery process.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/access/transam/xlogrecovery.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <math.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "access/timeline.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog_internal.h"
+#include "access/xlogarchive.h"
+#include "access/xlogreader.h"
+#include "access/xlogrecovery.h"
+#include "access/xlogutils.h"
+#include "catalog/pg_control.h"
+#include "commands/tablespace.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bgwriter.h"
+#include "postmaster/startup.h"
+#include "replication/basebackup.h"
+#include "replication/walreceiver.h"
+#include "storage/fd.h"
+#include "storage/ipc.h"
+#include "storage/latch.h"
+#include "storage/pmsignal.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "storage/spin.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/ps_status.h"
+#include "utils/pg_rusage.h"
+
+/* Unsupported old recovery command file names (relative to $PGDATA) */
+#define RECOVERY_COMMAND_FILE  "recovery.conf"
+#define RECOVERY_COMMAND_DONE  "recovery.done"
+
+/*
+ * GUC support
+ */
+const struct config_enum_entry recovery_target_action_options[] = {
+   {"pause", RECOVERY_TARGET_ACTION_PAUSE, false},
+   {"promote", RECOVERY_TARGET_ACTION_PROMOTE, false},
+   {"shutdown", RECOVERY_TARGET_ACTION_SHUTDOWN, false},
+   {NULL, 0, false}
+};
+
+/* options formerly taken from recovery.conf for archive recovery */
+char      *recoveryRestoreCommand = NULL;
+char      *recoveryEndCommand = NULL;
+char      *archiveCleanupCommand = NULL;
+RecoveryTargetType recoveryTarget = RECOVERY_TARGET_UNSET;
+bool       recoveryTargetInclusive = true;
+int            recoveryTargetAction = RECOVERY_TARGET_ACTION_PAUSE;
+TransactionId recoveryTargetXid;
+char      *recovery_target_time_string;
+TimestampTz recoveryTargetTime;
+const char *recoveryTargetName;
+XLogRecPtr recoveryTargetLSN;
+int            recovery_min_apply_delay = 0;
+
+/* options formerly taken from recovery.conf for XLOG streaming */
+char      *PrimaryConnInfo = NULL;
+char      *PrimarySlotName = NULL;
+char      *PromoteTriggerFile = NULL;
+bool       wal_receiver_create_temp_slot = false;
+
+/*
+ * recoveryTargetTimeLineGoal: what the user requested, if any
+ *
+ * recoveryTargetTLIRequested: numeric value of requested timeline, if constant
+ *
+ * recoveryTargetTLI: the currently understood target timeline; changes
+ *
+ * expectedTLEs: a list of TimeLineHistoryEntries for recoveryTargetTLI and
+ * the timelines of its known parents, newest first (so recoveryTargetTLI is
+ * always the first list member).  Only these TLIs are expected to be seen in
+ * the WAL segments we read, and indeed only these TLIs will be considered as
+ * candidate WAL files to open at all.
+ *
+ * curFileTLI: the TLI appearing in the name of the current input WAL file.
+ * (This is not necessarily the same as the timeline from which we are
+ * replaying WAL, which StartupXLOG calls replayTLI, because we could be
+ * scanning data that was copied from an ancestor timeline when the current
+ * file was created.)  During a sequential scan we do not allow this value
+ * to decrease.
+ */
+RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal = RECOVERY_TARGET_TIMELINE_LATEST;
+TimeLineID recoveryTargetTLIRequested = 0;
+TimeLineID recoveryTargetTLI = 0;
+static List *expectedTLEs;
+static TimeLineID curFileTLI;
+
+/*
+ * When ArchiveRecoveryRequested is set, archive recovery was requested,
+ * i.e. signal files were present.  When InArchiveRecovery is set, we are
+ * currently recovering using offline XLOG archives.  These variables are only
+ * valid in the startup process.
+ *
+ * When ArchiveRecoveryRequested is true, but InArchiveRecovery is false, we're
+ * currently performing crash recovery using only XLOG files in pg_wal, but
+ * will switch to using offline XLOG archives as soon as we reach the end of
+ * WAL in pg_wal.
+ */
+bool       ArchiveRecoveryRequested = false;
+bool       InArchiveRecovery = false;
+
+/*
+ * When StandbyModeRequested is set, standby mode was requested, i.e.
+ * standby.signal file was present.  When StandbyMode is set, we are currently
+ * in standby mode.  These variables are only valid in the startup process.
+ * They work similarly to ArchiveRecoveryRequested and InArchiveRecovery.
+ */
+static bool StandbyModeRequested = false;
+bool       StandbyMode = false;
+
+/* was a signal file present at startup? */
+static bool standby_signal_file_found = false;
+static bool recovery_signal_file_found = false;
+
+/*
+ * CheckPointLoc is the position of the checkpoint record that determines
+ * where to start the replay.  It comes from the backup label file or the
+ * control file.
+ *
+ * RedoStartLSN is the checkpoint's REDO location, also from the backup label
+ * file or the control file.  In standby mode, XLOG streaming usually starts
+ * from the position where an invalid record was found.  But if we fail to
+ * read even the initial checkpoint record, we use the REDO location instead
+ * of the checkpoint location as the start position of XLOG streaming.
+ * Otherwise we would have to jump backwards to the REDO location after
+ * reading the checkpoint record, because the REDO record can precede the
+ * checkpoint record.
+ */
+static XLogRecPtr CheckPointLoc = InvalidXLogRecPtr;
+static TimeLineID CheckPointTLI = 0;
+static XLogRecPtr RedoStartLSN = InvalidXLogRecPtr;
+static TimeLineID RedoStartTLI = 0;
+
+/*
+ * Local copy of SharedHotStandbyActive variable. False actually means "not
+ * known, need to check the shared state".
+ */
+static bool LocalHotStandbyActive = false;
+
+/*
+ * Local copy of SharedPromoteIsTriggered variable. False actually means "not
+ * known, need to check the shared state".
+ */
+static bool LocalPromoteIsTriggered = false;
+
+/* Has the recovery code requested a walreceiver wakeup? */
+static bool doRequestWalReceiverReply;
+
+/* XLogReader object used to parse the WAL records */
+static XLogReaderState *xlogreader = NULL;
+
+/* Parameters passed down from ReadRecord to the XLogPageRead callback. */
+typedef struct XLogPageReadPrivate
+{
+   int         emode;
+   bool        fetching_ckpt;  /* are we fetching a checkpoint record? */
+   bool        randAccess;
+   TimeLineID  replayTLI;
+} XLogPageReadPrivate;
+
+/* flag to tell XLogPageRead that we have started replaying */
+static bool InRedo = false;
+
+/*
+ * Codes indicating where we got a WAL file from during recovery, or where
+ * to attempt to get one.
+ */
+typedef enum
+{
+   XLOG_FROM_ANY = 0,          /* request to read WAL from any source */
+   XLOG_FROM_ARCHIVE,          /* restored using restore_command */
+   XLOG_FROM_PG_WAL,           /* existing file in pg_wal */
+   XLOG_FROM_STREAM            /* streamed from primary */
+} XLogSource;
+
+/* human-readable names for XLogSources, for debugging output */
+static const char *const xlogSourceNames[] = {"any", "archive", "pg_wal", "stream"};
+
+/*
+ * readFile is -1 or a kernel FD for the log file segment that's currently
+ * open for reading.  readSegNo identifies the segment.  readOff is the offset
+ * of the page just read, readLen indicates how much of it has been read into
+ * readBuf, and readSource indicates where we got the currently open file from.
+ *
+ * Note: we could use Reserve/ReleaseExternalFD to track consumption of this
+ * FD too (like for openLogFile in xlog.c); but it doesn't currently seem
+ * worthwhile, since the XLOG is not read by general-purpose sessions.
+ */
+static int readFile = -1;
+static XLogSegNo readSegNo = 0;
+static uint32 readOff = 0;
+static uint32 readLen = 0;
+static XLogSource readSource = XLOG_FROM_ANY;
+
+/*
+ * Keeps track of which source we're currently reading from. This is
+ * different from readSource in that this is always set, even when we don't
+ * currently have a WAL file open. If lastSourceFailed is set, our last
+ * attempt to read from currentSource failed, and we should try another source
+ * next.
+ *
+ * pendingWalRcvRestart is set when a config change occurs that requires a
+ * walreceiver restart.  This is only valid in XLOG_FROM_STREAM state.
+ */
+static XLogSource currentSource = XLOG_FROM_ANY;
+static bool lastSourceFailed = false;
+static bool pendingWalRcvRestart = false;
+
+/*
+ * These variables track when we last obtained some WAL data to process,
+ * and where we got it from.  (XLogReceiptSource is initially the same as
+ * readSource, but readSource gets reset to zero when we don't have data
+ * to process right now.  It is also different from currentSource, which
+ * also changes when we try to read from a source and fail, while
+ * XLogReceiptSource tracks where we last successfully read some WAL.)
+ */
+static TimestampTz XLogReceiptTime = 0;
+static XLogSource XLogReceiptSource = XLOG_FROM_ANY;
+
+/* Local copy of WalRcv->flushedUpto */
+static XLogRecPtr flushedUpto = 0;
+static TimeLineID receiveTLI = 0;
+
+/*
+ * Copy of minRecoveryPoint and backupEndPoint from the control file.
+ *
+ * In order to reach consistency, we must replay the WAL up to
+ * minRecoveryPoint.  If backupEndRequired is true, we must also reach
+ * backupEndPoint, or if it's invalid, an end-of-backup record corresponding
+ * to backupStartPoint.
+ *
+ * Note: In archive recovery, after consistency has been reached, the
+ * functions in xlog.c will start updating minRecoveryPoint in the control
+ * file.  But this copy of minRecoveryPoint variable reflects the value at the
+ * beginning of recovery, and is *not* updated after consistency is reached.
+ */
+static XLogRecPtr minRecoveryPoint;
+static TimeLineID minRecoveryPointTLI;
+
+static XLogRecPtr backupStartPoint;
+static XLogRecPtr backupEndPoint;
+static bool backupEndRequired = false;
+
+/*
+ * Have we reached a consistent database state?  In crash recovery, we have
+ * to replay all the WAL, so reachedConsistency is never set.  During archive
+ * recovery, the database is consistent once minRecoveryPoint is reached.
+ *
+ * Consistent state means that the system is internally consistent, all
+ * the WAL has been replayed up to a certain point, and importantly, there
+ * is no trace of later actions on disk.
+ */
+bool       reachedConsistency = false;
+
+/* Buffers dedicated to consistency checks of size BLCKSZ */
+static char *replay_image_masked = NULL;
+static char *primary_image_masked = NULL;
+
+
+/*
+ * Shared-memory state for WAL recovery.
+ */
+typedef struct XLogRecoveryCtlData
+{
+   /*
+    * SharedHotStandbyActive indicates if we allow hot standby queries to be
+    * run.  Protected by info_lck.
+    */
+   bool        SharedHotStandbyActive;
+
+   /*
+    * SharedPromoteIsTriggered indicates if a standby promotion has been
+    * triggered.  Protected by info_lck.
+    */
+   bool        SharedPromoteIsTriggered;
+
+   /*
+    * recoveryWakeupLatch is used to wake up the startup process to continue
+    * WAL replay, if it is waiting for WAL to arrive or failover trigger file
+    * to appear.
+    *
+    * Note that the startup process also uses another latch, its procLatch,
+    * to wait for recovery conflict. We could get rid of recoveryWakeupLatch for
+    * signaling the startup process in favor of using its procLatch, which would
+    * comport better with possible generic signal handlers using that latch.
+    * But we should not do that because the startup process doesn't assume
+    * that it's woken up by walreceiver process or SIGHUP signal handler
+    * while it's waiting for recovery conflict. The separate latches,
+    * recoveryWakeupLatch and procLatch, should be used for inter-process
+    * communication for WAL replay and recovery conflict, respectively.
+    */
+   Latch       recoveryWakeupLatch;
+
+   /*
+    * Last record successfully replayed.
+    */
+   XLogRecPtr  lastReplayedReadRecPtr; /* start position */
+   XLogRecPtr  lastReplayedEndRecPtr;  /* end+1 position */
+   TimeLineID  lastReplayedTLI;    /* timeline */
+
+   /*
+    * When we're currently replaying a record, i.e. in a redo function,
+    * replayEndRecPtr points to the end+1 of the record being replayed,
+    * otherwise it's equal to lastReplayedEndRecPtr.
+    */
+   XLogRecPtr  replayEndRecPtr;
+   TimeLineID  replayEndTLI;
+   /* timestamp of last COMMIT/ABORT record replayed (or being replayed) */
+   TimestampTz recoveryLastXTime;
+
+   /*
+    * timestamp of when we started replaying the current chunk of WAL data,
+    * only relevant for replication or archive recovery
+    */
+   TimestampTz currentChunkStartTime;
+   /* Recovery pause state */
+   RecoveryPauseState recoveryPauseState;
+   ConditionVariable recoveryNotPausedCV;
+
+   slock_t     info_lck;       /* locks shared variables shown above */
+} XLogRecoveryCtlData;
+
+/*
+ * Pointer to the shared-memory recovery state.  NULL until
+ * XLogRecoveryShmemInit() has attached to (or created) the struct.
+ */
+static XLogRecoveryCtlData *XLogRecoveryCtl = NULL;
+
+/*
+ * abortedRecPtr is the start pointer of a broken record at end of WAL when
+ * recovery completes; missingContrecPtr is the location of the first
+ * contrecord that went missing.  See CreateOverwriteContrecordRecord for
+ * details.
+ *
+ * Both are reset to InvalidXLogRecPtr at the end of InitWalRecovery().
+ */
+static XLogRecPtr abortedRecPtr;
+static XLogRecPtr missingContrecPtr;
+
+/*
+ * If recoveryStopsBefore/After returns true, it saves information about the
+ * stop point here.
+ */
+static TransactionId recoveryStopXid;
+static TimestampTz recoveryStopTime;
+static XLogRecPtr recoveryStopLSN;
+static char recoveryStopName[MAXFNAMELEN];
+static bool recoveryStopAfter;
+
+/* prototypes for local functions */
+static void ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI);
+
+static void readRecoverySignalFile(void);
+static void validateRecoveryParameters(void);
+static bool read_backup_label(XLogRecPtr *checkPointLoc,
+                             TimeLineID *backupLabelTLI,
+                             bool *backupEndRequired, bool *backupFromStandby);
+static bool read_tablespace_map(List **tablespaces);
+
+static void xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI);
+static void CheckRecoveryConsistency(void);
+static void rm_redo_error_callback(void *arg);
+#ifdef WAL_DEBUG
+static void xlog_outrec(StringInfo buf, XLogReaderState *record);
+#endif
+static void xlog_block_info(StringInfo buf, XLogReaderState *record);
+static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
+                               TimeLineID prevTLI, TimeLineID replayTLI);
+static bool getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime);
+static void verifyBackupPageConsistency(XLogReaderState *record);
+
+static bool recoveryStopsBefore(XLogReaderState *record);
+static bool recoveryStopsAfter(XLogReaderState *record);
+static char *getRecoveryStopReason(void);
+static void recoveryPausesHere(bool endOfRecovery);
+static bool recoveryApplyDelay(XLogReaderState *record);
+static void ConfirmRecoveryPaused(void);
+
+static XLogRecord *ReadRecord(XLogReaderState *xlogreader,
+                             int emode, bool fetching_ckpt, TimeLineID replayTLI);
+
+static int XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr,
+                        int reqLen, XLogRecPtr targetRecPtr, char *readBuf);
+static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
+                                       bool fetching_ckpt,
+                                       XLogRecPtr tliRecPtr,
+                                       TimeLineID replayTLI,
+                                       XLogRecPtr replayLSN);
+static int emode_for_corrupt_record(int emode, XLogRecPtr RecPtr);
+static XLogRecord *ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
+                                       int whichChkpt, bool report, TimeLineID replayTLI);
+static bool rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN);
+static int XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
+                        XLogSource source, bool notfoundOk);
+static int XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source);
+
+static bool CheckForStandbyTrigger(void);
+static void SetPromoteIsTriggered(void);
+static bool HotStandbyActiveInReplay(void);
+
+static void SetCurrentChunkStartTime(TimestampTz xtime);
+static void SetLatestXTime(TimestampTz xtime);
+
+/*
+ * Report how much shared memory WAL recovery needs.
+ *
+ * The only shared structure is XLogRecoveryCtlData, so its size is the
+ * whole answer.
+ */
+Size
+XLogRecoveryShmemSize(void)
+{
+   return sizeof(XLogRecoveryCtlData);
+}
+
+/*
+ * Attach to the shared-memory recovery state, creating it if necessary.
+ *
+ * When the struct did not exist yet, it is zeroed and its spinlock, wakeup
+ * latch and condition variable are initialized.  When it already existed,
+ * only the local pointer is set.
+ */
+void
+XLogRecoveryShmemInit(void)
+{
+   bool        already_exists;
+
+   XLogRecoveryCtl = (XLogRecoveryCtlData *)
+       ShmemInitStruct("XLOG Recovery Ctl", XLogRecoveryShmemSize(),
+                       &already_exists);
+
+   if (!already_exists)
+   {
+       /* First time through: start from an all-zeroes struct. */
+       memset(XLogRecoveryCtl, 0, sizeof(XLogRecoveryCtlData));
+
+       SpinLockInit(&XLogRecoveryCtl->info_lck);
+       InitSharedLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+       ConditionVariableInit(&XLogRecoveryCtl->recoveryNotPausedCV);
+   }
+}
+
+/*
+ * Prepare the system for WAL recovery, if needed.
+ *
+ * This is called by StartupXLOG() which coordinates the server startup
+ * sequence.  This function analyzes the control file and the backup label
+ * file, if any, and figures out whether we need to perform crash recovery or
+ * archive recovery, and how far we need to replay the WAL to reach a
+ * consistent state.
+ *
+ * This doesn't yet change the on-disk state, except for creating the symlinks
+ * from table space map file if any, and for fetching WAL files needed to find
+ * the checkpoint record.  On entry, the caller has already read the control
+ * file into memory, and passes it as argument.  This function updates it to
+ * reflect the recovery state, and the caller is expected to write it back to
+ * disk after initializing other subsystems, but before calling
+ * PerformWalRecovery().
+ *
+ * This initializes some global variables like ArchiveRecoveryRequested,
+ * StandbyModeRequested and InRecovery.
+ *
+ * ControlFile: in-memory copy of pg_control; modified in place, but only if
+ *      recovery turns out to be needed.
+ * *wasShutdown_ptr: set to true if the checkpoint record we start from is a
+ *      shutdown checkpoint.
+ * *haveBackupLabel_ptr: set to true if a backup_label file was read; the
+ *      caller is expected to delete it later.
+ * *haveTblspcMap_ptr: set to true if a tablespace_map file was read; the
+ *      caller is expected to delete it later.
+ */
+void
+InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdown_ptr,
+               bool *haveBackupLabel_ptr, bool *haveTblspcMap_ptr)
+{
+   XLogPageReadPrivate *private;
+   struct stat st;
+   bool        wasShutdown;
+   XLogRecord *record;
+   DBState     dbstate_at_startup;
+   bool        haveTblspcMap = false;
+   bool        haveBackupLabel = false;
+   CheckPoint  checkPoint;
+   bool        backupFromStandby = false;
+
+   /* Remember the state before we overwrite it further below. */
+   dbstate_at_startup = ControlFile->state;
+
+   /*
+    * Initialize on the assumption we want to recover to the latest timeline
+    * that's active according to pg_control.
+    */
+   if (ControlFile->minRecoveryPointTLI >
+       ControlFile->checkPointCopy.ThisTimeLineID)
+       recoveryTargetTLI = ControlFile->minRecoveryPointTLI;
+   else
+       recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
+
+   /*
+    * Check for signal files, and if so set up state for offline recovery
+    */
+   readRecoverySignalFile();
+   validateRecoveryParameters();
+
+   if (ArchiveRecoveryRequested)
+   {
+       if (StandbyModeRequested)
+           ereport(LOG,
+                   (errmsg("entering standby mode")));
+       else if (recoveryTarget == RECOVERY_TARGET_XID)
+           ereport(LOG,
+                   (errmsg("starting point-in-time recovery to XID %u",
+                           recoveryTargetXid)));
+       else if (recoveryTarget == RECOVERY_TARGET_TIME)
+           ereport(LOG,
+                   (errmsg("starting point-in-time recovery to %s",
+                           timestamptz_to_str(recoveryTargetTime))));
+       else if (recoveryTarget == RECOVERY_TARGET_NAME)
+           ereport(LOG,
+                   (errmsg("starting point-in-time recovery to \"%s\"",
+                           recoveryTargetName)));
+       else if (recoveryTarget == RECOVERY_TARGET_LSN)
+           ereport(LOG,
+                   (errmsg("starting point-in-time recovery to WAL location (LSN) \"%X/%X\"",
+                           LSN_FORMAT_ARGS(recoveryTargetLSN))));
+       else if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE)
+           ereport(LOG,
+                   (errmsg("starting point-in-time recovery to earliest consistent point")));
+       else
+           ereport(LOG,
+                   (errmsg("starting archive recovery")));
+   }
+
+   /*
+    * Take ownership of the wakeup latch if we're going to sleep during
+    * recovery.
+    */
+   if (ArchiveRecoveryRequested)
+       OwnLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+
+   private = palloc0(sizeof(XLogPageReadPrivate));
+   xlogreader =
+       XLogReaderAllocate(wal_segment_size, NULL,
+                          XL_ROUTINE(.page_read = &XLogPageRead,
+                                     .segment_open = NULL,
+                                     .segment_close = wal_segment_close),
+                          private);
+   if (!xlogreader)
+       ereport(ERROR,
+               (errcode(ERRCODE_OUT_OF_MEMORY),
+                errmsg("out of memory"),
+                errdetail("Failed while allocating a WAL reading processor.")));
+   xlogreader->system_identifier = ControlFile->system_identifier;
+
+   /*
+    * Allocate two page buffers dedicated to WAL consistency checks.  We do
+    * it this way, rather than just making static arrays, for two reasons:
+    * (1) no need to waste the storage in most instantiations of the backend;
+    * (2) a static char array isn't guaranteed to have any particular
+    * alignment, whereas palloc() will provide MAXALIGN'd storage.
+    */
+   replay_image_masked = (char *) palloc(BLCKSZ);
+   primary_image_masked = (char *) palloc(BLCKSZ);
+
+   if (read_backup_label(&CheckPointLoc, &CheckPointTLI, &backupEndRequired,
+                         &backupFromStandby))
+   {
+       List       *tablespaces = NIL;
+
+       /*
+        * Archive recovery was requested, and thanks to the backup label
+        * file, we know how far we need to replay to reach consistency. Enter
+        * archive recovery directly.
+        */
+       InArchiveRecovery = true;
+       if (StandbyModeRequested)
+           StandbyMode = true;
+
+       /*
+        * When a backup_label file is present, we want to roll forward from
+        * the checkpoint it identifies, rather than using pg_control.
+        */
+       record = ReadCheckpointRecord(xlogreader, CheckPointLoc, 0, true, CheckPointTLI);
+       if (record != NULL)
+       {
+           memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
+           wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
+           ereport(DEBUG1,
+                   (errmsg_internal("checkpoint record is at %X/%X",
+                                    LSN_FORMAT_ARGS(CheckPointLoc))));
+           InRecovery = true;  /* force recovery even if SHUTDOWNED */
+
+           /*
+            * Make sure that REDO location exists. This may not be the case
+            * if there was a crash during an online backup, which left a
+            * backup_label around that references a WAL segment that's
+            * already been archived.
+            */
+           if (checkPoint.redo < CheckPointLoc)
+           {
+               XLogBeginRead(xlogreader, checkPoint.redo);
+               if (!ReadRecord(xlogreader, LOG, false,
+                               checkPoint.ThisTimeLineID))
+                   ereport(FATAL,
+                           (errmsg("could not find redo location referenced by checkpoint record"),
+                            errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
+                                    "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+                                    "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+                                    DataDir, DataDir, DataDir)));
+           }
+       }
+       else
+       {
+           ereport(FATAL,
+                   (errmsg("could not locate required checkpoint record"),
+                    errhint("If you are restoring from a backup, touch \"%s/recovery.signal\" and add required recovery options.\n"
+                            "If you are not restoring from a backup, try removing the file \"%s/backup_label\".\n"
+                            "Be careful: removing \"%s/backup_label\" will result in a corrupt cluster if restoring from a backup.",
+                            DataDir, DataDir, DataDir)));
+           wasShutdown = false;    /* keep compiler quiet */
+       }
+
+       /* Read the tablespace_map file if present and create symlinks. */
+       if (read_tablespace_map(&tablespaces))
+       {
+           ListCell   *lc;
+
+           foreach(lc, tablespaces)
+           {
+               tablespaceinfo *ti = lfirst(lc);
+               char       *linkloc;
+
+               linkloc = psprintf("pg_tblspc/%s", ti->oid);
+
+               /*
+                * Remove the existing symlink if any and Create the symlink
+                * under PGDATA.
+                */
+               remove_tablespace_symlink(linkloc);
+
+               if (symlink(ti->path, linkloc) < 0)
+                   ereport(ERROR,
+                           (errcode_for_file_access(),
+                            errmsg("could not create symbolic link \"%s\": %m",
+                                   linkloc)));
+
+               pfree(ti->oid);
+               pfree(ti->path);
+               pfree(ti);
+           }
+
+           /* tell the caller to delete it later */
+           haveTblspcMap = true;
+       }
+
+       /* tell the caller to delete it later */
+       haveBackupLabel = true;
+   }
+   else
+   {
+       /*
+        * If tablespace_map file is present without backup_label file, there
+        * is no use of such file.  There is no harm in retaining it, but it
+        * is better to get rid of the map file so that we don't have any
+        * redundant file in data directory and it will avoid any sort of
+        * confusion.  It seems prudent though to just rename the file out of
+        * the way rather than delete it completely, also we ignore any error
+        * that occurs in rename operation as even if map file is present
+        * without backup_label file, it is harmless.
+        */
+       if (stat(TABLESPACE_MAP, &st) == 0)
+       {
+           unlink(TABLESPACE_MAP_OLD);
+           if (durable_rename(TABLESPACE_MAP, TABLESPACE_MAP_OLD, DEBUG1) == 0)
+               ereport(LOG,
+                       (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
+                               TABLESPACE_MAP, BACKUP_LABEL_FILE),
+                        errdetail("File \"%s\" was renamed to \"%s\".",
+                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+           else
+               ereport(LOG,
+                       (errmsg("ignoring file \"%s\" because no file \"%s\" exists",
+                               TABLESPACE_MAP, BACKUP_LABEL_FILE),
+                        errdetail("Could not rename file \"%s\" to \"%s\": %m.",
+                                  TABLESPACE_MAP, TABLESPACE_MAP_OLD)));
+       }
+
+       /*
+        * It's possible that archive recovery was requested, but we don't
+        * know how far we need to replay the WAL before we reach consistency.
+        * This can happen for example if a base backup is taken from a
+        * running server using an atomic filesystem snapshot, without calling
+        * pg_start/stop_backup. Or if you just kill a running primary server
+        * and put it into archive recovery by creating a recovery signal
+        * file.
+        *
+        * Our strategy in that case is to perform crash recovery first,
+        * replaying all the WAL present in pg_wal, and only enter archive
+        * recovery after that.
+        *
+        * But usually we already know how far we need to replay the WAL (up
+        * to minRecoveryPoint, up to backupEndPoint, or until we see an
+        * end-of-backup record), and we can enter archive recovery directly.
+        */
+       if (ArchiveRecoveryRequested &&
+           (ControlFile->minRecoveryPoint != InvalidXLogRecPtr ||
+            ControlFile->backupEndRequired ||
+            ControlFile->backupEndPoint != InvalidXLogRecPtr ||
+            ControlFile->state == DB_SHUTDOWNED))
+       {
+           InArchiveRecovery = true;
+           if (StandbyModeRequested)
+               StandbyMode = true;
+       }
+
+       /* Get the last valid checkpoint record. */
+       CheckPointLoc = ControlFile->checkPoint;
+       CheckPointTLI = ControlFile->checkPointCopy.ThisTimeLineID;
+       RedoStartLSN = ControlFile->checkPointCopy.redo;
+       RedoStartTLI = ControlFile->checkPointCopy.ThisTimeLineID;
+       record = ReadCheckpointRecord(xlogreader, CheckPointLoc, 1, true,
+                                     CheckPointTLI);
+       if (record != NULL)
+       {
+           ereport(DEBUG1,
+                   (errmsg_internal("checkpoint record is at %X/%X",
+                                    LSN_FORMAT_ARGS(CheckPointLoc))));
+       }
+       else
+       {
+           /*
+            * We used to attempt to go back to a secondary checkpoint record
+            * here, but only when not in standby mode. We now just fail if we
+            * can't read the last checkpoint because this allows us to
+            * simplify processing around checkpoints.
+            */
+           ereport(PANIC,
+                   (errmsg("could not locate a valid checkpoint record")));
+       }
+       memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
+       wasShutdown = ((record->xl_info & ~XLR_INFO_MASK) == XLOG_CHECKPOINT_SHUTDOWN);
+   }
+
+   /*
+    * If the location of the checkpoint record is not on the expected
+    * timeline in the history of the requested timeline, we cannot proceed:
+    * the backup is not part of the history of the requested timeline.
+    */
+   Assert(expectedTLEs);       /* was initialized by reading checkpoint
+                                * record */
+   if (tliOfPointInHistory(CheckPointLoc, expectedTLEs) !=
+       CheckPointTLI)
+   {
+       XLogRecPtr  switchpoint;
+
+       /*
+        * tliSwitchPoint will throw an error if the checkpoint's timeline is
+        * not in expectedTLEs at all.
+        */
+       switchpoint = tliSwitchPoint(ControlFile->checkPointCopy.ThisTimeLineID, expectedTLEs, NULL);
+       ereport(FATAL,
+               (errmsg("requested timeline %u is not a child of this server's history",
+                       recoveryTargetTLI),
+                errdetail("Latest checkpoint is at %X/%X on timeline %u, but in the history of the requested timeline, the server forked off from that timeline at %X/%X.",
+                          LSN_FORMAT_ARGS(ControlFile->checkPoint),
+                          ControlFile->checkPointCopy.ThisTimeLineID,
+                          LSN_FORMAT_ARGS(switchpoint))));
+   }
+
+   /*
+    * The min recovery point should be part of the requested timeline's
+    * history, too.
+    */
+   if (!XLogRecPtrIsInvalid(ControlFile->minRecoveryPoint) &&
+       tliOfPointInHistory(ControlFile->minRecoveryPoint - 1, expectedTLEs) !=
+       ControlFile->minRecoveryPointTLI)
+       ereport(FATAL,
+               (errmsg("requested timeline %u does not contain minimum recovery point %X/%X on timeline %u",
+                       recoveryTargetTLI,
+                       LSN_FORMAT_ARGS(ControlFile->minRecoveryPoint),
+                       ControlFile->minRecoveryPointTLI)));
+
+   ereport(DEBUG1,
+           (errmsg_internal("redo record is at %X/%X; shutdown %s",
+                            LSN_FORMAT_ARGS(checkPoint.redo),
+                            wasShutdown ? "true" : "false")));
+   ereport(DEBUG1,
+           (errmsg_internal("next transaction ID: " UINT64_FORMAT "; next OID: %u",
+                            U64FromFullTransactionId(checkPoint.nextXid),
+                            checkPoint.nextOid)));
+   ereport(DEBUG1,
+           (errmsg_internal("next MultiXactId: %u; next MultiXactOffset: %u",
+                            checkPoint.nextMulti, checkPoint.nextMultiOffset)));
+   ereport(DEBUG1,
+           (errmsg_internal("oldest unfrozen transaction ID: %u, in database %u",
+                            checkPoint.oldestXid, checkPoint.oldestXidDB)));
+   ereport(DEBUG1,
+           (errmsg_internal("oldest MultiXactId: %u, in database %u",
+                            checkPoint.oldestMulti, checkPoint.oldestMultiDB)));
+   ereport(DEBUG1,
+           (errmsg_internal("commit timestamp Xid oldest/newest: %u/%u",
+                            checkPoint.oldestCommitTsXid,
+                            checkPoint.newestCommitTsXid)));
+   if (!TransactionIdIsNormal(XidFromFullTransactionId(checkPoint.nextXid)))
+       ereport(PANIC,
+               (errmsg("invalid next transaction ID")));
+
+   /* sanity check */
+   if (checkPoint.redo > CheckPointLoc)
+       ereport(PANIC,
+               (errmsg("invalid redo in checkpoint record")));
+
+   /*
+    * Check whether we need to force recovery from WAL.  If it appears to
+    * have been a clean shutdown and we did not have a recovery signal file,
+    * then assume no recovery needed.
+    */
+   if (checkPoint.redo < CheckPointLoc)
+   {
+       if (wasShutdown)
+           ereport(PANIC,
+                   (errmsg("invalid redo record in shutdown checkpoint")));
+       InRecovery = true;
+   }
+   else if (ControlFile->state != DB_SHUTDOWNED)
+       InRecovery = true;
+   else if (ArchiveRecoveryRequested)
+   {
+       /* force recovery due to presence of recovery signal file */
+       InRecovery = true;
+   }
+
+   /*
+    * If recovery is needed, update our in-memory copy of pg_control to show
+    * that we are recovering and to show the selected checkpoint as the place
+    * we are starting from. We also mark pg_control with any minimum recovery
+    * stop point obtained from a backup history file.
+    *
+    * This must be skipped when no recovery is needed: otherwise a cleanly
+    * shut down server would log a bogus "not properly shut down" message
+    * and have its in-memory state flipped to crash recovery.
+    *
+    * We don't write the changes to disk here; that is left to the caller.
+    */
+   if (InRecovery)
+   {
+       if (InArchiveRecovery)
+       {
+           ControlFile->state = DB_IN_ARCHIVE_RECOVERY;
+       }
+       else
+       {
+           ereport(LOG,
+                   (errmsg("database system was not properly shut down; "
+                           "automatic recovery in progress")));
+           if (recoveryTargetTLI > ControlFile->checkPointCopy.ThisTimeLineID)
+               ereport(LOG,
+                       (errmsg("crash recovery starts in timeline %u "
+                               "and has target timeline %u",
+                               ControlFile->checkPointCopy.ThisTimeLineID,
+                               recoveryTargetTLI)));
+           ControlFile->state = DB_IN_CRASH_RECOVERY;
+       }
+       ControlFile->checkPoint = CheckPointLoc;
+       ControlFile->checkPointCopy = checkPoint;
+       if (InArchiveRecovery)
+       {
+           /* initialize minRecoveryPoint if not set yet */
+           if (ControlFile->minRecoveryPoint < checkPoint.redo)
+           {
+               ControlFile->minRecoveryPoint = checkPoint.redo;
+               ControlFile->minRecoveryPointTLI = checkPoint.ThisTimeLineID;
+           }
+       }
+
+       /*
+        * Set backupStartPoint if we're starting recovery from a base backup.
+        *
+        * Also set backupEndPoint and use minRecoveryPoint as the backup end
+        * location if we're starting recovery from a base backup which was
+        * taken from a standby. In this case, the database system status in
+        * pg_control must indicate that the database was already in
+        * recovery. Usually that will be DB_IN_ARCHIVE_RECOVERY but also can
+        * be DB_SHUTDOWNED_IN_RECOVERY if recovery previously was interrupted
+        * before reaching this point; e.g. because restore_command or
+        * primary_conninfo were faulty.
+        *
+        * Any other state indicates that the backup somehow became corrupted
+        * and we can't sensibly continue with recovery.
+        */
+       if (haveBackupLabel)
+       {
+           ControlFile->backupStartPoint = checkPoint.redo;
+           ControlFile->backupEndRequired = backupEndRequired;
+
+           if (backupFromStandby)
+           {
+               if (dbstate_at_startup != DB_IN_ARCHIVE_RECOVERY &&
+                   dbstate_at_startup != DB_SHUTDOWNED_IN_RECOVERY)
+                   ereport(FATAL,
+                           (errmsg("backup_label contains data inconsistent with control file"),
+                            errhint("This means that the backup is corrupted and you will "
+                                    "have to use another backup for recovery.")));
+               ControlFile->backupEndPoint = ControlFile->minRecoveryPoint;
+           }
+       }
+   }
+
+   /* remember these, so that we know when we have reached consistency */
+   backupStartPoint = ControlFile->backupStartPoint;
+   backupEndRequired = ControlFile->backupEndRequired;
+   backupEndPoint = ControlFile->backupEndPoint;
+   if (InArchiveRecovery)
+   {
+       minRecoveryPoint = ControlFile->minRecoveryPoint;
+       minRecoveryPointTLI = ControlFile->minRecoveryPointTLI;
+   }
+   else
+   {
+       minRecoveryPoint = InvalidXLogRecPtr;
+       minRecoveryPointTLI = 0;
+   }
+
+   /*
+    * Start recovery assuming that the final record isn't lost.
+    */
+   abortedRecPtr = InvalidXLogRecPtr;
+   missingContrecPtr = InvalidXLogRecPtr;
+
+   /* Hand the results back to the caller. */
+   *wasShutdown_ptr = wasShutdown;
+   *haveBackupLabel_ptr = haveBackupLabel;
+   *haveTblspcMap_ptr = haveTblspcMap;
+}
+
+/*
+ * Look for recovery signal files and derive the requested recovery mode.
+ *
+ * StandbyModeRequested and ArchiveRecoveryRequested are set from the signal
+ * files that have been seen; if a standby signal file exists it takes
+ * precedence over a recovery signal file.  Nothing is done in bootstrap
+ * mode.
+ *
+ * A recovery command file (recovery.conf) is rejected with a FATAL error,
+ * since as of PG12 we no longer recognize that.
+ */
+static void
+readRecoverySignalFile(void)
+{
+   struct stat st;
+   const char *signal_path = NULL;
+   bool        is_standby_signal = false;
+
+   if (IsBootstrapProcessingMode())
+       return;
+
+   /* The old recovery API file, recovery.conf, is no longer accepted. */
+   if (stat(RECOVERY_COMMAND_FILE, &st) == 0)
+       ereport(FATAL,
+               (errcode_for_file_access(),
+                errmsg("using recovery command file \"%s\" is not supported",
+                       RECOVERY_COMMAND_FILE)));
+
+   /* Get rid of an unused .done file; it's fine if there is none. */
+   unlink(RECOVERY_COMMAND_DONE);
+
+   /*
+    * Pick the signal file to act on.  The standby signal file is checked
+    * first; the recovery signal file is only looked at when the standby one
+    * is absent.
+    */
+   if (stat(STANDBY_SIGNAL_FILE, &st) == 0)
+   {
+       signal_path = STANDBY_SIGNAL_FILE;
+       is_standby_signal = true;
+   }
+   else if (stat(RECOVERY_SIGNAL_FILE, &st) == 0)
+       signal_path = RECOVERY_SIGNAL_FILE;
+
+   if (signal_path != NULL)
+   {
+       int         fd;
+
+       /*
+        * The signal file represents server state information, so fsync it.
+        * This is best effort only: a failure to open or sync the file is
+        * deliberately ignored.
+        */
+       fd = BasicOpenFilePerm(signal_path, O_RDWR | PG_BINARY,
+                              S_IRUSR | S_IWUSR);
+       if (fd >= 0)
+       {
+           (void) pg_fsync(fd);
+           close(fd);
+       }
+
+       if (is_standby_signal)
+           standby_signal_file_found = true;
+       else
+           recovery_signal_file_found = true;
+   }
+
+   /* Without any signal file we won't enter archive recovery. */
+   StandbyModeRequested = standby_signal_file_found;
+   ArchiveRecoveryRequested = (standby_signal_file_found ||
+                               recovery_signal_file_found);
+   if (!ArchiveRecoveryRequested)
+       return;
+
+   /*
+    * Standalone backends cannot run in standby mode, because that needs
+    * other processes such as the WAL receiver to be alive.
+    */
+   if (StandbyModeRequested && !IsUnderPostmaster)
+       ereport(FATAL,
+               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                errmsg("standby mode is not supported by single-user servers")));
+}
+
+/*
+ * Validate the recovery-related settings once archive recovery has been
+ * requested, and finalize the ones that need late processing.
+ *
+ * Does nothing unless ArchiveRecoveryRequested is set.  Otherwise it checks
+ * for compulsory parameters, overrides an inconsistent pause request, parses
+ * recovery_target_time, and resolves recoveryTargetTLI from the configured
+ * timeline goal.
+ */
+static void
+validateRecoveryParameters(void)
+{
+   bool        restore_command_unset;
+
+   if (!ArchiveRecoveryRequested)
+       return;
+
+   restore_command_unset = (recoveryRestoreCommand == NULL ||
+                            recoveryRestoreCommand[0] == '\0');
+
+   /*
+    * Check for compulsory parameters
+    */
+   if (StandbyModeRequested)
+   {
+       bool        conninfo_unset = (PrimaryConnInfo == NULL ||
+                                     PrimaryConnInfo[0] == '\0');
+
+       /* Having neither source configured is allowed, but worth a warning. */
+       if (conninfo_unset && restore_command_unset)
+           ereport(WARNING,
+                   (errmsg("specified neither primary_conninfo nor restore_command"),
+                    errhint("The database server will regularly poll the pg_wal subdirectory to check for files placed there.")));
+   }
+   else if (restore_command_unset)
+       ereport(FATAL,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("must specify restore_command when standby mode is not enabled")));
+
+   /*
+    * Override any inconsistent requests. Note that this is a change of
+    * behaviour in 9.5; prior to this we simply ignored a request to pause if
+    * hot_standby = off, which was surprising behaviour.
+    */
+   if (!EnableHotStandby &&
+       recoveryTargetAction == RECOVERY_TARGET_ACTION_PAUSE)
+       recoveryTargetAction = RECOVERY_TARGET_ACTION_SHUTDOWN;
+
+   /*
+    * Final parsing of recovery_target_time string; see also
+    * check_recovery_target_time().
+    */
+   if (recoveryTarget == RECOVERY_TARGET_TIME)
+   {
+       Datum       parsed_time;
+
+       parsed_time = DirectFunctionCall3(timestamptz_in,
+                                         CStringGetDatum(recovery_target_time_string),
+                                         ObjectIdGetDatum(InvalidOid),
+                                         Int32GetDatum(-1));
+       recoveryTargetTime = DatumGetTimestampTz(parsed_time);
+   }
+
+   /*
+    * If user specified recovery_target_timeline, validate it or compute the
+    * "latest" value.  We can't do this until after we've gotten the restore
+    * command and set InArchiveRecovery, because we need to fetch timeline
+    * history files from the archive.
+    */
+   switch (recoveryTargetTimeLineGoal)
+   {
+       case RECOVERY_TARGET_TIMELINE_NUMERIC:
+           {
+               TimeLineID  requested_tli = recoveryTargetTLIRequested;
+
+               /* Timeline 1 does not have a history file, all else should */
+               if (requested_tli != 1 &&
+                   !existsTimeLineHistory(requested_tli))
+                   ereport(FATAL,
+                           (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                            errmsg("recovery target timeline %u does not exist",
+                                   requested_tli)));
+               recoveryTargetTLI = requested_tli;
+               break;
+           }
+
+       case RECOVERY_TARGET_TIMELINE_LATEST:
+           /* We start the "latest" search from pg_control's timeline */
+           recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
+           break;
+
+       default:
+
+           /*
+            * Otherwise keep the recoveryTargetTLI that was already read from
+            * ControlFile.
+            */
+           Assert(recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_CONTROLFILE);
+           break;
+   }
+}
+
+/*
+ * read_backup_label: check to see if a backup_label file is present
+ *
+ * If we see a backup_label during recovery, we assume that we are recovering
+ * from a backup dump file, and we therefore roll forward from the checkpoint
+ * identified by the label file, NOT what pg_control says.  This avoids the
+ * problem that pg_control might have been archived one or more checkpoints
+ * later than the start of the dump, and so if we rely on it as the start
+ * point, we will fail to restore a consistent database state.
+ *
+ * Returns true if a backup_label was found (and fills the checkpoint
+ * location and TLI into *checkPointLoc and *backupLabelTLI, respectively);
+ * returns false if not. If this backup_label came from a streamed backup,
+ * *backupEndRequired is set to true. If this backup_label was created during
+ * recovery, *backupFromStandby is set to true.
+ *
+ * Also sets the global variables RedoStartLSN and RedoStartTLI with the LSN
+ * and TLI read from the backup file.
+ *
+ * All four output parameters are reset (InvalidXLogRecPtr, 0, false, false)
+ * before the file is opened, so they hold those values when false is
+ * returned.
+ *
+ * Everything that goes wrong, other than the file simply not existing, is
+ * reported with ereport(FATAL): an open failure with errno other than
+ * ENOENT, a malformed START WAL LOCATION or CHECKPOINT LOCATION line, a
+ * START TIMELINE value that disagrees with the timeline parsed from the WAL
+ * segment file name, and a read or close error on the file.
+ */
+static bool
+read_backup_label(XLogRecPtr *checkPointLoc, TimeLineID *backupLabelTLI,
+                 bool *backupEndRequired, bool *backupFromStandby)
+{
+   char        startxlogfilename[MAXFNAMELEN];
+   TimeLineID  tli_from_walseg,
+               tli_from_file;
+   FILE       *lfp;
+   char        ch;
+   char        backuptype[20];
+   char        backupfrom[20];
+   char        backuplabel[MAXPGPATH];
+   char        backuptime[128];
+   uint32      hi,
+               lo;
+
+   /* suppress possible uninitialized-variable warnings */
+   *checkPointLoc = InvalidXLogRecPtr;
+   *backupLabelTLI = 0;
+   *backupEndRequired = false;
+   *backupFromStandby = false;
+
+   /*
+    * See if label file is present
+    */
+   lfp = AllocateFile(BACKUP_LABEL_FILE, "r");
+   if (!lfp)
+   {
+       if (errno != ENOENT)
+           ereport(FATAL,
+                   (errcode_for_file_access(),
+                    errmsg("could not read file \"%s\": %m",
+                           BACKUP_LABEL_FILE)));
+       return false;           /* it's not there, all is fine */
+   }
+
+   /*
+    * Read and parse the START WAL LOCATION and CHECKPOINT lines (this code
+    * is pretty crude, but we are not expecting any variability in the file
+    * format).
+    *
+    * Both lines are mandatory: every conversion must succeed and the
+    * character right after the last field (read into 'ch') must be a
+    * newline, otherwise the file is rejected as invalid.
+    *
+    * The first eight hex digits of the WAL file name are parsed as the
+    * timeline ID (tli_from_walseg); that value is what gets stored in
+    * RedoStartTLI and in *backupLabelTLI.
+    */
+   if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %08X%16s)%c",
+              &hi, &lo, &tli_from_walseg, startxlogfilename, &ch) != 5 || ch != '\n')
+       ereport(FATAL,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
+   RedoStartLSN = ((uint64) hi) << 32 | lo;
+   RedoStartTLI = tli_from_walseg;
+   if (fscanf(lfp, "CHECKPOINT LOCATION: %X/%X%c",
+              &hi, &lo, &ch) != 3 || ch != '\n')
+       ereport(FATAL,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE)));
+   *checkPointLoc = ((uint64) hi) << 32 | lo;
+   *backupLabelTLI = tli_from_walseg;
+
+   /*
+    * BACKUP METHOD and BACKUP FROM lines are new in 9.2. We can't restore
+    * from an older backup anyway, but since the information on it is not
+    * strictly required, don't error out if it's missing for some reason.
+    *
+    * Only the exact values "streamed" and "standby" have any effect; any
+    * other value leaves the corresponding output flag at false.
+    */
+   if (fscanf(lfp, "BACKUP METHOD: %19s\n", backuptype) == 1)
+   {
+       if (strcmp(backuptype, "streamed") == 0)
+           *backupEndRequired = true;
+   }
+
+   if (fscanf(lfp, "BACKUP FROM: %19s\n", backupfrom) == 1)
+   {
+       if (strcmp(backupfrom, "standby") == 0)
+           *backupFromStandby = true;
+   }
+
+   /*
+    * Parse START TIME and LABEL. Those are not mandatory fields for recovery
+    * but checking for their presence is useful for debugging and the next
+    * sanity checks. Cope also with the fact that the result buffers have a
+    * pre-allocated size, hence if the backup_label file has been generated
+    * with strings longer than the maximum assumed here an incorrect parsing
+    * happens. That's fine as only minor consistency checks are done
+    * afterwards.
+    *
+    * The parsed values are only reported at DEBUG1; nothing else in this
+    * function uses them.
+    */
+   if (fscanf(lfp, "START TIME: %127[^\n]\n", backuptime) == 1)
+       ereport(DEBUG1,
+               (errmsg_internal("backup time %s in file \"%s\"",
+                                backuptime, BACKUP_LABEL_FILE)));
+
+   if (fscanf(lfp, "LABEL: %1023[^\n]\n", backuplabel) == 1)
+       ereport(DEBUG1,
+               (errmsg_internal("backup label %s in file \"%s\"",
+                                backuplabel, BACKUP_LABEL_FILE)));
+
+   /*
+    * START TIMELINE is new as of 11. Its parsing is not mandatory, still use
+    * it as a sanity check if present.
+    *
+    * When present it must equal the timeline taken from the WAL segment
+    * file name above; a mismatch is a FATAL error.
+    */
+   if (fscanf(lfp, "START TIMELINE: %u\n", &tli_from_file) == 1)
+   {
+       if (tli_from_walseg != tli_from_file)
+           ereport(FATAL,
+                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE),
+                    errdetail("Timeline ID parsed is %u, but expected %u.",
+                              tli_from_file, tli_from_walseg)));
+
+       ereport(DEBUG1,
+               (errmsg_internal("backup timeline %u in file \"%s\"",
+                                tli_from_file, BACKUP_LABEL_FILE)));
+   }
+
+   if (ferror(lfp) || FreeFile(lfp))
+       ereport(FATAL,
+               (errcode_for_file_access(),
+                errmsg("could not read file \"%s\": %m",
+                       BACKUP_LABEL_FILE)));
+
+   return true;
+}
+
+/*
+ * read_tablespace_map: check to see if a tablespace_map file is present
+ *
+ * If we see a tablespace_map file during recovery, we assume that we are
+ * recovering from a backup dump file, and we therefore need to create symlinks
+ * as per the information present in tablespace_map file.
+ *
+ * Returns true if a tablespace_map file was found (and fills *tablespaces
+ * with a tablespaceinfo struct for each tablespace listed in the file);
+ * returns false if not.
+ *
+ * Entries are appended to *tablespaces with lappend(), so the list passed
+ * in is extended rather than replaced.  Each entry and its 'oid' and 'path'
+ * strings are freshly palloc'd; 'oid' is kept as the text read from the
+ * file.
+ *
+ * An open failure with errno other than ENOENT, a malformed line, an
+ * unterminated last line, or a read/close error is reported with
+ * ereport(FATAL).
+ */
+static bool
+read_tablespace_map(List **tablespaces)
+{
+   tablespaceinfo *ti;
+   FILE       *lfp;
+   char        str[MAXPGPATH];
+   int         ch,
+               i,
+               n;
+   bool        was_backslash;
+
+   /*
+    * See if tablespace_map file is present
+    */
+   lfp = AllocateFile(TABLESPACE_MAP, "r");
+   if (!lfp)
+   {
+       if (errno != ENOENT)
+           ereport(FATAL,
+                   (errcode_for_file_access(),
+                    errmsg("could not read file \"%s\": %m",
+                           TABLESPACE_MAP)));
+       return false;           /* it's not there, all is fine */
+   }
+
+   /*
+    * Read and parse the link name and path lines from tablespace_map file
+    * (this code is pretty crude, but we are not expecting any variability in
+    * the file format).  De-escape any backslashes that were inserted.
+    *
+    * 'i' is the number of de-escaped characters collected in 'str' for the
+    * current line.  'was_backslash' is true when the previous character was
+    * an unescaped backslash, in which case the current character is taken
+    * literally (including a newline, carriage return or another
+    * backslash).  Characters that would not fit in 'str' are silently
+    * dropped.
+    */
+   i = 0;
+   was_backslash = false;
+   while ((ch = fgetc(lfp)) != EOF)
+   {
+       if (!was_backslash && (ch == '\n' || ch == '\r'))
+       {
+           if (i == 0)
+               continue;       /* \r immediately followed by \n */
+
+           /*
+            * The de-escaped line should contain an OID followed by exactly
+            * one space followed by a path.  The path might start with
+            * spaces, so don't be too liberal about parsing.
+            *
+            * After the scan below, 'n' is the index of the first space, or
+            * 'i' if the line has none.  n < 1 rejects an empty OID field;
+            * n >= i - 1 rejects a line without a space or with nothing
+            * after the space.
+            */
+           str[i] = '\0';
+           n = 0;
+           while (str[n] && str[n] != ' ')
+               n++;
+           if (n < 1 || n >= i - 1)
+               ereport(FATAL,
+                       (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                        errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+           str[n++] = '\0';
+
+           ti = palloc0(sizeof(tablespaceinfo));
+           ti->oid = pstrdup(str);
+           ti->path = pstrdup(str + n);
+           *tablespaces = lappend(*tablespaces, ti);
+
+           i = 0;
+           continue;
+       }
+       else if (!was_backslash && ch == '\\')
+           was_backslash = true;
+       else
+       {
+           if (i < sizeof(str) - 1)
+               str[i++] = ch;
+           was_backslash = false;
+       }
+   }
+
+   if (i != 0 || was_backslash)    /* last line not terminated? */
+       ereport(FATAL,
+               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                errmsg("invalid data in file \"%s\"", TABLESPACE_MAP)));
+
+   if (ferror(lfp) || FreeFile(lfp))
+       ereport(FATAL,
+               (errcode_for_file_access(),
+                errmsg("could not read file \"%s\": %m",
+                       TABLESPACE_MAP)));
+
+   return true;
+}
+
+/*
+ * Finish WAL recovery.
+ *
+ * This does not close the 'xlogreader' yet, because in some cases the caller
+ * still wants to re-read the last checkpoint record by calling
+ * ReadCheckPointRecord().
+ *
+ * Returns the position of the last valid or applied record, after which new
+ * WAL should be appended, information about why recovery was ended, and some
+ * other things. See the EndOfWalRecoveryInfo struct for details.
+ *
+ * The returned struct is palloc'd here, as is the copy of the last partial
+ * page it points to (result->lastPage), if there is one.
+ *
+ * Side effects visible in this function: the WAL receiver is shut down,
+ * StandbyMode is cleared, and, if archive recovery was requested,
+ * InArchiveRecovery is cleared and the currently open WAL segment file is
+ * closed.
+ */
+EndOfWalRecoveryInfo *
+FinishWalRecovery(void)
+{
+   EndOfWalRecoveryInfo *result = palloc(sizeof(EndOfWalRecoveryInfo));
+   XLogRecPtr  lastRec;
+   TimeLineID  lastRecTLI;
+   XLogRecPtr  endOfLog;
+
+   /*
+    * Kill WAL receiver, if it's still running, before we continue to write
+    * the startup checkpoint and aborted-contrecord records. It will trump
+    * over these records and subsequent ones if it's still alive when we
+    * start writing WAL.
+    */
+   XLogShutdownWalRcv();
+
+   /*
+    * We are now done reading the xlog from stream. Turn off streaming
+    * recovery to force fetching the files (which would be required at end of
+    * recovery, e.g., timeline history file) from archive or pg_wal.
+    *
+    * Note that standby mode must be turned off after killing WAL receiver,
+    * i.e., calling XLogShutdownWalRcv().
+    */
+   Assert(!WalRcvStreaming());
+   StandbyMode = false;
+
+   /*
+    * Determine where to start writing WAL next.
+    *
+    * Re-fetch the last valid or last applied record, so we can identify the
+    * exact endpoint of what we consider the valid portion of WAL.  There may
+    * be an incomplete continuation record after that, in which case
+    * 'abortedRecPtr' and 'missingContrecPtr' are set and the caller will
+    * write a special OVERWRITE_CONTRECORD message to mark that the rest of
+    * it is intentionally missing.  See CreateOverwriteContrecordRecord().
+    *
+    * An important side-effect of this is to load the last page into
+    * xlogreader. The caller uses it to initialize the WAL for writing.
+    *
+    * If no recovery was performed (!InRecovery), the record re-read is the
+    * checkpoint record; otherwise it is the last record that was replayed.
+    * The re-read is done at PANIC level, so a failure here does not return.
+    */
+   if (!InRecovery)
+   {
+       lastRec = CheckPointLoc;
+       lastRecTLI = CheckPointTLI;
+   }
+   else
+   {
+       lastRec = XLogRecoveryCtl->lastReplayedReadRecPtr;
+       lastRecTLI = XLogRecoveryCtl->lastReplayedTLI;
+   }
+   XLogBeginRead(xlogreader, lastRec);
+   (void) ReadRecord(xlogreader, PANIC, false, lastRecTLI);
+   endOfLog = xlogreader->EndRecPtr;
+
+   /*
+    * Remember the TLI in the filename of the XLOG segment containing the
+    * end-of-log.  It could be different from the timeline that endOfLog
+    * nominally belongs to, if there was a timeline switch in that segment,
+    * and we were reading the old WAL from a segment belonging to a higher
+    * timeline.
+    */
+   result->endOfLogTLI = xlogreader->seg.ws_tli;
+
+   if (ArchiveRecoveryRequested)
+   {
+       /*
+        * We are no longer in archive recovery state.
+        *
+        * We are now done reading the old WAL.  Turn off archive fetching if
+        * it was active.
+        */
+       Assert(InArchiveRecovery);
+       InArchiveRecovery = false;
+
+       /*
+        * If the ending log segment is still open, close it (to avoid
+        * problems on Windows with trying to rename or delete an open file).
+        */
+       if (readFile >= 0)
+       {
+           close(readFile);
+           readFile = -1;
+       }
+   }
+
+   /*
+    * Copy the last partial block to the caller, for initializing the WAL
+    * buffer for appending new WAL.
+    *
+    * Only the valid bytes of the page (endOfLog % XLOG_BLCKSZ of them) are
+    * copied, from the page currently held in xlogreader->readBuf.  If
+    * endOfLog falls exactly on a page boundary there is nothing to copy and
+    * lastPage is NULL.
+    */
+   if (endOfLog % XLOG_BLCKSZ != 0)
+   {
+       char       *page;
+       int         len;
+       XLogRecPtr  pageBeginPtr;
+
+       pageBeginPtr = endOfLog - (endOfLog % XLOG_BLCKSZ);
+       Assert(readOff == XLogSegmentOffset(pageBeginPtr, wal_segment_size));
+
+       /* Copy the valid part of the last block */
+       len = endOfLog % XLOG_BLCKSZ;
+       page = palloc(len);
+       memcpy(page, xlogreader->readBuf, len);
+
+       result->lastPageBeginPtr = pageBeginPtr;
+       result->lastPage = page;
+   }
+   else
+   {
+       /* There is no partial block to copy. */
+       result->lastPageBeginPtr = endOfLog;
+       result->lastPage = NULL;
+   }
+
+   /*
+    * Create a comment for the history file to explain why and where timeline
+    * changed.
+    */
+   result->recoveryStopReason = getRecoveryStopReason();
+
+   result->lastRec = lastRec;
+   result->lastRecTLI = lastRecTLI;
+   result->endOfLog = endOfLog;
+
+   result->abortedRecPtr = abortedRecPtr;
+   result->missingContrecPtr = missingContrecPtr;
+
+   result->standby_signal_file_found = standby_signal_file_found;
+   result->recovery_signal_file_found = recovery_signal_file_found;
+
+   return result;
+}
+
+/*
+ * Release what WAL recovery was holding on to: the open WAL segment file
+ * (if any) and the WAL reader, plus, when archive recovery was requested,
+ * the temporary files left over from restoring out of the archive and the
+ * recovery wakeup latch.
+ */
+void
+ShutdownWalRecovery(void)
+{
+   char        leftoverPath[MAXPGPATH];
+
+   /* Close the segment the reader had open, then free the reader itself */
+   if (readFile >= 0)
+   {
+       close(readFile);
+       readFile = -1;
+   }
+   XLogReaderFree(xlogreader);
+
+   /* Everything that follows only matters for archive recovery */
+   if (!ArchiveRecoveryRequested)
+       return;
+
+   /*
+    * A partial WAL segment named RECOVERYXLOG may have been left behind;
+    * remove it.  Failure to do so is deliberately ignored.
+    */
+   snprintf(leftoverPath, sizeof(leftoverPath), XLOGDIR "/RECOVERYXLOG");
+   unlink(leftoverPath);
+
+   /* Same treatment for a leftover restored timeline-history file */
+   snprintf(leftoverPath, sizeof(leftoverPath), XLOGDIR "/RECOVERYHISTORY");
+   unlink(leftoverPath);
+
+   /*
+    * The latch is no longer needed.  Disowning it is not strictly
+    * necessary, but it keeps things tidy.
+    */
+   DisownLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+}
+
+/*
+ * Perform WAL recovery.
+ *
+ * If the system was shut down cleanly, this is never called.
+ */
+void
+PerformWalRecovery(void)
+{
+   int         rmid;
+   XLogRecord *record;
+   bool        reachedRecoveryTarget = false;
+   TimeLineID  replayTLI;
+
+   /*
+    * Initialize shared variables for tracking progress of WAL replay, as if
+    * we had just replayed the record before the REDO location (or the
+    * checkpoint record itself, if it's a shutdown checkpoint).
+    */
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   if (RedoStartLSN < CheckPointLoc)
+   {
+       XLogRecoveryCtl->lastReplayedReadRecPtr = InvalidXLogRecPtr;
+       XLogRecoveryCtl->lastReplayedEndRecPtr = RedoStartLSN;
+       XLogRecoveryCtl->lastReplayedTLI = RedoStartTLI;
+   }
+   else
+   {
+       XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
+       XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
+       XLogRecoveryCtl->lastReplayedTLI = CheckPointTLI;
+   }
+   XLogRecoveryCtl->replayEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
+   XLogRecoveryCtl->replayEndTLI = XLogRecoveryCtl->lastReplayedTLI;
+   XLogRecoveryCtl->recoveryLastXTime = 0;
+   XLogRecoveryCtl->currentChunkStartTime = 0;
+   XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /* Also ensure XLogReceiptTime has a sane value */
+   XLogReceiptTime = GetCurrentTimestamp();
+
+   /*
+    * Let postmaster know we've started redo now, so that it can launch the
+    * archiver if necessary.
+    */
+   if (IsUnderPostmaster)
+       SendPostmasterSignal(PMSIGNAL_RECOVERY_STARTED);
+
+   /*
+    * Allow read-only connections immediately if we're consistent already.
+    */
+   CheckRecoveryConsistency();
+
+   /*
+    * Find the first record that logically follows the checkpoint --- it
+    * might physically precede it, though.
+    */
+   if (RedoStartLSN < CheckPointLoc)
+   {
+       /* back up to find the record */
+       replayTLI = RedoStartTLI;
+       XLogBeginRead(xlogreader, RedoStartLSN);
+       record = ReadRecord(xlogreader, PANIC, false, replayTLI);
+   }
+   else
+   {
+       /* just have to read next record after CheckPoint */
+       Assert(xlogreader->ReadRecPtr == CheckPointLoc);
+       replayTLI = CheckPointTLI;
+       record = ReadRecord(xlogreader, LOG, false, replayTLI);
+   }
+
+   if (record != NULL)
+   {
+       TimestampTz xtime;
+       PGRUsage    ru0;
+
+       pg_rusage_init(&ru0);
+
+       InRedo = true;
+
+       /* Initialize resource managers */
+       for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
+       {
+           if (RmgrTable[rmid].rm_startup != NULL)
+               RmgrTable[rmid].rm_startup();
+       }
+
+       ereport(LOG,
+               (errmsg("redo starts at %X/%X",
+                       LSN_FORMAT_ARGS(xlogreader->ReadRecPtr))));
+
+       /* Prepare to report progress of the redo phase. */
+       if (!StandbyMode)
+           begin_startup_progress_phase();
+
+       /*
+        * main redo apply loop
+        */
+       do
+       {
+           if (!StandbyMode)
+               ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+                                        LSN_FORMAT_ARGS(xlogreader->ReadRecPtr));
+
+#ifdef WAL_DEBUG
+           if (XLOG_DEBUG ||
+               (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
+               (rmid != RM_XACT_ID && trace_recovery_messages <= DEBUG3))
+           {
+               StringInfoData buf;
+
+               initStringInfo(&buf);
+               appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
+                                LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
+                                LSN_FORMAT_ARGS(xlogreader->EndRecPtr));
+               xlog_outrec(&buf, xlogreader);
+               appendStringInfoString(&buf, " - ");
+               xlog_outdesc(&buf, xlogreader);
+               elog(LOG, "%s", buf.data);
+               pfree(buf.data);
+           }
+#endif
+
+           /* Handle interrupt signals of startup process */
+           HandleStartupProcInterrupts();
+
+           /*
+            * Pause WAL replay, if requested by a hot-standby session via
+            * SetRecoveryPause().
+            *
+            * Note that we intentionally don't take the info_lck spinlock
+            * here.  We might therefore read a slightly stale value of the
+            * recoveryPause flag, but it can't be very stale (no worse than
+            * the last spinlock we did acquire).  Since a pause request is a
+            * pretty asynchronous thing anyway, possibly responding to it one
+            * WAL record later than we otherwise would is a minor issue, so
+            * it doesn't seem worth adding another spinlock cycle to prevent
+            * that.
+            */
+           if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
+               RECOVERY_NOT_PAUSED)
+               recoveryPausesHere(false);
+
+           /*
+            * Have we reached our recovery target?
+            */
+           if (recoveryStopsBefore(xlogreader))
+           {
+               reachedRecoveryTarget = true;
+               break;
+           }
+
+           /*
+            * If we've been asked to lag the primary, wait on latch until
+            * enough time has passed.
+            */
+           if (recoveryApplyDelay(xlogreader))
+           {
+               /*
+                * We test for paused recovery again here. If user sets
+                * delayed apply, it may be because they expect to pause
+                * recovery in case of problems, so we must test again here
+                * otherwise pausing during the delay-wait wouldn't work.
+                */
+               if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
+                   RECOVERY_NOT_PAUSED)
+                   recoveryPausesHere(false);
+           }
+
+           /*
+            * Apply the record
+            */
+           ApplyWalRecord(xlogreader, record, &replayTLI);
+
+           /* Exit loop if we reached inclusive recovery target */
+           if (recoveryStopsAfter(xlogreader))
+           {
+               reachedRecoveryTarget = true;
+               break;
+           }
+
+           /* Else, try to fetch the next WAL record */
+           record = ReadRecord(xlogreader, LOG, false, replayTLI);
+       } while (record != NULL);
+
+       /*
+        * end of main redo apply loop
+        */
+
+       if (reachedRecoveryTarget)
+       {
+           if (!reachedConsistency)
+               ereport(FATAL,
+                       (errmsg("requested recovery stop point is before consistent recovery point")));
+
+           /*
+            * This is the last point where we can restart recovery with a new
+            * recovery target, if we shutdown and begin again. After this,
+            * Resource Managers may choose to do permanent corrective actions
+            * at end of recovery.
+            */
+           switch (recoveryTargetAction)
+           {
+               case RECOVERY_TARGET_ACTION_SHUTDOWN:
+
+                   /*
+                    * exit with special return code to request shutdown of
+                    * postmaster.  Log messages issued from postmaster.
+                    */
+                   proc_exit(3);
+
+               case RECOVERY_TARGET_ACTION_PAUSE:
+                   SetRecoveryPause(true);
+                   recoveryPausesHere(true);
+
+                   /* drop into promote */
+
+               case RECOVERY_TARGET_ACTION_PROMOTE:
+                   break;
+           }
+       }
+
+       /* Allow resource managers to do any required cleanup. */
+       for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
+       {
+           if (RmgrTable[rmid].rm_cleanup != NULL)
+               RmgrTable[rmid].rm_cleanup();
+       }
+
+       ereport(LOG,
+               (errmsg("redo done at %X/%X system usage: %s",
+                       LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
+                       pg_rusage_show(&ru0))));
+       xtime = GetLatestXTime();
+       if (xtime)
+           ereport(LOG,
+                   (errmsg("last completed transaction was at log time %s",
+                           timestamptz_to_str(xtime))));
+
+       InRedo = false;
+   }
+   else
+   {
+       /* there are no WAL records following the checkpoint */
+       ereport(LOG,
+               (errmsg("redo is not required")));
+
+   }
+
+   /*
+    * This check is intentionally after the above log messages that indicate
+    * how far recovery went.
+    */
+   if (ArchiveRecoveryRequested &&
+       recoveryTarget != RECOVERY_TARGET_UNSET &&
+       !reachedRecoveryTarget)
+       ereport(FATAL,
+               (errmsg("recovery ended before configured recovery target was reached")));
+}
+
+/*
+ * Subroutine of PerformWalRecovery, to apply one WAL record.
+ *
+ * 'xlogreader' is the reader holding the record to apply and 'record' is
+ * that record's header.  '*replayTLI' is the timeline being replayed; it is
+ * updated in place when the record is a shutdown checkpoint or
+ * end-of-recovery record that names a different timeline.
+ *
+ * An error context callback is installed for the duration of the redo
+ * calls, so that errors raised while replaying are annotated with a
+ * description of the record.
+ *
+ * The shared replay position is published in two steps: replayEndRecPtr /
+ * replayEndTLI before the record is applied, and the lastReplayed* fields
+ * after it has been applied.
+ */
+static void
+ApplyWalRecord(XLogReaderState *xlogreader, XLogRecord *record, TimeLineID *replayTLI)
+{
+   ErrorContextCallback errcallback;
+   bool        switchedTLI = false;
+
+   /* Setup error traceback support for ereport() */
+   errcallback.callback = rm_redo_error_callback;
+   errcallback.arg = (void *) xlogreader;
+   errcallback.previous = error_context_stack;
+   error_context_stack = &errcallback;
+
+   /*
+    * ShmemVariableCache->nextXid must be beyond record's xid.
+    */
+   AdvanceNextFullTransactionIdPastXid(record->xl_xid);
+
+   /*
+    * Before replaying this record, check if this record causes the current
+    * timeline to change. The record is already considered to be part of the
+    * new timeline, so we update replayTLI before replaying it. That's
+    * important so that replayEndTLI, which is recorded as the minimum
+    * recovery point's TLI if recovery stops after this record, is set
+    * correctly.
+    *
+    * Only XLOG_CHECKPOINT_SHUTDOWN and XLOG_END_OF_RECOVERY records are
+    * examined; for any other record type newReplayTLI stays equal to
+    * *replayTLI and no switch is detected.
+    */
+   if (record->xl_rmid == RM_XLOG_ID)
+   {
+       TimeLineID  newReplayTLI = *replayTLI;
+       TimeLineID  prevReplayTLI = *replayTLI;
+       uint8       info = record->xl_info & ~XLR_INFO_MASK;
+
+       if (info == XLOG_CHECKPOINT_SHUTDOWN)
+       {
+           CheckPoint  checkPoint;
+
+           memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
+           newReplayTLI = checkPoint.ThisTimeLineID;
+           prevReplayTLI = checkPoint.PrevTimeLineID;
+       }
+       else if (info == XLOG_END_OF_RECOVERY)
+       {
+           xl_end_of_recovery xlrec;
+
+           memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
+           newReplayTLI = xlrec.ThisTimeLineID;
+           prevReplayTLI = xlrec.PrevTimeLineID;
+       }
+
+       if (newReplayTLI != *replayTLI)
+       {
+           /* Check that it's OK to switch to this TLI */
+           checkTimeLineSwitch(xlogreader->EndRecPtr,
+                               newReplayTLI, prevReplayTLI, *replayTLI);
+
+           /* Following WAL records should be run with new TLI */
+           *replayTLI = newReplayTLI;
+           switchedTLI = true;
+       }
+   }
+
+   /*
+    * Update shared replayEndRecPtr before replaying this record, so that
+    * XLogFlush will update minRecoveryPoint correctly.
+    */
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   XLogRecoveryCtl->replayEndRecPtr = xlogreader->EndRecPtr;
+   XLogRecoveryCtl->replayEndTLI = *replayTLI;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /*
+    * If we are attempting to enter Hot Standby mode, process XIDs we see
+    */
+   if (standbyState >= STANDBY_INITIALIZED &&
+       TransactionIdIsValid(record->xl_xid))
+       RecordKnownAssignedTransactionIds(record->xl_xid);
+
+   /*
+    * Some XLOG record types that are related to recovery are processed
+    * directly here, rather than in xlog_redo()
+    *
+    * Note that this is in addition to, not instead of, the resource
+    * manager's own redo routine, which is still called right below.
+    */
+   if (record->xl_rmid == RM_XLOG_ID)
+       xlogrecovery_redo(xlogreader, *replayTLI);
+
+   /* Now apply the WAL record itself */
+   RmgrTable[record->xl_rmid].rm_redo(xlogreader);
+
+   /*
+    * After redo, check whether the backup pages associated with the WAL
+    * record are consistent with the existing pages. This check is done only
+    * if consistency check is enabled for this record.
+    */
+   if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
+       verifyBackupPageConsistency(xlogreader);
+
+   /* Pop the error context stack */
+   error_context_stack = errcallback.previous;
+
+   /*
+    * Update lastReplayedEndRecPtr after this record has been successfully
+    * replayed.
+    */
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   XLogRecoveryCtl->lastReplayedReadRecPtr = xlogreader->ReadRecPtr;
+   XLogRecoveryCtl->lastReplayedEndRecPtr = xlogreader->EndRecPtr;
+   XLogRecoveryCtl->lastReplayedTLI = *replayTLI;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /*
+    * If rm_redo called XLogRequestWalReceiverReply, then we wake up the
+    * receiver so that it notices the updated lastReplayedEndRecPtr and sends
+    * a reply to the primary.
+    */
+   if (doRequestWalReceiverReply)
+   {
+       doRequestWalReceiverReply = false;
+       WalRcvForceReply();
+   }
+
+   /* Allow read-only connections if we're consistent now */
+   CheckRecoveryConsistency();
+
+   /* Is this a timeline switch? */
+   if (switchedTLI)
+   {
+       /*
+        * Before we continue on the new timeline, clean up any (possibly
+        * bogus) future WAL segments on the old timeline.
+        */
+       RemoveNonParentXlogFiles(xlogreader->EndRecPtr, *replayTLI);
+
+       /*
+        * Wake up any walsenders to notice that we are on a new timeline.
+        */
+       if (AllowCascadeReplication())
+           WalSndWakeup();
+   }
+}
+
+/*
+ * Recovery-side handling of XLOG resource manager records.
+ *
+ * A couple of RM_XLOG_ID record types matter to WAL recovery itself, so they
+ * are dealt with here instead of in xlog_redo().  Every other record type
+ * is ignored by this function.
+ */
+static void
+xlogrecovery_redo(XLogReaderState *record, TimeLineID replayTLI)
+{
+   uint8       rectype = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+   Assert(XLogRecGetRmid(record) == RM_XLOG_ID);
+
+   switch (rectype)
+   {
+       case XLOG_OVERWRITE_CONTRECORD:
+           {
+               /*
+                * Cross-check the record's payload against the LSN the
+                * reader noted as overwritten.
+                */
+               xl_overwrite_contrecord payload;
+
+               memcpy(&payload, XLogRecGetData(record),
+                      sizeof(xl_overwrite_contrecord));
+               if (payload.overwritten_lsn != record->overwrittenRecPtr)
+                   elog(FATAL, "mismatching overwritten LSN %X/%X -> %X/%X",
+                        LSN_FORMAT_ARGS(payload.overwritten_lsn),
+                        LSN_FORMAT_ARGS(record->overwrittenRecPtr));
+
+               ereport(LOG,
+                       (errmsg("successfully skipped missing contrecord at %X/%X, overwritten at %s",
+                               LSN_FORMAT_ARGS(payload.overwritten_lsn),
+                               timestamptz_to_str(payload.overwrite_time))));
+
+               /* The verification is a one-shot affair; reset the marker */
+               record->overwrittenRecPtr = InvalidXLogRecPtr;
+               break;
+           }
+
+       case XLOG_BACKUP_END:
+           {
+               XLogRecPtr  recordedStart;
+
+               memcpy(&recordedStart, XLogRecGetData(record),
+                      sizeof(recordedStart));
+
+               if (backupStartPoint != recordedStart)
+               {
+                   /* End of some other backup; keep waiting for ours */
+                   elog(DEBUG1, "saw end-of-backup record for backup starting at %X/%X, waiting for %X/%X",
+                        LSN_FORMAT_ARGS(recordedStart),
+                        LSN_FORMAT_ARGS(backupStartPoint));
+               }
+               else
+               {
+                   /*
+                    * This is the end of our base backup, i.e. the point
+                    * where pg_stop_backup() was done.  The data on disk is
+                    * now consistent (assuming minRecoveryPoint has been
+                    * reached too).  Record this record's end LSN in
+                    * backupEndPoint, so that the next call to
+                    * CheckRecoveryConsistency() notices it and performs the
+                    * end-of-backup processing.
+                    */
+                   elog(DEBUG1, "end of backup record reached");
+
+                   backupEndPoint = record->EndRecPtr;
+               }
+               break;
+           }
+
+       default:
+           /* not a record type that recovery needs to look at */
+           break;
+   }
+}
+
+/*
+ * Checks if recovery has reached a consistent state. When consistency is
+ * reached and we have a valid starting standby snapshot, tell postmaster
+ * that it can start accepting read-only connections.
+ *
+ * Does nothing while minRecoveryPoint is invalid.  Otherwise it performs up
+ * to three independent steps, each guarded by its own condition:
+ * end-of-backup processing once backupEndPoint has been replayed, setting
+ * reachedConsistency once minRecoveryPoint has been replayed and no backup
+ * end is still required, and signalling the postmaster to begin hot standby.
+ */
+static void
+CheckRecoveryConsistency(void)
+{
+   XLogRecPtr  lastReplayedEndRecPtr;
+   TimeLineID  lastReplayedTLI;
+
+   /*
+    * During crash recovery, we don't reach a consistent state until we've
+    * replayed all the WAL.
+    */
+   if (XLogRecPtrIsInvalid(minRecoveryPoint))
+       return;
+
+   Assert(InArchiveRecovery);
+
+   /*
+    * assume that we are called in the startup process, and hence don't need
+    * a lock to read lastReplayedEndRecPtr
+    */
+   lastReplayedEndRecPtr = XLogRecoveryCtl->lastReplayedEndRecPtr;
+   lastReplayedTLI = XLogRecoveryCtl->lastReplayedTLI;
+
+   /*
+    * Have we reached the point where our base backup was completed?
+    */
+   if (!XLogRecPtrIsInvalid(backupEndPoint) &&
+       backupEndPoint <= lastReplayedEndRecPtr)
+   {
+       elog(DEBUG1, "end of backup reached");
+
+       /*
+        * We have reached the end of base backup, as indicated by pg_control.
+        * Update the control file accordingly.
+        *
+        * The local backup-tracking variables are reset afterwards, so this
+        * branch is not taken again for the same backup.
+        */
+       ReachedEndOfBackup(lastReplayedEndRecPtr, lastReplayedTLI);
+       backupStartPoint = InvalidXLogRecPtr;
+       backupEndPoint = InvalidXLogRecPtr;
+       backupEndRequired = false;
+   }
+
+   /*
+    * Have we passed our safe starting point? Note that minRecoveryPoint is
+    * known to be incorrectly set if backupEndRequired is set, until
+    * the XLOG_BACKUP_END arrives to advise us of the correct
+    * minRecoveryPoint. All we know prior to that is that we're not
+    * consistent yet.
+    */
+   if (!reachedConsistency && !backupEndRequired &&
+       minRecoveryPoint <= lastReplayedEndRecPtr)
+   {
+       /*
+        * Check to see if the XLOG sequence contained any unresolved
+        * references to uninitialized pages.
+        */
+       XLogCheckInvalidPages();
+
+       reachedConsistency = true;
+       ereport(LOG,
+               (errmsg("consistent recovery state reached at %X/%X",
+                       LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
+   }
+
+   /*
+    * Have we got a valid starting snapshot that will allow queries to be
+    * run? If so, we can tell postmaster that the database is consistent now,
+    * enabling connections.
+    *
+    * LocalHotStandbyActive is set once this has been done, so the signal is
+    * sent at most once from this process.
+    */
+   if (standbyState == STANDBY_SNAPSHOT_READY &&
+       !LocalHotStandbyActive &&
+       reachedConsistency &&
+       IsUnderPostmaster)
+   {
+       SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+       XLogRecoveryCtl->SharedHotStandbyActive = true;
+       SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+       LocalHotStandbyActive = true;
+
+       SendPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY);
+   }
+}
+
+/*
+ * errcontext callback used while rm_redo() runs.
+ *
+ * 'arg' is the XLogReaderState of the record being replayed.  The record's
+ * description and block references are appended to the error being
+ * reported as a context line.
+ */
+static void
+rm_redo_error_callback(void *arg)
+{
+   StringInfoData desc;
+   XLogReaderState *xlogreader = (XLogReaderState *) arg;
+
+   /* Describe the record first, then list its block references */
+   initStringInfo(&desc);
+   xlog_outdesc(&desc, xlogreader);
+   xlog_block_info(&desc, xlogreader);
+
+   /* translator: %s is a WAL record description */
+   errcontext("WAL redo at %X/%X for %s",
+              LSN_FORMAT_ARGS(xlogreader->ReadRecPtr),
+              desc.data);
+
+   /* The text has been handed to errcontext(), so release our buffer */
+   pfree(desc.data);
+}
+
+/*
+ * Append a description of an XLogRecord to 'buf'.
+ *
+ * The output is the resource manager's name, a slash, the record identity
+ * reported by rm_identify() (or "UNKNOWN" plus the raw info bits when
+ * rm_identify() returns NULL), a colon and a space, and finally whatever
+ * the resource manager's rm_desc() callback appends.
+ */
+void
+xlog_outdesc(StringInfo buf, XLogReaderState *record)
+{
+   uint8       recinfo = XLogRecGetInfo(record);
+   RmgrId      rm = XLogRecGetRmid(record);
+   const char *ident;
+
+   appendStringInfoString(buf, RmgrTable[rm].rm_name);
+   appendStringInfoChar(buf, '/');
+
+   ident = RmgrTable[rm].rm_identify(recinfo);
+   if (ident != NULL)
+       appendStringInfo(buf, "%s: ", ident);
+   else
+       appendStringInfo(buf, "UNKNOWN (%X): ", recinfo & ~XLR_INFO_MASK);
+
+   RmgrTable[rm].rm_desc(buf, record);
+}
+
+#ifdef WAL_DEBUG
+
+/*
+ * Append the record's prev pointer, xid, data length and block references
+ * to 'buf'.
+ */
+static void
+xlog_outrec(StringInfo buf, XLogReaderState *record)
+{
+   XLogRecPtr  prev = XLogRecGetPrev(record);
+
+   appendStringInfo(buf, "prev %X/%X; xid %u",
+                    LSN_FORMAT_ARGS(prev),
+                    XLogRecGetXid(record));
+   appendStringInfo(buf, "; len %u", XLogRecGetDataLen(record));
+
+   xlog_block_info(buf, record);
+}
+#endif                         /* WAL_DEBUG */
+
+/*
+ * Append information about every block referenced by an XLogRecord to
+ * 'buf'.
+ *
+ * Each block reference in use contributes "; blkref #N: rel .../.../...",
+ * the fork number when it is not the main fork, the block number, and a
+ * trailing " FPW" when the reference carries a block image.
+ */
+static void
+xlog_block_info(StringInfo buf, XLogReaderState *record)
+{
+   int         id;
+
+   for (id = 0; id <= record->max_block_id; id++)
+   {
+       RelFileNode rel;
+       ForkNumber  fork;
+       BlockNumber blkno;
+
+       /* Only slots that actually hold a block reference are printed */
+       if (XLogRecHasBlockRef(record, id))
+       {
+           XLogRecGetBlockTag(record, id, &rel, &fork, &blkno);
+
+           if (fork == MAIN_FORKNUM)
+               appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, blk %u",
+                                id,
+                                rel.spcNode, rel.dbNode, rel.relNode,
+                                blkno);
+           else
+               appendStringInfo(buf, "; blkref #%d: rel %u/%u/%u, fork %u, blk %u",
+                                id,
+                                rel.spcNode, rel.dbNode, rel.relNode,
+                                fork,
+                                blkno);
+
+           if (XLogRecHasBlockImage(record, id))
+               appendStringInfoString(buf, " FPW");
+       }
+   }
+}
+
+
+/*
+ * Sanity-check a timeline switch that is about to be replayed.
+ *
+ * 'lsn' is the location of the shutdown checkpoint record being replayed
+ * (currently the only place where the timeline can change).  'newTLI' and
+ * 'prevTLI' are the timelines named by that record, 'replayTLI' is the
+ * timeline replay is currently on.  Every failed check is reported at
+ * PANIC level.
+ */
+static void
+checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI,
+                   TimeLineID replayTLI)
+{
+   /* The record's notion of the current (old) timeline must match ours */
+   if (replayTLI != prevTLI)
+       ereport(PANIC,
+               (errmsg("unexpected previous timeline ID %u (current timeline ID %u) in checkpoint record",
+                       prevTLI, replayTLI)));
+
+   /*
+    * The new timeline must not be lower than the current one, and it must
+    * appear in the timeline history we expect to follow.
+    */
+   if (!(newTLI >= replayTLI && tliInHistory(newTLI, expectedTLEs)))
+       ereport(PANIC,
+               (errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
+                       newTLI, replayTLI)));
+
+   /*
+    * Switching to a timeline greater than the one the min recovery point
+    * is on, before that point has been reached, means we could never visit
+    * the min recovery point on the correct timeline.  This can happen if
+    * the archive holds a newer timeline that branched off before the
+    * timeline of the min recovery point and PITR to that newer timeline is
+    * attempted.
+    */
+   if (!XLogRecPtrIsInvalid(minRecoveryPoint))
+   {
+       if (minRecoveryPoint > lsn && minRecoveryPointTLI < newTLI)
+           ereport(PANIC,
+                   (errmsg("unexpected timeline ID %u in checkpoint record, before reaching minimum recovery point %X/%X on timeline %u",
+                           newTLI,
+                           LSN_FORMAT_ARGS(minRecoveryPoint),
+                           minRecoveryPointTLI)));
+   }
+}
+
+
+/*
+ * Fetch the timestamp carried by a WAL record, if it has one.
+ *
+ * Restore-point records and transaction commit/abort records (plain or
+ * prepared) are the record types handled here.  For those the timestamp is
+ * stored in *recordXtime and true is returned; for any other record
+ * *recordXtime is left untouched and the result is false.
+ */
+static bool
+getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
+{
+   uint8       rmid = XLogRecGetRmid(record);
+   uint8       info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+
+   if (rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
+   {
+       xl_restore_point *rp = (xl_restore_point *) XLogRecGetData(record);
+
+       *recordXtime = rp->rp_time;
+       return true;
+   }
+
+   if (rmid == RM_XACT_ID)
+   {
+       switch (info & XLOG_XACT_OPMASK)
+       {
+           case XLOG_XACT_COMMIT:
+           case XLOG_XACT_COMMIT_PREPARED:
+               *recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
+               return true;
+
+           case XLOG_XACT_ABORT:
+           case XLOG_XACT_ABORT_PREPARED:
+               *recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
+               return true;
+
+           default:
+               break;
+       }
+   }
+
+   return false;
+}
+
+/*
+ * Checks whether the current buffer page and backup page stored in the
+ * WAL record are consistent or not. Before comparing the two pages, a
+ * masking can be applied to the pages to ignore certain areas like hint bits,
+ * unused space between pd_lower and pd_upper among other things. This
+ * function should be called once WAL replay has been completed for a
+ * given record.
+ *
+ * Each block reference of the record is handled in turn.  The replayed page
+ * is copied into replay_image_masked and the image stored in the record is
+ * restored into primary_image_masked; both are passed through the resource
+ * manager's rm_mask callback when one is defined, and then compared over
+ * the full BLCKSZ bytes.  A mismatch is reported with elog(FATAL).
+ *
+ * Block references are skipped when the image was already applied by redo,
+ * when no valid buffer can be read for the block, or when the page LSN is
+ * already past the end of this record.
+ */
+static void
+verifyBackupPageConsistency(XLogReaderState *record)
+{
+   RmgrId      rmid = XLogRecGetRmid(record);
+   RelFileNode rnode;
+   ForkNumber  forknum;
+   BlockNumber blkno;
+   int         block_id;
+
+   /* Records with no backup blocks have no need for consistency checks. */
+   if (!XLogRecHasAnyBlockRefs(record))
+       return;
+
+   Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
+
+   for (block_id = 0; block_id <= record->max_block_id; block_id++)
+   {
+       Buffer      buf;
+       Page        page;
+
+       if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+       {
+           /*
+            * WAL record doesn't contain a block reference with the given id.
+            * Do nothing.
+            */
+           continue;
+       }
+
+       Assert(XLogRecHasBlockImage(record, block_id));
+
+       if (XLogRecBlockImageApply(record, block_id))
+       {
+           /*
+            * WAL record has already applied the page, so bypass the
+            * consistency check as that would result in comparing the full
+            * page stored in the record with itself.
+            */
+           continue;
+       }
+
+       /*
+        * Read the contents from the current buffer and store it in a
+        * temporary page.  If no valid buffer is returned there is nothing
+        * to compare against, so the block is skipped.
+        */
+       buf = XLogReadBufferExtended(rnode, forknum, blkno,
+                                    RBM_NORMAL_NO_LOG);
+       if (!BufferIsValid(buf))
+           continue;
+
+       LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+       page = BufferGetPage(buf);
+
+       /*
+        * Take a copy of the local page where WAL has been applied to have a
+        * comparison base before masking it...
+        */
+       memcpy(replay_image_masked, page, BLCKSZ);
+
+       /* No need for this page anymore now that a copy is in. */
+       UnlockReleaseBuffer(buf);
+
+       /*
+        * If the block LSN is already ahead of this WAL record, we can't
+        * expect contents to match.  This can happen if recovery is
+        * restarted.
+        */
+       if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
+           continue;
+
+       /*
+        * Read the contents from the backup copy, stored in WAL record and
+        * store it in a temporary page. There is no need to allocate a new
+        * page here, a local buffer is fine to hold its contents and a mask
+        * can be directly applied on it.
+        */
+       if (!RestoreBlockImage(record, block_id, primary_image_masked))
+           elog(ERROR, "failed to restore block image");
+
+       /*
+        * If masking function is defined, mask both the primary and replay
+        * images
+        */
+       if (RmgrTable[rmid].rm_mask != NULL)
+       {
+           RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
+           RmgrTable[rmid].rm_mask(primary_image_masked, blkno);
+       }
+
+       /* Time to compare the primary and replay images. */
+       if (memcmp(replay_image_masked, primary_image_masked, BLCKSZ) != 0)
+       {
+           elog(FATAL,
+                "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
+                rnode.spcNode, rnode.dbNode, rnode.relNode,
+                forknum, blkno);
+       }
+   }
+}
+
+/*
+ * For point-in-time recovery, this function decides whether we want to
+ * stop applying the XLOG before the current record.
+ *
+ * Returns true if we are stopping, false otherwise. If stopping, some
+ * information is saved in recoveryStopXid et al for use in annotating the
+ * new timeline's history file.
+ *
+ * The checks are made in this order: an "immediate" target once
+ * consistency has been reached, a non-inclusive LSN target, and then, for
+ * transaction commit/abort records only, a non-inclusive XID target and a
+ * time target (inclusive or not).  Nothing is checked unless archive
+ * recovery was requested.
+ */
+static bool
+recoveryStopsBefore(XLogReaderState *record)
+{
+   bool        stopsHere = false;
+   uint8       xact_info;
+   bool        isCommit;
+   TimestampTz recordXtime = 0;
+   TransactionId recordXid;
+
+   /*
+    * Ignore recovery target settings when not in archive recovery (meaning
+    * we are in crash recovery).
+    */
+   if (!ArchiveRecoveryRequested)
+       return false;
+
+   /* Check if we should stop as soon as reaching consistency */
+   if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
+   {
+       ereport(LOG,
+               (errmsg("recovery stopping after reaching consistency")));
+
+       recoveryStopAfter = false;
+       recoveryStopXid = InvalidTransactionId;
+       recoveryStopLSN = InvalidXLogRecPtr;
+       recoveryStopTime = 0;
+       recoveryStopName[0] = '\0';
+       return true;
+   }
+
+   /* Check if target LSN has been reached */
+   if (recoveryTarget == RECOVERY_TARGET_LSN &&
+       !recoveryTargetInclusive &&
+       record->ReadRecPtr >= recoveryTargetLSN)
+   {
+       recoveryStopAfter = false;
+       recoveryStopXid = InvalidTransactionId;
+       recoveryStopLSN = record->ReadRecPtr;
+       recoveryStopTime = 0;
+       recoveryStopName[0] = '\0';
+       ereport(LOG,
+               (errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
+                       LSN_FORMAT_ARGS(recoveryStopLSN))));
+       return true;
+   }
+
+   /* Otherwise we only consider stopping before COMMIT or ABORT records. */
+   if (XLogRecGetRmid(record) != RM_XACT_ID)
+       return false;
+
+   xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
+
+   if (xact_info == XLOG_XACT_COMMIT)
+   {
+       isCommit = true;
+       recordXid = XLogRecGetXid(record);
+   }
+   else if (xact_info == XLOG_XACT_COMMIT_PREPARED)
+   {
+       xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
+       xl_xact_parsed_commit parsed;
+
+       isCommit = true;
+       ParseCommitRecord(XLogRecGetInfo(record),
+                         xlrec,
+                         &parsed);
+       recordXid = parsed.twophase_xid;
+   }
+   else if (xact_info == XLOG_XACT_ABORT)
+   {
+       isCommit = false;
+       recordXid = XLogRecGetXid(record);
+   }
+   else if (xact_info == XLOG_XACT_ABORT_PREPARED)
+   {
+       xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
+       xl_xact_parsed_abort parsed;
+
+       isCommit = false;
+       ParseAbortRecord(XLogRecGetInfo(record),
+                        xlrec,
+                        &parsed);
+       recordXid = parsed.twophase_xid;
+   }
+   else
+       return false;
+
+   if (recoveryTarget == RECOVERY_TARGET_XID && !recoveryTargetInclusive)
+   {
+       /*
+        * There can be only one transaction end record with this exact
+        * transactionid
+        *
+        * when testing for an xid, we MUST test for equality only, since
+        * transactions are numbered in the order they start, not the order
+        * they complete. A higher numbered xid will complete before you about
+        * 50% of the time...
+        */
+       stopsHere = (recordXid == recoveryTargetXid);
+   }
+
+   if (recoveryTarget == RECOVERY_TARGET_TIME &&
+       getRecordTimestamp(record, &recordXtime))
+   {
+       /*
+        * There can be many transactions that share the same commit time, so
+        * we stop after the last one, if we are inclusive, or stop at the
+        * first one if we are exclusive
+        */
+       if (recoveryTargetInclusive)
+           stopsHere = (recordXtime > recoveryTargetTime);
+       else
+           stopsHere = (recordXtime >= recoveryTargetTime);
+   }
+
+   if (stopsHere)
+   {
+       recoveryStopAfter = false;
+       recoveryStopXid = recordXid;
+       recoveryStopTime = recordXtime;
+       recoveryStopLSN = InvalidXLogRecPtr;
+       recoveryStopName[0] = '\0';
+
+       if (isCommit)
+       {
+           ereport(LOG,
+                   (errmsg("recovery stopping before commit of transaction %u, time %s",
+                           recoveryStopXid,
+                           timestamptz_to_str(recoveryStopTime))));
+       }
+       else
+       {
+           ereport(LOG,
+                   (errmsg("recovery stopping before abort of transaction %u, time %s",
+                           recoveryStopXid,
+                           timestamptz_to_str(recoveryStopTime))));
+       }
+   }
+
+   return stopsHere;
+}
+
+/*
+ * Same as recoveryStopsBefore, but called after applying the record.
+ *
+ * We also track the timestamp of the latest applied COMMIT/ABORT
+ * record in XLogRecoveryCtl->recoveryLastXTime.
+ *
+ * Returns true if recovery should stop after this record, false otherwise.
+ * When stopping, recoveryStopAfter is set to true and the other
+ * recoveryStop* variables describe the stop point.  The checks are made in
+ * this order: a named restore point target, an inclusive LSN target, an
+ * inclusive XID target (transaction commit/abort records only), and an
+ * "immediate" target once consistency has been reached.
+ */
+static bool
+recoveryStopsAfter(XLogReaderState *record)
+{
+   uint8       info;
+   uint8       xact_info;
+   uint8       rmid;
+
+   /*
+    * Initialized like in recoveryStopsBefore, so that recoveryStopTime can
+    * never be assigned an indeterminate value below.
+    */
+   TimestampTz recordXtime = 0;
+
+   /*
+    * Ignore recovery target settings when not in archive recovery (meaning
+    * we are in crash recovery).
+    */
+   if (!ArchiveRecoveryRequested)
+       return false;
+
+   info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
+   rmid = XLogRecGetRmid(record);
+
+   /*
+    * There can be many restore points that share the same name; we stop at
+    * the first one.
+    */
+   if (recoveryTarget == RECOVERY_TARGET_NAME &&
+       rmid == RM_XLOG_ID && info == XLOG_RESTORE_POINT)
+   {
+       xl_restore_point *recordRestorePointData;
+
+       recordRestorePointData = (xl_restore_point *) XLogRecGetData(record);
+
+       if (strcmp(recordRestorePointData->rp_name, recoveryTargetName) == 0)
+       {
+           recoveryStopAfter = true;
+           recoveryStopXid = InvalidTransactionId;
+           recoveryStopLSN = InvalidXLogRecPtr;
+           (void) getRecordTimestamp(record, &recoveryStopTime);
+           strlcpy(recoveryStopName, recordRestorePointData->rp_name, MAXFNAMELEN);
+
+           ereport(LOG,
+                   (errmsg("recovery stopping at restore point \"%s\", time %s",
+                           recoveryStopName,
+                           timestamptz_to_str(recoveryStopTime))));
+           return true;
+       }
+   }
+
+   /* Check if the target LSN has been reached */
+   if (recoveryTarget == RECOVERY_TARGET_LSN &&
+       recoveryTargetInclusive &&
+       record->ReadRecPtr >= recoveryTargetLSN)
+   {
+       recoveryStopAfter = true;
+       recoveryStopXid = InvalidTransactionId;
+       recoveryStopLSN = record->ReadRecPtr;
+       recoveryStopTime = 0;
+       recoveryStopName[0] = '\0';
+       ereport(LOG,
+               (errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
+                       LSN_FORMAT_ARGS(recoveryStopLSN))));
+       return true;
+   }
+
+   /* The remaining record-specific checks only apply to xact records */
+   if (rmid != RM_XACT_ID)
+       return false;
+
+   xact_info = info & XLOG_XACT_OPMASK;
+
+   if (xact_info == XLOG_XACT_COMMIT ||
+       xact_info == XLOG_XACT_COMMIT_PREPARED ||
+       xact_info == XLOG_XACT_ABORT ||
+       xact_info == XLOG_XACT_ABORT_PREPARED)
+   {
+       TransactionId recordXid;
+
+       /* Update the last applied transaction timestamp */
+       if (getRecordTimestamp(record, &recordXtime))
+           SetLatestXTime(recordXtime);
+
+       /* Extract the XID of the committed/aborted transaction */
+       if (xact_info == XLOG_XACT_COMMIT_PREPARED)
+       {
+           xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
+           xl_xact_parsed_commit parsed;
+
+           ParseCommitRecord(XLogRecGetInfo(record),
+                             xlrec,
+                             &parsed);
+           recordXid = parsed.twophase_xid;
+       }
+       else if (xact_info == XLOG_XACT_ABORT_PREPARED)
+       {
+           xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
+           xl_xact_parsed_abort parsed;
+
+           ParseAbortRecord(XLogRecGetInfo(record),
+                            xlrec,
+                            &parsed);
+           recordXid = parsed.twophase_xid;
+       }
+       else
+           recordXid = XLogRecGetXid(record);
+
+       /*
+        * There can be only one transaction end record with this exact
+        * transactionid
+        *
+        * when testing for an xid, we MUST test for equality only, since
+        * transactions are numbered in the order they start, not the order
+        * they complete. A higher numbered xid will complete before you about
+        * 50% of the time...
+        */
+       if (recoveryTarget == RECOVERY_TARGET_XID && recoveryTargetInclusive &&
+           recordXid == recoveryTargetXid)
+       {
+           recoveryStopAfter = true;
+           recoveryStopXid = recordXid;
+           recoveryStopTime = recordXtime;
+           recoveryStopLSN = InvalidXLogRecPtr;
+           recoveryStopName[0] = '\0';
+
+           if (xact_info == XLOG_XACT_COMMIT ||
+               xact_info == XLOG_XACT_COMMIT_PREPARED)
+           {
+               ereport(LOG,
+                       (errmsg("recovery stopping after commit of transaction %u, time %s",
+                               recoveryStopXid,
+                               timestamptz_to_str(recoveryStopTime))));
+           }
+           else if (xact_info == XLOG_XACT_ABORT ||
+                    xact_info == XLOG_XACT_ABORT_PREPARED)
+           {
+               ereport(LOG,
+                       (errmsg("recovery stopping after abort of transaction %u, time %s",
+                               recoveryStopXid,
+                               timestamptz_to_str(recoveryStopTime))));
+           }
+           return true;
+       }
+   }
+
+   /* Check if we should stop as soon as reaching consistency */
+   if (recoveryTarget == RECOVERY_TARGET_IMMEDIATE && reachedConsistency)
+   {
+       ereport(LOG,
+               (errmsg("recovery stopping after reaching consistency")));
+
+       recoveryStopAfter = true;
+       recoveryStopXid = InvalidTransactionId;
+       recoveryStopTime = 0;
+       recoveryStopLSN = InvalidXLogRecPtr;
+       recoveryStopName[0] = '\0';
+       return true;
+   }
+
+   return false;
+}
+
+/*
+ * Build the comment that goes into the history file, explaining why and
+ * where the timeline changed.
+ *
+ * The text depends on the kind of recovery target and on the recoveryStop*
+ * variables.  The result is a pstrdup'd copy of the formatted text.
+ *
+ * NOTE(review): unlike the other branches, the TIME and LSN formats end
+ * with a newline.  They are reproduced unchanged here to keep the emitted
+ * text identical; confirm whether that newline is intentional.
+ */
+static char *
+getRecoveryStopReason(void)
+{
+   const char *side = recoveryStopAfter ? "after" : "before";
+   char        buf[200];
+
+   switch (recoveryTarget)
+   {
+       case RECOVERY_TARGET_XID:
+           snprintf(buf, sizeof(buf),
+                    "%s transaction %u",
+                    side,
+                    recoveryStopXid);
+           break;
+
+       case RECOVERY_TARGET_TIME:
+           snprintf(buf, sizeof(buf),
+                    "%s %s\n",
+                    side,
+                    timestamptz_to_str(recoveryStopTime));
+           break;
+
+       case RECOVERY_TARGET_LSN:
+           snprintf(buf, sizeof(buf),
+                    "%s LSN %X/%X\n",
+                    side,
+                    LSN_FORMAT_ARGS(recoveryStopLSN));
+           break;
+
+       case RECOVERY_TARGET_NAME:
+           snprintf(buf, sizeof(buf),
+                    "at restore point \"%s\"",
+                    recoveryStopName);
+           break;
+
+       case RECOVERY_TARGET_IMMEDIATE:
+           snprintf(buf, sizeof(buf), "reached consistency");
+           break;
+
+       default:
+           snprintf(buf, sizeof(buf), "no recovery target specified");
+           break;
+   }
+
+   return pstrdup(buf);
+}
+
+/*
+ * Wait until shared recoveryPauseState is set to RECOVERY_NOT_PAUSED.
+ *
+ * endOfRecovery is true if the recovery target is reached and
+ * the paused state starts at the end of recovery because of
+ * recovery_target_action=pause, and false otherwise.  It only selects which
+ * LOG message and hint are emitted.
+ *
+ * Returns immediately, without logging anything, when hot standby is not
+ * active or promotion has already been triggered.  The wait also ends
+ * early when CheckForStandbyTrigger() returns true.
+ */
+static void
+recoveryPausesHere(bool endOfRecovery)
+{
+   /* Don't pause unless users can connect! */
+   if (!LocalHotStandbyActive)
+       return;
+
+   /* Don't pause after standby promotion has been triggered */
+   if (LocalPromoteIsTriggered)
+       return;
+
+   if (endOfRecovery)
+       ereport(LOG,
+               (errmsg("pausing at the end of recovery"),
+                errhint("Execute pg_wal_replay_resume() to promote.")));
+   else
+       ereport(LOG,
+               (errmsg("recovery has paused"),
+                errhint("Execute pg_wal_replay_resume() to continue.")));
+
+   /* loop until recoveryPauseState is set to RECOVERY_NOT_PAUSED */
+   while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
+   {
+       HandleStartupProcInterrupts();
+
+       /*
+        * Leave via break rather than return: a previous iteration may have
+        * slept on the condition variable, so the cancel call after the
+        * loop must run on this exit path as well.
+        */
+       if (CheckForStandbyTrigger())
+           break;
+
+       /*
+        * If recovery pause is requested then set it paused.  While we are in
+        * the loop, user might resume and pause again so set this every time.
+        */
+       ConfirmRecoveryPaused();
+
+       /*
+        * We wait on a condition variable that will wake us as soon as the
+        * pause ends, but we use a timeout so we can check the above exit
+        * condition periodically too.
+        */
+       ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
+                                   WAIT_EVENT_RECOVERY_PAUSE);
+   }
+   ConditionVariableCancelSleep();
+}
+
+/*
+ * When recovery_min_apply_delay is set, we wait long enough to make sure
+ * certain record types are applied at least that interval behind the primary.
+ *
+ * Returns true if we waited.  More precisely, true is returned once the
+ * wait loop has been entered, including when the loop ends early because
+ * CheckForStandbyTrigger() returns true.  False is returned when no delay
+ * is configured, consistency has not been reached, archive recovery was not
+ * requested, the record is not a COMMIT or COMMIT PREPARED record with a
+ * timestamp, or the delay has already elapsed.
+ *
+ * Note that the delay is calculated between the WAL record log time and
+ * the current time on standby. We would prefer to keep track of when this
+ * standby received each WAL record, which would allow a more consistent
+ * approach and one not affected by time synchronisation issues, but that
+ * is significantly more effort and complexity for little actual gain in
+ * usability.
+ */
+static bool
+recoveryApplyDelay(XLogReaderState *record)
+{
+   uint8       xact_info;
+   TimestampTz xtime;
+   TimestampTz delayUntil;
+   long        msecs;
+
+   /* nothing to do if no delay configured */
+   if (recovery_min_apply_delay <= 0)
+       return false;
+
+   /* no delay is applied on a database not yet consistent */
+   if (!reachedConsistency)
+       return false;
+
+   /* nothing to do if crash recovery is requested */
+   if (!ArchiveRecoveryRequested)
+       return false;
+
+   /*
+    * Is it a COMMIT record?
+    *
+    * We deliberately choose not to delay aborts since they have no effect on
+    * MVCC. We already allow replay of records that don't have a timestamp,
+    * so there is already opportunity for issues caused by early conflicts on
+    * standbys.
+    */
+   if (XLogRecGetRmid(record) != RM_XACT_ID)
+       return false;
+
+   xact_info = XLogRecGetInfo(record) & XLOG_XACT_OPMASK;
+
+   if (xact_info != XLOG_XACT_COMMIT &&
+       xact_info != XLOG_XACT_COMMIT_PREPARED)
+       return false;
+
+   if (!getRecordTimestamp(record, &xtime))
+       return false;
+
+   delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
+
+   /*
+    * Exit without arming the latch if it's already past time to apply this
+    * record
+    */
+   msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(), delayUntil);
+   if (msecs <= 0)
+       return false;
+
+   while (true)
+   {
+       ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+
+       /*
+        * This might change recovery_min_apply_delay or the trigger file's
+        * location.
+        */
+       HandleStartupProcInterrupts();
+
+       if (CheckForStandbyTrigger())
+           break;
+
+       /*
+        * Recalculate delayUntil as recovery_min_apply_delay could have
+        * changed while waiting in this loop.
+        */
+       delayUntil = TimestampTzPlusMilliseconds(xtime, recovery_min_apply_delay);
+
+       /*
+        * Wait for difference between GetCurrentTimestamp() and delayUntil.
+        */
+       msecs = TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
+                                               delayUntil);
+
+       if (msecs <= 0)
+           break;
+
+       elog(DEBUG2, "recovery apply delay %ld milliseconds", msecs);
+
+       (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
+                        WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+                        msecs,
+                        WAIT_EVENT_RECOVERY_APPLY_DELAY);
+   }
+   return true;
+}
+
+/*
+ * Return the recovery pause state currently stored in shared memory.
+ *
+ * The value is read while holding XLogRecoveryCtl->info_lck.
+ */
+RecoveryPauseState
+GetRecoveryPauseState(void)
+{
+   RecoveryPauseState current;
+
+   /* Take a snapshot of the shared value under the spinlock */
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   current = XLogRecoveryCtl->recoveryPauseState;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   return current;
+}
+
+/*
+ * Request or cancel a recovery pause.
+ *
+ * With recoveryPause true, the shared state moves from 'not paused' to
+ * 'pause requested'; any other state is left as it is.  The pause is
+ * confirmed later by ConfirmRecoveryPaused.  With recoveryPause false, the
+ * state is reset to 'not paused' and everybody waiting on
+ * recoveryNotPausedCV is woken up.
+ */
+void
+SetRecoveryPause(bool recoveryPause)
+{
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   if (recoveryPause)
+   {
+       /* Only a 'not paused' state is turned into a pause request */
+       if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_NOT_PAUSED)
+           XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSE_REQUESTED;
+   }
+   else
+       XLogRecoveryCtl->recoveryPauseState = RECOVERY_NOT_PAUSED;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /* The broadcast is done after the spinlock has been released */
+   if (!recoveryPause)
+       ConditionVariableBroadcast(&XLogRecoveryCtl->recoveryNotPausedCV);
+}
+
+/*
+ * Acknowledge a pending pause request.
+ *
+ * If the shared pause state is RECOVERY_PAUSE_REQUESTED it becomes
+ * RECOVERY_PAUSED; any other state is left unchanged.  The check and the
+ * update happen under XLogRecoveryCtl->info_lck.
+ */
+static void
+ConfirmRecoveryPaused(void)
+{
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+
+   /* Nothing to confirm unless a pause has actually been requested */
+   if (XLogRecoveryCtl->recoveryPauseState == RECOVERY_PAUSE_REQUESTED)
+   {
+       XLogRecoveryCtl->recoveryPauseState = RECOVERY_PAUSED;
+   }
+
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+}
+
+
+/*
+ * Attempt to read the next XLOG record.
+ *
+ * Before first call, the reader needs to be positioned to the first record
+ * by calling XLogBeginRead().
+ *
+ * If no valid record is available, returns NULL, or fails if emode is PANIC.
+ * (emode must be either PANIC or LOG). In standby mode, retries until a valid
+ * record is available.
+ *
+ * emode, fetching_ckpt and replayTLI are stored in the reader's private
+ * data, from where XLogPageRead picks them up.  When archive recovery was
+ * requested but not yet entered, and we are not fetching a checkpoint
+ * record, a failed read switches to archive recovery and the read is
+ * retried.
+ */
+static XLogRecord *
+ReadRecord(XLogReaderState *xlogreader, int emode,
+          bool fetching_ckpt, TimeLineID replayTLI)
+{
+   XLogRecord *record;
+   XLogPageReadPrivate *private = (XLogPageReadPrivate *) xlogreader->private_data;
+
+   /* Pass through parameters to XLogPageRead */
+   private->fetching_ckpt = fetching_ckpt;
+   private->emode = emode;
+   private->randAccess = (xlogreader->ReadRecPtr == InvalidXLogRecPtr);
+   private->replayTLI = replayTLI;
+
+   /* This is the first attempt to read this page. */
+   lastSourceFailed = false;
+
+   for (;;)
+   {
+       char       *errormsg;
+
+       record = XLogReadRecord(xlogreader, &errormsg);
+       if (record == NULL)
+       {
+           /*
+            * When not in standby mode and we find that WAL ends in an
+            * incomplete record, keep track of that record.  After recovery
+            * is done, we'll write a record to indicate to downstream WAL
+            * readers that that portion is to be ignored.
+            */
+           if (!StandbyMode &&
+               !XLogRecPtrIsInvalid(xlogreader->abortedRecPtr))
+           {
+               abortedRecPtr = xlogreader->abortedRecPtr;
+               missingContrecPtr = xlogreader->missingContrecPtr;
+           }
+
+           if (readFile >= 0)
+           {
+               close(readFile);
+               readFile = -1;
+           }
+
+           /*
+            * We only end up here without a message when XLogPageRead()
+            * failed - in that case we already logged something. In
+            * StandbyMode that only happens if we have been triggered, so we
+            * shouldn't loop anymore in that case.
+            */
+           if (errormsg)
+               ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
+                       (errmsg_internal("%s", errormsg) /* already translated */ ));
+       }
+
+       /*
+        * Check page TLI is one of the expected values.  If it is not, the
+        * record is discarded and treated like a failed read.
+        */
+       else if (!tliInHistory(xlogreader->latestPageTLI, expectedTLEs))
+       {
+           char        fname[MAXFNAMELEN];
+           XLogSegNo   segno;
+           int32       offset;
+
+           XLByteToSeg(xlogreader->latestPagePtr, segno, wal_segment_size);
+           offset = XLogSegmentOffset(xlogreader->latestPagePtr,
+                                      wal_segment_size);
+           XLogFileName(fname, xlogreader->seg.ws_tli, segno,
+                        wal_segment_size);
+           ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
+                   (errmsg("unexpected timeline ID %u in log segment %s, offset %u",
+                           xlogreader->latestPageTLI,
+                           fname,
+                           offset)));
+           record = NULL;
+       }
+
+       if (record)
+       {
+           /* Great, got a record */
+           return record;
+       }
+       else
+       {
+           /* No valid record available from this source */
+           lastSourceFailed = true;
+
+           /*
+            * If archive recovery was requested, but we were still doing
+            * crash recovery, switch to archive recovery and retry using the
+            * offline archive. We have now replayed all the valid WAL in
+            * pg_wal, so we are presumably now consistent.
+            *
+            * We require that there's at least some valid WAL present in
+            * pg_wal, however (!fetching_ckpt).  We could recover using the
+            * WAL from the archive, even if pg_wal is completely empty, but
+            * we'd have no idea how far we'd have to replay to reach
+            * consistency.  So err on the safe side and give up.
+            */
+           if (!InArchiveRecovery && ArchiveRecoveryRequested &&
+               !fetching_ckpt)
+           {
+               ereport(DEBUG1,
+                       (errmsg_internal("reached end of WAL in pg_wal, entering archive recovery")));
+               InArchiveRecovery = true;
+               if (StandbyModeRequested)
+                   StandbyMode = true;
+
+               SwitchIntoArchiveRecovery(xlogreader->EndRecPtr, replayTLI);
+               minRecoveryPoint = xlogreader->EndRecPtr;
+               minRecoveryPointTLI = replayTLI;
+
+               CheckRecoveryConsistency();
+
+               /*
+                * Before we retry, reset lastSourceFailed and currentSource
+                * so that we will check the archive next.
+                */
+               lastSourceFailed = false;
+               currentSource = XLOG_FROM_ANY;
+
+               continue;
+           }
+
+           /* In standby mode, loop back to retry. Otherwise, give up. */
+           if (StandbyMode && !CheckForStandbyTrigger())
+               continue;
+           else
+               return NULL;
+       }
+   }
+}
+
+/*
+ * Read the XLOG page starting at targetPagePtr into readBuf, opening the
+ * WAL segment that contains it if it is not open already.
+ *
+ * Returns readLen, the number of valid bytes on the page, if the page is
+ * read successfully, or -1 in case of errors.  readLen is XLOG_BLCKSZ
+ * unless the page is being streamed and flushedUpto falls inside it, in
+ * which case it is only the part of the page received so far.  When errors
+ * occur, they are ereport'ed, but only if they have not been previously
+ * reported.
+ *
+ * This is responsible for restoring files from archive as needed, as well
+ * as for waiting for the requested WAL record to arrive in standby mode.
+ *
+ * Arguments:
+ *  xlogreader    - reader state; its private_data must point to an
+ *                  XLogPageReadPrivate, from which emode, randAccess,
+ *                  fetching_ckpt and replayTLI are taken.
+ *  targetPagePtr - WAL location of the start of the page to read.
+ *  reqLen        - number of bytes of that page the caller needs.
+ *  targetRecPtr  - passed on to WaitForWALToBecomeAvailable() as the
+ *                  position of the record the caller is interested in.
+ *  readBuf       - destination buffer; XLOG_BLCKSZ bytes are read into it.
+ *
+ * 'emode' (from the XLogPageReadPrivate struct) specifies the log level
+ * used for reporting "file not found" or "end of WAL" situations in archive
+ * recovery, or in standby mode when a trigger file is found. If set to
+ * WARNING or below, XLogPageRead() returns -1 in those situations; on
+ * higher log levels the ereport() won't return.
+ *
+ * In standby mode, if after a successful return of XLogPageRead() the
+ * caller finds the record it's interested in to be broken, it should
+ * ereport the error with the level determined by
+ * emode_for_corrupt_record(), and then set lastSourceFailed
+ * and call XLogPageRead() again with the same arguments. This lets
+ * XLogPageRead() try fetching the record from another source, or
+ * sleep and retry.
+ */
+static int
+XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
+            XLogRecPtr targetRecPtr, char *readBuf)
+{
+   XLogPageReadPrivate *private =
+   (XLogPageReadPrivate *) xlogreader->private_data;
+   int         emode = private->emode;
+   uint32      targetPageOff;
+   XLogSegNo   targetSegNo PG_USED_FOR_ASSERTS_ONLY;
+   int         r;
+
+   XLByteToSeg(targetPagePtr, targetSegNo, wal_segment_size);
+   targetPageOff = XLogSegmentOffset(targetPagePtr, wal_segment_size);
+
+   /*
+    * See if we need to switch to a new segment because the requested record
+    * is not in the currently open one.
+    *
+    * At this point readSegNo still identifies the segment that is currently
+    * open; it is only updated to the target page's segment after this
+    * block.
+    */
+   if (readFile >= 0 &&
+       !XLByteInSeg(targetPagePtr, readSegNo, wal_segment_size))
+   {
+       /*
+        * Request a restartpoint if we've replayed too much xlog since the
+        * last one.
+        *
+        * NOTE(review): the check is repeated after calling GetRedoRecPtr(),
+        * presumably so that the second test works from a refreshed redo
+        * pointer before a checkpoint is actually requested -- confirm.
+        */
+       if (ArchiveRecoveryRequested && IsUnderPostmaster)
+       {
+           if (XLogCheckpointNeeded(readSegNo))
+           {
+               (void) GetRedoRecPtr();
+               if (XLogCheckpointNeeded(readSegNo))
+                   RequestCheckpoint(CHECKPOINT_CAUSE_XLOG);
+           }
+       }
+
+       close(readFile);
+       readFile = -1;
+       readSource = XLOG_FROM_ANY;
+   }
+
+   XLByteToSeg(targetPagePtr, readSegNo, wal_segment_size);
+
+retry:
+   /*
+    * See if we need to retrieve more data: either no segment file is open,
+    * or we are streaming and the flushed position has not yet reached
+    * targetPagePtr + reqLen.  If WaitForWALToBecomeAvailable() reports that
+    * the WAL cannot be obtained, reset all read state and return -1.
+    */
+   if (readFile < 0 ||
+       (readSource == XLOG_FROM_STREAM &&
+        flushedUpto < targetPagePtr + reqLen))
+   {
+       if (!WaitForWALToBecomeAvailable(targetPagePtr + reqLen,
+                                        private->randAccess,
+                                        private->fetching_ckpt,
+                                        targetRecPtr,
+                                        private->replayTLI,
+                                        xlogreader->EndRecPtr))
+       {
+           if (readFile >= 0)
+               close(readFile);
+           readFile = -1;
+           readLen = 0;
+           readSource = XLOG_FROM_ANY;
+
+           return -1;
+       }
+   }
+
+   /*
+    * At this point, we have the right segment open and if we're streaming we
+    * know the requested record is in it.
+    */
+   Assert(readFile != -1);
+
+   /*
+    * If the current segment is being streamed from the primary, calculate
+    * how much of the current page we have received already. We know the
+    * requested record has been received, but this is for the benefit of
+    * future calls, to allow quick exit at the top of this function.
+    *
+    * For any other source the whole page is considered available.
+    */
+   if (readSource == XLOG_FROM_STREAM)
+   {
+       if (((targetPagePtr) / XLOG_BLCKSZ) != (flushedUpto / XLOG_BLCKSZ))
+           readLen = XLOG_BLCKSZ;
+       else
+           readLen = XLogSegmentOffset(flushedUpto, wal_segment_size) -
+               targetPageOff;
+   }
+   else
+       readLen = XLOG_BLCKSZ;
+
+   /*
+    * Read the requested page.
+    *
+    * A full XLOG_BLCKSZ is always read at the page's offset within the
+    * segment, regardless of readLen.  On an error (r < 0) or a short read,
+    * the failure is reported at the level chosen by
+    * emode_for_corrupt_record() and the page is treated as invalid.  errno
+    * is saved right after the read and restored before the ereport() that
+    * uses %m, since the intervening calls could change it.
+    */
+   readOff = targetPageOff;
+
+   pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
+   r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
+   if (r != XLOG_BLCKSZ)
+   {
+       char        fname[MAXFNAMELEN];
+       int         save_errno = errno;
+
+       pgstat_report_wait_end();
+       XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
+       if (r < 0)
+       {
+           errno = save_errno;
+           ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+                   (errcode_for_file_access(),
+                    errmsg("could not read from log segment %s, offset %u: %m",
+                           fname, readOff)));
+       }
+       else
+           ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
+                   (errcode(ERRCODE_DATA_CORRUPTED),
+                    errmsg("could not read from log segment %s, offset %u: read %d of %zu",
+                           fname, readOff, r, (Size) XLOG_BLCKSZ)));
+       goto next_record_is_invalid;
+   }
+   pgstat_report_wait_end();
+
+   Assert(targetSegNo == readSegNo);
+   Assert(targetPageOff == readOff);
+   Assert(reqLen <= readLen);
+
+   xlogreader->seg.ws_tli = curFileTLI;
+
+   /*
+    * Check the page header immediately, so that we can retry immediately if
+    * it's not valid. This may seem unnecessary, because ReadPageInternal()
+    * validates the page header anyway, and would propagate the failure up to
+    * ReadRecord(), which would retry. However, there's a corner case with
+    * continuation records, if a record is split across two pages such that
+    * we would need to read the two pages from different sources. For
+    * example, imagine a scenario where a streaming replica is started up,
+    * and replay reaches a record that's split across two WAL segments. The
+    * first page is only available locally, in pg_wal, because it's already
+    * been recycled on the primary. The second page, however, is not present
+    * in pg_wal, and we should stream it from the primary. There is a
+    * recycled WAL segment present in pg_wal, with garbage contents, however.
+    * We would read the first page from the local WAL segment, but when
+    * reading the second page, we would read the bogus, recycled, WAL
+    * segment. If we didn't catch that case here, we would never recover,
+    * because ReadRecord() would retry reading the whole record from the
+    * beginning.
+    *
+    * Of course, this only catches errors in the page header, which is what
+    * happens in the case of a recycled WAL segment. Other kinds of errors or
+    * corruption still have the same problem. But this at least fixes the
+    * common case, which can happen as part of normal operation.
+    *
+    * Validating the page header is cheap enough that doing it twice
+    * shouldn't be a big deal from a performance point of view.
+    *
+    * When not in standby mode, an invalid page header should cause recovery
+    * to end, not retry reading the page, so we don't need to validate the
+    * page header here for the retry. Instead, ReadPageInternal() is
+    * responsible for the validation.
+    */
+   if (StandbyMode &&
+       !XLogReaderValidatePageHeader(xlogreader, targetPagePtr, readBuf))
+   {
+       /*
+        * Emit this error right now then retry this page immediately. Use
+        * errmsg_internal() because the message was already translated.
+        */
+       if (xlogreader->errormsg_buf[0])
+           ereport(emode_for_corrupt_record(emode, xlogreader->EndRecPtr),
+                   (errmsg_internal("%s", xlogreader->errormsg_buf)));
+
+       /* reset any error XLogReaderValidatePageHeader() might have set */
+       xlogreader->errormsg_buf[0] = '\0';
+       goto next_record_is_invalid;
+   }
+
+   return readLen;
+
+next_record_is_invalid:
+   lastSourceFailed = true;
+
+   if (readFile >= 0)
+       close(readFile);
+   readFile = -1;
+   readLen = 0;
+   readSource = XLOG_FROM_ANY;
+
+   /*
+    * In standby-mode, keep trying: since readFile is now -1, jumping back to
+    * 'retry' calls WaitForWALToBecomeAvailable() again, which will see
+    * lastSourceFailed.  Otherwise, report the failure to the caller.
+    */
+   if (StandbyMode)
+       goto retry;
+   else
+       return -1;
+}
+
+/*
+ * Open the WAL segment containing WAL location 'RecPtr'.
+ *
+ * The segment can be fetched via restore_command, or via walreceiver having
+ * streamed the record, or it can already be present in pg_wal. Checking
+ * pg_wal is mainly for crash recovery, but it will be polled in standby mode
+ * too, in case someone copies a new segment directly to pg_wal. That is not
+ * documented or recommended, though.
+ *
+ * If 'randAccess' is true, curFileTLI is reset to 0 before each attempt to
+ * read the segment from the archive or pg_wal.
+ *
+ * If 'fetching_ckpt' is true, we're fetching a checkpoint record, and should
+ * prepare to read WAL starting from RedoStartLSN after this.
+ *
+ * 'RecPtr' might not point to the beginning of the record we're interested
+ * in, it might also point to the page or segment header. In that case,
+ * 'tliRecPtr' is the position of the WAL record we're interested in. It is
+ * used to decide which timeline to stream the requested WAL from.
+ *
+ * 'replayTLI' and 'replayLSN' are the current replay timeline and LSN, so
+ * that if we scan for new timelines, we can reject a switch to a timeline
+ * that branched off before this point.
+ *
+ * If the record is not immediately available, the function returns false
+ * if we're not in standby mode. In standby mode, waits for it to become
+ * available.
+ *
+ * When the requested record becomes available, the function opens the file
+ * containing it (if not open already), and returns true. When end of standby
+ * mode is triggered by the user, and there is no more WAL available, returns
+ * false.
+ *
+ * The segment that is opened is the one identified by the file-level
+ * variable readSegNo, and the resulting descriptor is left in readFile.
+ */
+static bool
+WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
+                           bool fetching_ckpt, XLogRecPtr tliRecPtr,
+                           TimeLineID replayTLI, XLogRecPtr replayLSN)
+{
+   static TimestampTz last_fail_time = 0;
+   TimestampTz now;
+   bool        streaming_reply_sent = false;
+
+   /*-------
+    * Standby mode is implemented by a state machine:
+    *
+    * 1. Read from either archive or pg_wal (XLOG_FROM_ARCHIVE), or just
+    *    pg_wal (XLOG_FROM_PG_WAL)
+    * 2. Check trigger file
+    * 3. Read from primary server via walreceiver (XLOG_FROM_STREAM)
+    * 4. Rescan timelines
+    * 5. Sleep wal_retrieve_retry_interval milliseconds, and loop back to 1.
+    *
+    * Failure to read from the current source advances the state machine to
+    * the next state.
+    *
+    * 'currentSource' indicates the current state. There are no currentSource
+    * values for "check trigger", "rescan timelines", and "sleep" states,
+    * those actions are taken when reading from the previous source fails, as
+    * part of advancing to the next state.
+    *
+    * If standby mode is turned off while reading WAL from stream, we move
+    * to XLOG_FROM_ARCHIVE and reset lastSourceFailed, to force fetching
+    * the files (which would be required at end of recovery, e.g., timeline
+    * history file) from archive or pg_wal. We don't need to kill WAL receiver
+    * here because it's already stopped when standby mode is turned off at
+    * the end of recovery.
+    *
+    * When not in archive recovery at all, the only source is pg_wal.
+    *-------
+    */
+   if (!InArchiveRecovery)
+       currentSource = XLOG_FROM_PG_WAL;
+   else if (currentSource == XLOG_FROM_ANY ||
+            (!StandbyMode && currentSource == XLOG_FROM_STREAM))
+   {
+       lastSourceFailed = false;
+       currentSource = XLOG_FROM_ARCHIVE;
+   }
+
+   for (;;)
+   {
+       XLogSource  oldSource = currentSource;
+       bool        startWalReceiver = false;
+
+       /*
+        * First check if we failed to read from the current source, and
+        * advance the state machine if so. The failure to read might've
+        * happened outside this function, e.g when a CRC check fails on a
+        * record, or within this loop.
+        */
+       if (lastSourceFailed)
+       {
+           switch (currentSource)
+           {
+               case XLOG_FROM_ARCHIVE:
+               case XLOG_FROM_PG_WAL:
+
+                   /*
+                    * Check to see if the trigger file exists. Note that we
+                    * do this only after failure, so when you create the
+                    * trigger file, we still finish replaying as much as we
+                    * can from archive and pg_wal before failover.
+                    */
+                   if (StandbyMode && CheckForStandbyTrigger())
+                   {
+                       XLogShutdownWalRcv();
+                       return false;
+                   }
+
+                   /*
+                    * Not in standby mode, and we've now tried the archive
+                    * and pg_wal.
+                    */
+                   if (!StandbyMode)
+                       return false;
+
+                   /*
+                    * Move to XLOG_FROM_STREAM state, and set to start a
+                    * walreceiver if necessary.
+                    */
+                   currentSource = XLOG_FROM_STREAM;
+                   startWalReceiver = true;
+                   break;
+
+               case XLOG_FROM_STREAM:
+
+                   /*
+                    * Failure while streaming. Most likely, we got here
+                    * because streaming replication was terminated, or
+                    * promotion was triggered. But we also get here if we
+                    * find an invalid record in the WAL streamed from the
+                    * primary, in which case something is seriously wrong.
+                    * There's little chance that the problem will just go
+                    * away, but PANIC is not good for availability either,
+                    * especially in hot standby mode. So, we treat that the
+                    * same as disconnection, and retry from archive/pg_wal
+                    * again. The WAL in the archive should be identical to
+                    * what was streamed, so it's unlikely that it helps, but
+                    * one can hope...
+                    */
+
+                   /*
+                    * We should be able to move to XLOG_FROM_STREAM only in
+                    * standby mode.
+                    */
+                   Assert(StandbyMode);
+
+                   /*
+                    * Before we leave XLOG_FROM_STREAM state, make sure that
+                    * walreceiver is not active, so that it won't overwrite
+                    * WAL that we restore from archive.
+                    */
+                   if (WalRcvStreaming())
+                       XLogShutdownWalRcv();
+
+                   /*
+                    * Before we sleep, re-scan for possible new timelines if
+                    * we were requested to recover to the latest timeline.
+                    * If the rescan reports success, go straight back to the
+                    * archive without sleeping.
+                    */
+                   if (recoveryTargetTimeLineGoal == RECOVERY_TARGET_TIMELINE_LATEST)
+                   {
+                       if (rescanLatestTimeLine(replayTLI, replayLSN))
+                       {
+                           currentSource = XLOG_FROM_ARCHIVE;
+                           break;
+                       }
+                   }
+
+                   /*
+                    * XLOG_FROM_STREAM is the last state in our state
+                    * machine, so we've exhausted all the options for
+                    * obtaining the requested WAL. We're going to loop back
+                    * and retry from the archive, but if it hasn't been long
+                    * since last attempt, sleep wal_retrieve_retry_interval
+                    * milliseconds to avoid busy-waiting.
+                    *
+                    * Only the remainder of the interval, measured from
+                    * last_fail_time, is slept.  last_fail_time is static, so
+                    * it persists across calls to this function.
+                    */
+                   now = GetCurrentTimestamp();
+                   if (!TimestampDifferenceExceeds(last_fail_time, now,
+                                                   wal_retrieve_retry_interval))
+                   {
+                       long        wait_time;
+
+                       wait_time = wal_retrieve_retry_interval -
+                           TimestampDifferenceMilliseconds(last_fail_time, now);
+
+                       elog(LOG, "waiting for WAL to become available at %X/%X",
+                            LSN_FORMAT_ARGS(RecPtr));
+
+                       (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
+                                        WL_LATCH_SET | WL_TIMEOUT |
+                                        WL_EXIT_ON_PM_DEATH,
+                                        wait_time,
+                                        WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL);
+                       ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+                       now = GetCurrentTimestamp();
+
+                       /* Handle interrupt signals of startup process */
+                       HandleStartupProcInterrupts();
+                   }
+                   last_fail_time = now;
+                   currentSource = XLOG_FROM_ARCHIVE;
+                   break;
+
+               default:
+                   elog(ERROR, "unexpected WAL source %d", currentSource);
+           }
+       }
+       else if (currentSource == XLOG_FROM_PG_WAL)
+       {
+           /*
+            * We just successfully read a file in pg_wal. We prefer files in
+            * the archive over ones in pg_wal, so try the next file again
+            * from the archive first.
+            */
+           if (InArchiveRecovery)
+               currentSource = XLOG_FROM_ARCHIVE;
+       }
+
+       if (currentSource != oldSource)
+           elog(DEBUG2, "switched WAL source from %s to %s after %s",
+                xlogSourceNames[oldSource], xlogSourceNames[currentSource],
+                lastSourceFailed ? "failure" : "success");
+
+       /*
+        * We've now handled possible failure. Try to read from the chosen
+        * source.
+        */
+       lastSourceFailed = false;
+
+       switch (currentSource)
+       {
+           case XLOG_FROM_ARCHIVE:
+           case XLOG_FROM_PG_WAL:
+
+               /*
+                * WAL receiver must not be running when reading WAL from
+                * archive or pg_wal.
+                */
+               Assert(!WalRcvStreaming());
+
+               /* Close any old file we might have open. */
+               if (readFile >= 0)
+               {
+                   close(readFile);
+                   readFile = -1;
+               }
+               /* Reset curFileTLI if random fetch. */
+               if (randAccess)
+                   curFileTLI = 0;
+
+               /*
+                * Try to restore the file from archive, or read an existing
+                * file from pg_wal.
+                *
+                * In the XLOG_FROM_ARCHIVE state XLOG_FROM_ANY is passed as
+                * the source, so that both the archive and pg_wal are tried;
+                * in the XLOG_FROM_PG_WAL state only pg_wal is.
+                */
+               readFile = XLogFileReadAnyTLI(readSegNo, DEBUG2,
+                                             currentSource == XLOG_FROM_ARCHIVE ? XLOG_FROM_ANY :
+                                             currentSource);
+               if (readFile >= 0)
+                   return true;    /* success! */
+
+               /*
+                * Nope, not found in archive or pg_wal.
+                */
+               lastSourceFailed = true;
+               break;
+
+           case XLOG_FROM_STREAM:
+               {
+                   bool        havedata;
+
+                   /*
+                    * We should be able to move to XLOG_FROM_STREAM only in
+                    * standby mode.
+                    */
+                   Assert(StandbyMode);
+
+                   /*
+                    * First, shutdown walreceiver if its restart has been
+                    * requested -- but no point if we're already slated for
+                    * starting it.
+                    */
+                   if (pendingWalRcvRestart && !startWalReceiver)
+                   {
+                       XLogShutdownWalRcv();
+
+                       /*
+                        * Re-scan for possible new timelines if we were
+                        * requested to recover to the latest timeline.
+                        */
+                       if (recoveryTargetTimeLineGoal ==
+                           RECOVERY_TARGET_TIMELINE_LATEST)
+                           rescanLatestTimeLine(replayTLI, replayLSN);
+
+                       startWalReceiver = true;
+                   }
+                   pendingWalRcvRestart = false;
+
+                   /*
+                    * Launch walreceiver if needed.
+                    *
+                    * If fetching_ckpt is true, RecPtr points to the initial
+                    * checkpoint location. In that case, we use RedoStartLSN
+                    * as the streaming start position instead of RecPtr, so
+                    * that when we later jump backwards to start redo at
+                    * RedoStartLSN, we will have the logs streamed already.
+                    *
+                    * Nothing is launched if primary_conninfo is unset or
+                    * empty.
+                    */
+                   if (startWalReceiver &&
+                       PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+                   {
+                       XLogRecPtr  ptr;
+                       TimeLineID  tli;
+
+                       if (fetching_ckpt)
+                       {
+                           ptr = RedoStartLSN;
+                           tli = RedoStartTLI;
+                       }
+                       else
+                       {
+                           ptr = RecPtr;
+
+                           /*
+                            * Use the record begin position to determine the
+                            * TLI, rather than the position we're reading.
+                            */
+                           tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+                           if (curFileTLI > 0 && tli < curFileTLI)
+                               elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+                                    LSN_FORMAT_ARGS(tliRecPtr),
+                                    tli, curFileTLI);
+                       }
+                       curFileTLI = tli;
+                       SetInstallXLogFileSegmentActive();
+                       RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+                                            PrimarySlotName,
+                                            wal_receiver_create_temp_slot);
+                       flushedUpto = 0;
+                   }
+
+                   /*
+                    * Check if WAL receiver is active or wait to start up.
+                    * If it is not, treat streaming as failed so that the
+                    * state machine advances on the next iteration.
+                    */
+                   if (!WalRcvStreaming())
+                   {
+                       lastSourceFailed = true;
+                       break;
+                   }
+
+                   /*
+                    * Walreceiver is active, so see if new data has arrived.
+                    *
+                    * We only advance XLogReceiptTime when we obtain fresh
+                    * WAL from walreceiver and observe that we had already
+                    * processed everything before the most recent "chunk"
+                    * that it flushed to disk.  In steady state where we are
+                    * keeping up with the incoming data, XLogReceiptTime will
+                    * be updated on each cycle. When we are behind,
+                    * XLogReceiptTime will not advance, so the grace time
+                    * allotted to conflicting queries will decrease.
+                    *
+                    * The cached flushedUpto is consulted first; only if it
+                    * does not cover RecPtr is it refreshed from walreceiver.
+                    */
+                   if (RecPtr < flushedUpto)
+                       havedata = true;
+                   else
+                   {
+                       XLogRecPtr  latestChunkStart;
+
+                       flushedUpto = GetWalRcvFlushRecPtr(&latestChunkStart, &receiveTLI);
+                       if (RecPtr < flushedUpto && receiveTLI == curFileTLI)
+                       {
+                           havedata = true;
+                           if (latestChunkStart <= RecPtr)
+                           {
+                               XLogReceiptTime = GetCurrentTimestamp();
+                               SetCurrentChunkStartTime(XLogReceiptTime);
+                           }
+                       }
+                       else
+                           havedata = false;
+                   }
+                   if (havedata)
+                   {
+                       /*
+                        * Great, streamed far enough.  Open the file if it's
+                        * not open already.  Also read the timeline history
+                        * file if we haven't initialized timeline history
+                        * yet; it should be streamed over and present in
+                        * pg_wal by now.  Use XLOG_FROM_STREAM so that source
+                        * info is set correctly and XLogReceiptTime isn't
+                        * changed.
+                        *
+                        * NB: We must set readTimeLineHistory based on
+                        * recoveryTargetTLI, not receiveTLI. Normally they'll
+                        * be the same, but if recovery_target_timeline is
+                        * 'latest' and archiving is configured, then it's
+                        * possible that we managed to retrieve one or more
+                        * new timeline history files from the archive,
+                        * updating recoveryTargetTLI.
+                        *
+                        * Note that when the file had to be opened here, we
+                        * do not return directly: we break out of the switch
+                        * and go around the loop once more.  Only the
+                        * already-open case returns true from this branch.
+                        */
+                       if (readFile < 0)
+                       {
+                           if (!expectedTLEs)
+                               expectedTLEs = readTimeLineHistory(recoveryTargetTLI);
+                           readFile = XLogFileRead(readSegNo, PANIC,
+                                                   receiveTLI,
+                                                   XLOG_FROM_STREAM, false);
+                           Assert(readFile >= 0);
+                       }
+                       else
+                       {
+                           /* just make sure source info is correct... */
+                           readSource = XLOG_FROM_STREAM;
+                           XLogReceiptSource = XLOG_FROM_STREAM;
+                           return true;
+                       }
+                       break;
+                   }
+
+                   /*
+                    * Data not here yet. Check for trigger, then wait for
+                    * walreceiver to wake us up when new WAL arrives.
+                    */
+                   if (CheckForStandbyTrigger())
+                   {
+                       /*
+                        * Note that we don't "return false" immediately here.
+                        * After being triggered, we still want to replay all
+                        * the WAL that was already streamed. It's in pg_wal
+                        * now, so we just treat this as a failure, and the
+                        * state machine will move on to replay the streamed
+                        * WAL from pg_wal, and then recheck the trigger and
+                        * exit replay.
+                        */
+                       lastSourceFailed = true;
+                       break;
+                   }
+
+                   /*
+                    * Since we have replayed everything we have received so
+                    * far and are about to start waiting for more WAL, let's
+                    * tell the upstream server our replay location now so
+                    * that pg_stat_replication doesn't show stale
+                    * information.
+                    *
+                    * This is done at most once per call of this function.
+                    */
+                   if (!streaming_reply_sent)
+                   {
+                       WalRcvForceReply();
+                       streaming_reply_sent = true;
+                   }
+
+                   /*
+                    * Wait for more WAL to arrive. Time out after 5 seconds
+                    * to react to a trigger file promptly and to check if the
+                    * WAL receiver is still active.
+                    */
+                   (void) WaitLatch(&XLogRecoveryCtl->recoveryWakeupLatch,
+                                    WL_LATCH_SET | WL_TIMEOUT |
+                                    WL_EXIT_ON_PM_DEATH,
+                                    5000L, WAIT_EVENT_RECOVERY_WAL_STREAM);
+                   ResetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+                   break;
+               }
+
+           default:
+               elog(ERROR, "unexpected WAL source %d", currentSource);
+       }
+
+       /*
+        * Check for recovery pause here so that we can confirm more quickly
+        * that a requested pause has actually taken effect.
+        */
+       if (((volatile XLogRecoveryCtlData *) XLogRecoveryCtl)->recoveryPauseState !=
+           RECOVERY_NOT_PAUSED)
+           recoveryPausesHere(false);
+
+       /*
+        * This possibly-long loop needs to handle interrupts of startup
+        * process.
+        */
+       HandleStartupProcInterrupts();
+   }
+
+   return false;               /* not reached */
+}
+
+
+/*
+ * Choose the log level for reporting a corrupt WAL record in the current
+ * WAL page, previously read by XLogPageRead().
+ *
+ * 'emode' is the error mode that would be used to report a file-not-found
+ * or legitimate end-of-WAL situation, and it is normally returned as-is.
+ * The exception is a retry of the exact same record: when the page came
+ * from pg_wal and 'emode' is LOG, a repeated complaint about the same
+ * 'RecPtr' is demoted to DEBUG1, so that we only complain loudly the first
+ * time.  We only do this when reading from pg_wal, because we don't expect
+ * any invalid records in archive or in records streamed from the primary.
+ * Files in the archive should be complete, and we should never hit the end
+ * of WAL because we stop and wait for more WAL to arrive before replaying
+ * it.
+ *
+ * NOTE: In the pg_wal/LOG case this function remembers the RecPtr value it
+ * was last called with, to suppress repeated messages about the same
+ * record. Only call this when you are about to ereport(), or you might
+ * cause a later message to be erroneously suppressed.
+ */
+static int
+emode_for_corrupt_record(int emode, XLogRecPtr RecPtr)
+{
+   static XLogRecPtr prevReported = 0;
+
+   /* Anything other than a LOG-level complaint about pg_wal passes through */
+   if (readSource != XLOG_FROM_PG_WAL || emode != LOG)
+       return emode;
+
+   /* Same record as the previous complaint: keep the noise down */
+   if (prevReported == RecPtr)
+       return DEBUG1;
+
+   /* First complaint about this record: remember it and report normally */
+   prevReported = RecPtr;
+   return emode;
+}
+
+
+/*
+ * Subroutine to try to fetch and validate a prior checkpoint record.
+ *
+ * Reads the record at RecPtr and verifies that it belongs to the XLOG
+ * resource manager, is a shutdown or online checkpoint, and has exactly the
+ * length of a checkpoint record.  Returns the record on success, or NULL if
+ * any of these steps fails.
+ *
+ * whichChkpt identifies the checkpoint (merely for reporting purposes).
+ * 1 for "primary", any other value for "other" (backup_label)
+ *
+ * 'report' only silences the messages about an invalid RecPtr or a record
+ * that could not be read; the remaining validation failures are logged
+ * regardless of it.
+ */
+static XLogRecord *
+ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
+                    int whichChkpt, bool report, TimeLineID replayTLI)
+{
+   bool        isPrimary = (whichChkpt == 1);
+   XLogRecord *rec;
+   uint8       rectype;
+
+   Assert(xlogreader != NULL);
+
+   /* The pointer itself must be a plausible record position */
+   if (!XRecOffIsValid(RecPtr))
+   {
+       if (report)
+       {
+           if (isPrimary)
+               ereport(LOG,
+                       (errmsg("invalid primary checkpoint link in control file")));
+           else
+               ereport(LOG,
+                       (errmsg("invalid checkpoint link in backup_label file")));
+       }
+       return NULL;
+   }
+
+   /* Try to read the record at that position */
+   XLogBeginRead(xlogreader, RecPtr);
+   rec = ReadRecord(xlogreader, LOG, true, replayTLI);
+   if (rec == NULL)
+   {
+       if (report)
+       {
+           if (isPrimary)
+               ereport(LOG,
+                       (errmsg("invalid primary checkpoint record")));
+           else
+               ereport(LOG,
+                       (errmsg("invalid checkpoint record")));
+       }
+       return NULL;
+   }
+
+   /* It must have been written by the XLOG resource manager ... */
+   if (rec->xl_rmid != RM_XLOG_ID)
+   {
+       if (isPrimary)
+           ereport(LOG,
+                   (errmsg("invalid resource manager ID in primary checkpoint record")));
+       else
+           ereport(LOG,
+                   (errmsg("invalid resource manager ID in checkpoint record")));
+       return NULL;
+   }
+
+   /* ... be one of the two checkpoint record types ... */
+   rectype = rec->xl_info & ~XLR_INFO_MASK;
+   if (rectype != XLOG_CHECKPOINT_SHUTDOWN &&
+       rectype != XLOG_CHECKPOINT_ONLINE)
+   {
+       if (isPrimary)
+           ereport(LOG,
+                   (errmsg("invalid xl_info in primary checkpoint record")));
+       else
+           ereport(LOG,
+                   (errmsg("invalid xl_info in checkpoint record")));
+       return NULL;
+   }
+
+   /* ... and have exactly the size of a checkpoint record */
+   if (rec->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
+   {
+       if (isPrimary)
+           ereport(LOG,
+                   (errmsg("invalid length of primary checkpoint record")));
+       else
+           ereport(LOG,
+                   (errmsg("invalid length of checkpoint record")));
+       return NULL;
+   }
+
+   return rec;
+}
+
+/*
+ * Look for timelines that have shown up in the archive since recovery
+ * started, and retarget recovery at the newest one if it is usable.
+ *
+ * The newest timeline is usable only if the current recovery target
+ * timeline appears in its history and the fork point is not before
+ * 'replayLSN'.  'replayTLI' is used only in the log messages.
+ *
+ * Returns 'true' if the recovery target TLI was changed to the latest
+ * timeline, 'false' if there is no newer timeline or it was rejected.
+ */
+static bool
+rescanLatestTimeLine(TimeLineID replayTLI, XLogRecPtr replayLSN)
+{
+   TimeLineID  prevTarget = recoveryTargetTLI;
+   TimeLineID  latestTLI;
+   List       *latestHistory;
+   ListCell   *lc;
+   TimeLineHistoryEntry *ourEntry = NULL;
+
+   latestTLI = findNewestTimeLine(recoveryTargetTLI);
+   if (latestTLI == recoveryTargetTLI)
+       return false;           /* No new timelines found */
+
+   /* Determine the list of expected TLIs for the new TLI */
+   latestHistory = readTimeLineHistory(latestTLI);
+
+   /*
+    * Find the current target timeline in the new timeline's history.  If it
+    * is not part of that history, we cannot proceed to the new timeline.
+    */
+   foreach(lc, latestHistory)
+   {
+       TimeLineHistoryEntry *tle = (TimeLineHistoryEntry *) lfirst(lc);
+
+       if (tle->tli == recoveryTargetTLI)
+       {
+           ourEntry = tle;
+           break;
+       }
+   }
+
+   if (ourEntry == NULL)
+   {
+       ereport(LOG,
+               (errmsg("new timeline %u is not a child of database system timeline %u",
+                       latestTLI,
+                       replayTLI)));
+       return false;
+   }
+
+   /*
+    * Our timeline is an ancestor, but the new timeline must also have been
+    * forked off from it *after* the current recovery location.
+    */
+   if (ourEntry->end < replayLSN)
+   {
+       ereport(LOG,
+               (errmsg("new timeline %u forked off current database system timeline %u before current recovery point %X/%X",
+                       latestTLI,
+                       replayTLI,
+                       LSN_FORMAT_ARGS(replayLSN))));
+       return false;
+   }
+
+   /* The new timeline history seems valid. Switch target */
+   recoveryTargetTLI = latestTLI;
+   list_free_deep(expectedTLEs);
+   expectedTLEs = latestHistory;
+
+   /*
+    * As in StartupXLOG(), try to ensure we have all the history files
+    * between the old target and new target in pg_wal.
+    */
+   restoreTimeLineHistoryFiles(prevTarget + 1, latestTLI);
+
+   ereport(LOG,
+           (errmsg("new target timeline is %u",
+                   recoveryTargetTLI)));
+
+   return true;
+}
+
+
+/*
+ * Open a logfile segment for reading (during recovery).
+ *
+ * If source == XLOG_FROM_ARCHIVE, the segment is retrieved from archive.
+ * Otherwise, it's assumed to be already available in pg_wal.
+ *
+ * Returns the open file descriptor, or -1 if the segment could not be
+ * restored from the archive or does not exist while 'notfoundOk' is set.
+ * Any other failure to open the file is reported at PANIC level.  On
+ * success curFileTLI, readSource, XLogReceiptSource and (except for
+ * XLOG_FROM_STREAM) XLogReceiptTime are updated.  'emode' is not used here.
+ */
+static int
+XLogFileRead(XLogSegNo segno, int emode, TimeLineID tli,
+            XLogSource source, bool notfoundOk)
+{
+   char        xlogfname[MAXFNAMELEN];
+   char        activitymsg[MAXFNAMELEN + 16];
+   char        path[MAXPGPATH];
+   int         fd;
+
+   XLogFileName(xlogfname, tli, segno, wal_segment_size);
+
+   if (source == XLOG_FROM_ARCHIVE)
+   {
+       /* Report recovery progress in PS display */
+       snprintf(activitymsg, sizeof(activitymsg), "waiting for %s",
+                xlogfname);
+       set_ps_display(activitymsg);
+
+       if (!RestoreArchivedFile(path, xlogfname,
+                                "RECOVERYXLOG",
+                                wal_segment_size,
+                                InRedo))
+           return -1;
+
+       /*
+        * The segment was fetched from archival storage: replace the
+        * existing xlog segment (if any) with the archival version, and
+        * point path at the new file in pg_wal.
+        */
+       Assert(!IsInstallXLogFileSegmentActive());
+       KeepFileRestoredFromArchive(path, xlogfname);
+       snprintf(path, MAXPGPATH, XLOGDIR "/%s", xlogfname);
+   }
+   else if (source == XLOG_FROM_PG_WAL || source == XLOG_FROM_STREAM)
+       XLogFilePath(path, tli, segno, wal_segment_size);
+   else
+       elog(ERROR, "invalid XLogFileRead source %d", source);
+
+   fd = BasicOpenFile(path, O_RDONLY | PG_BINARY);
+   if (fd < 0)
+   {
+       /* Only a missing file that the caller tolerates is acceptable */
+       if (errno != ENOENT || !notfoundOk)
+           ereport(PANIC,
+                   (errcode_for_file_access(),
+                    errmsg("could not open file \"%s\": %m", path)));
+       return -1;
+   }
+
+   /* Success! */
+   curFileTLI = tli;
+
+   /* Report recovery progress in PS display */
+   snprintf(activitymsg, sizeof(activitymsg), "recovering %s",
+            xlogfname);
+   set_ps_display(activitymsg);
+
+   /* Track source of data in assorted state variables */
+   readSource = source;
+   XLogReceiptSource = source;
+   /* In FROM_STREAM case, caller tracks receipt time, not me */
+   if (source != XLOG_FROM_STREAM)
+       XLogReceiptTime = GetCurrentTimestamp();
+
+   return fd;
+}
+
+/*
+ * Open a logfile segment for reading (during recovery).
+ *
+ * This version searches for the segment with any TLI listed in expectedTLEs.
+ *
+ * 'source' selects where to look: the archive, pg_wal, or (XLOG_FROM_ANY)
+ * the archive first and then pg_wal, for each candidate timeline in turn.
+ * Returns the open file descriptor, or -1 after reporting a file-not-found
+ * error at level 'emode'.
+ */
+static int
+XLogFileReadAnyTLI(XLogSegNo segno, int emode, XLogSource source)
+{
+   bool        tryArchive = (source == XLOG_FROM_ANY ||
+                             source == XLOG_FROM_ARCHIVE);
+   bool        tryPgWal = (source == XLOG_FROM_ANY ||
+                           source == XLOG_FROM_PG_WAL);
+   char        path[MAXPGPATH];
+   List       *candidates;
+   ListCell   *lc;
+
+   /*
+    * Loop looking for a suitable timeline ID: we might need to read any of
+    * the timelines listed in expectedTLEs.
+    *
+    * We expect curFileTLI on entry to be the TLI of the preceding file in
+    * sequence, or 0 if there was no predecessor.  We do not allow curFileTLI
+    * to go backwards; this prevents us from picking up the wrong file when a
+    * parent timeline extends to higher segment numbers than the child we
+    * want to read.
+    *
+    * If we haven't read the timeline history file yet, read it now, so that
+    * we know which TLIs to scan.  We don't save the list in expectedTLEs,
+    * however, unless we actually find a valid segment.  That way if there is
+    * neither a timeline history file nor a WAL segment in the archive, and
+    * streaming replication is set up, we'll read the timeline history file
+    * streamed from the primary when we start streaming, instead of
+    * recovering with a dummy history generated here.
+    */
+   candidates = expectedTLEs ? expectedTLEs : readTimeLineHistory(recoveryTargetTLI);
+
+   foreach(lc, candidates)
+   {
+       TimeLineHistoryEntry *entry = (TimeLineHistoryEntry *) lfirst(lc);
+       TimeLineID  tli = entry->tli;
+       int         fd = -1;
+
+       if (tli < curFileTLI)
+           break;              /* don't bother looking at too-old TLIs */
+
+       /*
+        * Skip scanning the timeline ID that the logfile segment to read
+        * doesn't belong to
+        */
+       if (entry->begin != InvalidXLogRecPtr)
+       {
+           XLogSegNo   firstseg = 0;
+
+           XLByteToSeg(entry->begin, firstseg, wal_segment_size);
+
+           /*
+            * The logfile segment that doesn't belong to the timeline is
+            * older or newer than the segment that the timeline started or
+            * ended at, respectively. It's sufficient to check only the
+            * starting segment of the timeline here. Since the timelines are
+            * scanned in descending order in this loop, any segments newer
+            * than the ending segment should belong to newer timeline and
+            * have already been read before. So it's not necessary to check
+            * the ending segment of the timeline here.
+            */
+           if (segno < firstseg)
+               continue;
+       }
+
+       /* Archive first, if allowed ... */
+       if (tryArchive)
+       {
+           fd = XLogFileRead(segno, emode, tli,
+                             XLOG_FROM_ARCHIVE, true);
+           if (fd != -1)
+               elog(DEBUG1, "got WAL segment from archive");
+       }
+
+       /* ... then pg_wal, if allowed and still not found */
+       if (fd == -1 && tryPgWal)
+           fd = XLogFileRead(segno, emode, tli,
+                             XLOG_FROM_PG_WAL, true);
+
+       if (fd != -1)
+       {
+           /* Found a segment: now it is safe to adopt the history list */
+           if (!expectedTLEs)
+               expectedTLEs = candidates;
+           return fd;
+       }
+   }
+
+   /* Couldn't find it.  For simplicity, complain about front timeline */
+   XLogFilePath(path, recoveryTargetTLI, segno, wal_segment_size);
+   errno = ENOENT;
+   ereport(emode,
+           (errcode_for_file_access(),
+            errmsg("could not open file \"%s\": %m", path)));
+   return -1;
+}
+
+/*
+ * Ask for the walreceiver to be restarted by setting pendingWalRcvRestart.
+ * (The startup process calls this on noticing a relevant configuration
+ * change.)
+ *
+ * This is a no-op unless WAL is currently being read from the stream and a
+ * walreceiver is running.
+ */
+void
+StartupRequestWalReceiverRestart(void)
+{
+   /* Nothing to restart unless we are streaming with a live walreceiver */
+   if (currentSource != XLOG_FROM_STREAM || !WalRcvRunning())
+       return;
+
+   ereport(LOG,
+           (errmsg("WAL receiver process shutdown requested")));
+
+   pendingWalRcvRestart = true;
+}
+
+
+/*
+ * Has a standby promotion already been triggered?
+ *
+ * Unlike CheckForStandbyTrigger(), this works in any process
+ * that's connected to shared memory.
+ *
+ * The answer is cached in LocalPromoteIsTriggered once it has been seen
+ * true.
+ */
+bool
+PromoteIsTriggered(void)
+{
+   /*
+    * Shared state is consulted only until a standby promotion has been
+    * seen.  A promotion can't be triggered again, so once the local copy is
+    * true there is no need to look at the shared variable any more.
+    */
+   if (!LocalPromoteIsTriggered)
+   {
+       SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+       LocalPromoteIsTriggered = XLogRecoveryCtl->SharedPromoteIsTriggered;
+       SpinLockRelease(&XLogRecoveryCtl->info_lck);
+   }
+
+   return LocalPromoteIsTriggered;
+}
+
+static void
+SetPromoteIsTriggered(void)
+{
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   XLogRecoveryCtl->SharedPromoteIsTriggered = true;   /* flag read by PromoteIsTriggered() */
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /*
+    * Mark the recovery pause state as 'not paused' because the paused state
+    * ends and promotion continues if a promotion is triggered while recovery
+    * is paused. Otherwise pg_get_wal_replay_pause_state() can mistakenly
+    * return 'paused' while a promotion is ongoing.
+    *
+    * This is done after the shared flag has been set and the spinlock
+    * released.
+    */
+   SetRecoveryPause(false);
+
+   LocalPromoteIsTriggered = true; /* local copy, so later checks here skip the spinlock */
+}
+
+/*
+ * Check to see whether the user-specified trigger file exists and whether a
+ * promote request has arrived.  If either condition holds, return true.
+ *
+ * A detected trigger is consumed (the signal file or trigger file is
+ * removed) and recorded with SetPromoteIsTriggered(), so later calls return
+ * true immediately.  Failure to stat the trigger file for any reason other
+ * than its absence raises an ERROR.
+ */
+static bool
+CheckForStandbyTrigger(void)
+{
+   struct stat st;
+
+   /* Already triggered earlier: nothing more to check */
+   if (LocalPromoteIsTriggered)
+       return true;
+
+   /* Promotion requested by signal, and the signal file is really there? */
+   if (IsPromoteSignaled() && CheckPromoteSignal())
+   {
+       ereport(LOG, (errmsg("received promote request")));
+       RemovePromoteSignalFiles();
+       ResetPromoteSignaled();
+       SetPromoteIsTriggered();
+       return true;
+   }
+
+   /* No trigger file configured */
+   if (PromoteTriggerFile == NULL || PromoteTriggerFile[0] == '\0')
+       return false;
+
+   if (stat(PromoteTriggerFile, &st) != 0)
+   {
+       /* A missing trigger file is the normal case; anything else is not */
+       if (errno != ENOENT)
+           ereport(ERROR,
+                   (errcode_for_file_access(),
+                    errmsg("could not stat promote trigger file \"%s\": %m",
+                           PromoteTriggerFile)));
+       return false;
+   }
+
+   ereport(LOG,
+           (errmsg("promote trigger file found: %s", PromoteTriggerFile)));
+   unlink(PromoteTriggerFile);
+   SetPromoteIsTriggered();
+   return true;
+}
+
+/*
+ * Remove the files signaling a standby promotion request.
+ *
+ * Currently that is just PROMOTE_SIGNAL_FILE.  The result of unlink() is
+ * not checked, so it is not an error if the file does not exist.
+ */
+void
+RemovePromoteSignalFiles(void)
+{
+   unlink(PROMOTE_SIGNAL_FILE);
+}
+
+/*
+ * Check to see if a promote request has arrived.
+ *
+ * Returns true if PROMOTE_SIGNAL_FILE can be stat'ed, false otherwise.  The
+ * file is not removed here.
+ */
+bool
+CheckPromoteSignal(void)
+{
+   struct stat st;
+
+   return stat(PROMOTE_SIGNAL_FILE, &st) == 0;
+}
+
+/*
+ * Wake up startup process to replay newly arrived WAL, or to notice that
+ * failover has been requested.
+ *
+ * This is done by setting the recoveryWakeupLatch kept in XLogRecoveryCtl.
+ */
+void
+WakeupRecovery(void)
+{
+   SetLatch(&XLogRecoveryCtl->recoveryWakeupLatch);
+}
+
+/*
+ * Schedule a walreceiver wakeup in the main recovery loop.
+ *
+ * This only raises the doRequestWalReceiverReply flag; the wakeup itself is
+ * not performed here.
+ */
+void
+XLogRequestWalReceiverReply(void)
+{
+   doRequestWalReceiverReply = true;
+}
+
+/*
+ * Is HotStandby active yet? This is only important in special backends
+ * since normal backends won't ever be able to connect until this returns
+ * true. Postmaster knows this by way of signal, not via shared memory.
+ *
+ * Unlike testing standbyState, this works in any process that's connected to
+ * shared memory.  (And note that standbyState alone doesn't tell the truth
+ * anyway.)
+ *
+ * The answer is cached in LocalHotStandbyActive once it has been seen true.
+ */
+bool
+HotStandbyActive(void)
+{
+   /*
+    * Shared state is consulted only until Hot Standby has been seen active.
+    * Hot Standby can't be de-activated, so once the local copy is true
+    * there is no need to look at the shared variable any more.
+    */
+   if (!LocalHotStandbyActive)
+   {
+       /* spinlock is essential on machines with weak memory ordering! */
+       SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+       LocalHotStandbyActive = XLogRecoveryCtl->SharedHotStandbyActive;
+       SpinLockRelease(&XLogRecoveryCtl->info_lck);
+   }
+
+   return LocalHotStandbyActive;
+}
+
+/*
+ * Like HotStandbyActive(), but to be used only in WAL replay code,
+ * where we don't need to ask any other process what the state is.
+ *
+ * Returns the local flag directly, without touching shared memory.  The
+ * assertion documents the intended callers: the startup process, or a
+ * process not running under the postmaster.
+ */
+static bool
+HotStandbyActiveInReplay(void)
+{
+   Assert(AmStartupProcess() || !IsPostmasterEnvironment);
+   return LocalHotStandbyActive;
+}
+
+/*
+ * Get latest redo apply position.
+ *
+ * Returns lastReplayedEndRecPtr from shared memory.  If 'replayTLI' is not
+ * NULL, the matching lastReplayedTLI is stored there; both values are read
+ * under the same spinlock acquisition.
+ *
+ * Exported to allow WALReceiver to read the pointer directly.
+ */
+XLogRecPtr
+GetXLogReplayRecPtr(TimeLineID *replayTLI)
+{
+   XLogRecPtr  lastReplayedLSN;
+   TimeLineID  lastReplayedTLI;
+
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   lastReplayedLSN = XLogRecoveryCtl->lastReplayedEndRecPtr;
+   lastReplayedTLI = XLogRecoveryCtl->lastReplayedTLI;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /* The timeline is an optional output */
+   if (replayTLI != NULL)
+       *replayTLI = lastReplayedTLI;
+
+   return lastReplayedLSN;
+}
+
+
+/*
+ * Get position of last applied, or the record being applied.
+ *
+ * This is different from GetXLogReplayRecPtr() in that if a WAL
+ * record is currently being applied, this includes that record.
+ *
+ * Returns replayEndRecPtr from shared memory.  If 'replayEndTLI' is not
+ * NULL, the matching replayEndTLI is stored there; both values are read
+ * under the same spinlock acquisition.
+ */
+XLogRecPtr
+GetCurrentReplayRecPtr(TimeLineID *replayEndTLI)
+{
+   XLogRecPtr  endLSN;
+   TimeLineID  endTLI;
+
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   endLSN = XLogRecoveryCtl->replayEndRecPtr;
+   endTLI = XLogRecoveryCtl->replayEndTLI;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   /* The timeline is an optional output */
+   if (replayEndTLI != NULL)
+       *replayEndTLI = endTLI;
+
+   return endLSN;
+}
+
+/*
+ * Save timestamp of latest processed commit/abort record.
+ *
+ * We keep this in XLogRecoveryCtl, not a simple static variable, so that it
+ * can be seen by processes other than the startup process.  Note in
+ * particular that CreateRestartPoint is executed in the checkpointer.
+ *
+ * The value is stored in recoveryLastXTime under the info_lck spinlock and
+ * can be read back with GetLatestXTime().
+ */
+static void
+SetLatestXTime(TimestampTz xtime)
+{
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   XLogRecoveryCtl->recoveryLastXTime = xtime;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+}
+
+/*
+ * Fetch timestamp of latest processed commit/abort record.
+ *
+ * This is the recoveryLastXTime value in shared memory, as stored by
+ * SetLatestXTime(); it is read under the info_lck spinlock.
+ */
+TimestampTz
+GetLatestXTime(void)
+{
+   TimestampTz lastXTime;
+
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   lastXTime = XLogRecoveryCtl->recoveryLastXTime;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   return lastXTime;
+}
+
+/*
+ * Save timestamp of the next chunk of WAL records to apply.
+ *
+ * We keep this in XLogRecoveryCtl, not a simple static variable, so that it
+ * can be seen by all backends.
+ *
+ * The value is stored in currentChunkStartTime under the info_lck spinlock
+ * and can be read back with GetCurrentChunkReplayStartTime().
+ */
+static void
+SetCurrentChunkStartTime(TimestampTz xtime)
+{
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   XLogRecoveryCtl->currentChunkStartTime = xtime;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+}
+
+/*
+ * Fetch the start timestamp of the current chunk of WAL records, i.e. the
+ * currentChunkStartTime value stored by SetCurrentChunkStartTime().
+ * Startup process maintains an accurate local copy in XLogReceiptTime
+ */
+TimestampTz
+GetCurrentChunkReplayStartTime(void)
+{
+   TimestampTz chunkStart;
+
+   SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+   chunkStart = XLogRecoveryCtl->currentChunkStartTime;
+   SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+   return chunkStart;
+}
+
+/*
+ * Returns time of receipt of current chunk of XLOG data, as well as
+ * whether it was received from streaming replication or from archives.
+ *
+ * '*rtime' receives XLogReceiptTime, and '*fromStream' is set to true only
+ * when XLogReceiptSource is XLOG_FROM_STREAM.  Both output pointers are
+ * written unconditionally, so neither may be NULL.
+ */
+void
+GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream)
+{
+   /*
+    * This must be executed in the startup process, since we don't export the
+    * relevant state to shared memory.  The assertion below checks only that
+    * we are in recovery.
+    */
+   Assert(InRecovery);
+
+   *rtime = XLogReceiptTime;
+   *fromStream = (XLogReceiptSource == XLOG_FROM_STREAM);
+}
+
+/*
+ * Verify that an integer parameter is set at least as high as recovery
+ * requires ('minValue'; per the messages below, the value it had on the
+ * primary server).
+ *
+ * If currValue >= minValue, nothing happens.  Otherwise recovery cannot
+ * continue and this function ends with a FATAL error.  Before that, if hot
+ * standby is already active, it emits a WARNING, requests a recovery pause
+ * and waits until the pause state goes back to RECOVERY_NOT_PAUSED; while
+ * waiting it handles startup-process interrupts and warns (once) if a
+ * promotion is requested.
+ *
+ * Note that text field supplied is a parameter name and does not require
+ * translation
+ */
+void
+RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue)
+{
+   if (currValue < minValue)
+   {
+       if (HotStandbyActiveInReplay())
+       {
+           bool        warned_for_promote = false; /* promotion warning already emitted? */
+
+           ereport(WARNING,
+                   (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                    errmsg("hot standby is not possible because of insufficient parameter settings"),
+                    errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
+                              param_name,
+                              currValue,
+                              minValue)));
+
+           SetRecoveryPause(true);
+
+           ereport(LOG,
+                   (errmsg("recovery has paused"),
+                    errdetail("If recovery is unpaused, the server will shut down."),
+                    errhint("You can then restart the server after making the necessary configuration changes.")));
+
+           while (GetRecoveryPauseState() != RECOVERY_NOT_PAUSED)
+           {
+               HandleStartupProcInterrupts();
+
+               if (CheckForStandbyTrigger())
+               {
+                   if (!warned_for_promote)
+                       ereport(WARNING,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("promotion is not possible because of insufficient parameter settings"),
+
+                       /*
+                        * Repeat the detail from above so it's easy to find
+                        * in the log.
+                        */
+                                errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
+                                          param_name,
+                                          currValue,
+                                          minValue),
+                                errhint("Restart the server after making the necessary configuration changes.")));
+                   warned_for_promote = true;
+               }
+
+               /*
+                * If recovery pause is requested then set it paused.  While
+                * we are in the loop, user might resume and pause again so
+                * set this every time.
+                */
+               ConfirmRecoveryPaused();
+
+               /*
+                * We wait on a condition variable that will wake us as soon
+                * as the pause ends, but we use a timeout so we can check the
+                * above conditions (interrupts, promotion request)
+                * periodically too.
+                */
+               ConditionVariableTimedSleep(&XLogRecoveryCtl->recoveryNotPausedCV, 1000,
+                                           WAIT_EVENT_RECOVERY_PAUSE);
+           }
+           ConditionVariableCancelSleep();
+       }
+
+       ereport(FATAL,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("recovery aborted because of insufficient parameter settings"),
+       /* Repeat the detail from above so it's easy to find in the log. */
+                errdetail("%s = %d is a lower setting than on the primary server, where its value was %d.",
+                          param_name,
+                          currValue,
+                          minValue),
+                errhint("You can restart the server after making the necessary configuration changes.")));
+   }
+}
diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c

index 90e1c483907ea7be472241f99db03e5632483ca0..54d5f20734bb45f2f8b0c53018b728b85ec09f49 100644 (file)
--- a/src/backend/access/transam/xlogutils.c
+++ b/src/backend/access/transam/xlogutils.c
@@ -20,7 +20,7 @@
  #include <unistd.h>
  
  #include "access/timeline.h"
-#include "access/xlog.h"
+#include "access/xlogrecovery.h"
  #include "access/xlog_internal.h"
  #include "access/xlogutils.h"
  #include "miscadmin.h"
@@ -46,8 +46,8 @@ bool      ignore_invalid_pages = false;
   * process you're running in, use RecoveryInProgress() but only after shared
   * memory startup and lock initialization.
   *
- * This is updated from xlog.c, but lives here because it's mostly read by
- * WAL redo functions.
+ * This is updated from xlog.c and xlogrecovery.c, but lives here because
+ * it's mostly read by WAL redo functions.
   */
  bool       InRecovery = false;
  
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c

index 23f691cd47538842706049190bf2bc8734d5fb77..4488e3a44357682a44928bb0bcbb5471ceb5a857 100644 (file)
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -38,6 +38,7 @@
  
  #include "access/xlog.h"
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "libpq/pqsignal.h"
  #include "miscadmin.h"
  #include "pgstat.h"
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c

index 32992bafa0f3063686751e3868302aeb50f95409..735fed490b6f24d3f74bbf34ba4f54639f028bef 100644 (file)
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -95,6 +95,7 @@
  
  #include "access/transam.h"
  #include "access/xlog.h"
+#include "access/xlogrecovery.h"
  #include "catalog/pg_control.h"
  #include "common/file_perm.h"
  #include "common/ip.h"
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c

index 9bae16bfc78ff370323b66e1d2e6df6c18166598..29cf8f18e1a8c8b937675d8fa09aed3ff37a89f6 100644 (file)
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -20,6 +20,7 @@
  #include "postgres.h"
  
  #include "access/xlog.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "libpq/pqsignal.h"
  #include "miscadmin.h"
diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c

index 4d71e71f6866244975f212f284e9149179ddd9e2..c29e82307fd7ff7ad06be2ba42cc2307efd7ea59 100644 (file)
--- a/src/backend/replication/logical/logicalfuncs.c
+++ b/src/backend/replication/logical/logicalfuncs.c
@@ -19,6 +19,7 @@
  
  #include "access/xact.h"
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "catalog/pg_type.h"
  #include "fmgr.h"
diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c

index ae6316d908628488ff2eef3f7d5a6843595f66f1..5149ebccb030b9f40281691a2030d37d7168ffa0 100644 (file)
--- a/src/backend/replication/slotfuncs.c
+++ b/src/backend/replication/slotfuncs.c
@@ -14,6 +14,7 @@
  
  #include "access/htup_details.h"
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "funcapi.h"
  #include "miscadmin.h"
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c

index b39fce8c23cb7fbcca42eed04a0057f1abf5bc9c..ceaff097b973af87a322731e9f242b4ec4758278 100644 (file)
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -56,6 +56,7 @@
  #include "access/transam.h"
  #include "access/xlog_internal.h"
  #include "access/xlogarchive.h"
+#include "access/xlogrecovery.h"
  #include "catalog/pg_authid.h"
  #include "catalog/pg_type.h"
  #include "common/ip.h"
diff --git a/src/backend/replication/walreceiverfuncs.c b/src/backend/replication/walreceiverfuncs.c

index c50728ea229b58815430c953d97cbb1080230b1d..90798b9d5377c41e825905f4f51120c8887ca341 100644 (file)
--- a/src/backend/replication/walreceiverfuncs.c
+++ b/src/backend/replication/walreceiverfuncs.c
@@ -23,6 +23,7 @@
  #include <signal.h>
  
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "pgstat.h"
  #include "postmaster/startup.h"
  #include "replication/walreceiver.h"
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c

index 655760fee3e608289fda82b09e51203c9a5e056b..a1dadd4c6ad7dca7eadd385121a6b540309f50d2 100644 (file)
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -55,6 +55,7 @@
  #include "access/xact.h"
  #include "access/xlog_internal.h"
  #include "access/xlogreader.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "catalog/pg_authid.h"
  #include "catalog/pg_type.h"
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c

index 9f26e41c4641efede3b136902b6b8b2df6cdd599..cd4ebe2fc5ea70370aa896b0e7d30f3ce42e0567 100644 (file)
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -22,6 +22,7 @@
  #include "access/subtrans.h"
  #include "access/syncscan.h"
  #include "access/twophase.h"
+#include "access/xlogrecovery.h"
  #include "commands/async.h"
  #include "miscadmin.h"
  #include "pgstat.h"
@@ -119,6 +120,7 @@ CalculateShmemSize(int *num_semaphores)
     size = add_size(size, PredicateLockShmemSize());
     size = add_size(size, ProcGlobalShmemSize());
     size = add_size(size, XLOGShmemSize());
+   size = add_size(size, XLogRecoveryShmemSize());
     size = add_size(size, CLOGShmemSize());
     size = add_size(size, CommitTsShmemSize());
     size = add_size(size, SUBTRANSShmemSize());
@@ -241,6 +243,7 @@ CreateSharedMemoryAndSemaphores(void)
      * Set up xlog, clog, and buffers
      */
     XLOGShmemInit();
+   XLogRecoveryShmemInit();
     CLOGShmemInit();
     CommitTsShmemInit();
     SUBTRANSShmemInit();
diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c

index 87ac0f74b276149487d9ce24f70ee40483d24397..27361ac8610286d9d7066f7fceda091ee5aea170 100644 (file)
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -20,6 +20,7 @@
  #include "access/twophase.h"
  #include "access/xact.h"
  #include "access/xloginsert.h"
+#include "access/xlogrecovery.h"
  #include "access/xlogutils.h"
  #include "miscadmin.h"
  #include "pgstat.h"
diff --git a/src/backend/storage/sync/sync.c b/src/backend/storage/sync/sync.c

index 543f691f2d2df9f004df562df0d72cc86b7a9f2a..e161d57761ebb3ceaf070696bd662ee2dcbb5b93 100644 (file)
--- a/src/backend/storage/sync/sync.c
+++ b/src/backend/storage/sync/sync.c
@@ -29,6 +29,7 @@
  #include "portability/instr_time.h"
  #include "postmaster/bgwriter.h"
  #include "storage/bufmgr.h"
+#include "storage/fd.h"
  #include "storage/ipc.h"
  #include "storage/md.h"
  #include "utils/hsearch.h"
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c

index e2fe219aa82be5bbf4c1d5c03a21c64717ecee8e..568ac62c2ab68f89065df3a058480a5efe5ceb33 100644 (file)
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -41,6 +41,7 @@
  #include "access/twophase.h"
  #include "access/xact.h"
  #include "access/xlog_internal.h"
+#include "access/xlogrecovery.h"
  #include "catalog/namespace.h"
  #include "catalog/pg_authid.h"
  #include "catalog/storage.h"
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h

index a4b1c1286f2b8a825e119ec4b2b330d0d0b2d2f9..4b45ac64db87c17569608cd3187cb9a2512a5f87 100644 (file)
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -11,13 +11,11 @@
  #ifndef XLOG_H
  #define XLOG_H
  
-#include "access/rmgr.h"
  #include "access/xlogdefs.h"
  #include "access/xlogreader.h"
  #include "datatype/timestamp.h"
  #include "lib/stringinfo.h"
  #include "nodes/pg_list.h"
-#include "storage/fd.h"
  
  
  /* Sync methods */
@@ -28,36 +26,10 @@
  #define SYNC_METHOD_OPEN_DSYNC 4   /* for O_DSYNC */
  extern int sync_method;
  
-/*
- * Recovery target type.
- * Only set during a Point in Time recovery, not when in standby mode.
- */
-typedef enum
-{
-   RECOVERY_TARGET_UNSET,
-   RECOVERY_TARGET_XID,
-   RECOVERY_TARGET_TIME,
-   RECOVERY_TARGET_NAME,
-   RECOVERY_TARGET_LSN,
-   RECOVERY_TARGET_IMMEDIATE
-} RecoveryTargetType;
-
-/*
- * Recovery target TimeLine goal
- */
-typedef enum
-{
-   RECOVERY_TARGET_TIMELINE_CONTROLFILE,
-   RECOVERY_TARGET_TIMELINE_LATEST,
-   RECOVERY_TARGET_TIMELINE_NUMERIC
-} RecoveryTargetTimeLineGoal;
-
  extern XLogRecPtr ProcLastRecPtr;
  extern XLogRecPtr XactLastRecEnd;
  extern PGDLLIMPORT XLogRecPtr XactLastCommitEnd;
  
-extern bool reachedConsistency;
-
  /* these variables are GUC parameters related to XLOG */
  extern int wal_segment_size;
  extern int min_wal_size_mb;
@@ -77,34 +49,10 @@ extern bool wal_recycle;
  extern bool *wal_consistency_checking;
  extern char *wal_consistency_checking_string;
  extern bool log_checkpoints;
-extern char *recoveryRestoreCommand;
-extern char *recoveryEndCommand;
-extern char *archiveCleanupCommand;
-extern bool recoveryTargetInclusive;
-extern int recoveryTargetAction;
-extern int recovery_min_apply_delay;
-extern char *PrimaryConnInfo;
-extern char *PrimarySlotName;
-extern bool wal_receiver_create_temp_slot;
  extern bool track_wal_io_timing;
  
-/* indirectly set via GUC system */
-extern TransactionId recoveryTargetXid;
-extern char *recovery_target_time_string;
-extern const char *recoveryTargetName;
-extern XLogRecPtr recoveryTargetLSN;
-extern RecoveryTargetType recoveryTarget;
-extern char *PromoteTriggerFile;
-extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal;
-extern TimeLineID recoveryTargetTLIRequested;
-extern TimeLineID recoveryTargetTLI;
-
  extern int CheckPointSegments;
  
-/* option set locally in startup process only when signal files exist */
-extern bool StandbyModeRequested;
-extern bool StandbyMode;
-
  /* Archive modes */
  typedef enum ArchiveMode
  {
@@ -138,14 +86,6 @@ typedef enum RecoveryState
     RECOVERY_STATE_DONE         /* currently in production */
  } RecoveryState;
  
-/* Recovery pause states */
-typedef enum RecoveryPauseState
-{
-   RECOVERY_NOT_PAUSED,        /* pause not requested */
-   RECOVERY_PAUSE_REQUESTED,   /* pause requested, but not yet paused */
-   RECOVERY_PAUSED             /* recovery is paused */
-} RecoveryPauseState;
-
  extern PGDLLIMPORT int wal_level;
  
  /* Is WAL archiving enabled (always or only while server is running normally)? */
@@ -274,19 +214,10 @@ extern void issue_xlog_fsync(int fd, XLogSegNo segno, TimeLineID tli);
  
  extern bool RecoveryInProgress(void);
  extern RecoveryState GetRecoveryState(void);
-extern bool HotStandbyActive(void);
-extern bool HotStandbyActiveInReplay(void);
  extern bool XLogInsertAllowed(void);
-extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
-extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
  extern XLogRecPtr GetXLogInsertRecPtr(void);
  extern XLogRecPtr GetXLogWriteRecPtr(void);
-extern RecoveryPauseState GetRecoveryPauseState(void);
-extern void SetRecoveryPause(bool recoveryPause);
-extern TimestampTz GetLatestXTime(void);
-extern TimestampTz GetCurrentChunkReplayStartTime(void);
  
-extern void UpdateControlFile(void);
  extern uint64 GetSystemIdentifier(void);
  extern char *GetMockAuthenticationNonce(void);
  extern bool DataChecksumsEnabled(void);
@@ -310,19 +241,23 @@ extern XLogRecPtr GetInsertRecPtr(void);
  extern XLogRecPtr GetFlushRecPtr(TimeLineID *insertTLI);
  extern TimeLineID GetWALInsertionTimeLine(void);
  extern XLogRecPtr GetLastImportantRecPtr(void);
-extern void RemovePromoteSignalFiles(void);
  
-extern bool PromoteIsTriggered(void);
-extern bool CheckPromoteSignal(void);
-extern void WakeupRecovery(void);
  extern void SetWalWriterSleeping(bool sleeping);
  
-extern void StartupRequestWalReceiverRestart(void);
-extern void XLogRequestWalReceiverReply(void);
-
  extern void assign_max_wal_size(int newval, void *extra);
  extern void assign_checkpoint_completion_target(double newval, void *extra);
  
+/*
+ * Routines used by xlogrecovery.c to call back into xlog.c during recovery.
+ */
+extern void RemoveNonParentXlogFiles(XLogRecPtr switchpoint, TimeLineID newTLI);
+extern bool XLogCheckpointNeeded(XLogSegNo new_segno);
+extern void SwitchIntoArchiveRecovery(XLogRecPtr EndRecPtr, TimeLineID replayTLI);
+extern void ReachedEndOfBackup(XLogRecPtr EndRecPtr, TimeLineID tli);
+extern void SetInstallXLogFileSegmentActive(void);
+extern bool IsInstallXLogFileSegmentActive(void);
+extern void XLogShutdownWalRcv(void);
+
  /*
   * Routines to start, stop, and get status of a base backup.
   */
diff --git a/src/include/access/xlogrecovery.h b/src/include/access/xlogrecovery.h

new file mode 100644 (file)

index 0000000..75a0f5f
--- /dev/null
+++ b/src/include/access/xlogrecovery.h
@@ -0,0 +1,157 @@
+/*
+ * xlogrecovery.h
+ *
+ * Functions for WAL recovery and standby mode
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/access/xlogrecovery.h
+ */
+#ifndef XLOGRECOVERY_H
+#define XLOGRECOVERY_H
+
+#include "access/xlogreader.h"
+#include "catalog/pg_control.h"
+#include "lib/stringinfo.h"
+#include "utils/timestamp.h"
+
+/*
+ * Recovery target type.
+ * Only set during a Point in Time recovery, not when in standby mode.
+ */
+typedef enum
+{
+   RECOVERY_TARGET_UNSET,
+   RECOVERY_TARGET_XID,
+   RECOVERY_TARGET_TIME,
+   RECOVERY_TARGET_NAME,
+   RECOVERY_TARGET_LSN,
+   RECOVERY_TARGET_IMMEDIATE
+} RecoveryTargetType;
+
+/*
+ * Recovery target TimeLine goal
+ */
+typedef enum
+{
+   RECOVERY_TARGET_TIMELINE_CONTROLFILE,
+   RECOVERY_TARGET_TIMELINE_LATEST,
+   RECOVERY_TARGET_TIMELINE_NUMERIC
+} RecoveryTargetTimeLineGoal;
+
+/* Recovery pause states */
+typedef enum RecoveryPauseState
+{
+   RECOVERY_NOT_PAUSED,        /* pause not requested */
+   RECOVERY_PAUSE_REQUESTED,   /* pause requested, but not yet paused */
+   RECOVERY_PAUSED             /* recovery is paused */
+} RecoveryPauseState;
+
+/* User-settable GUC parameters */
+extern bool recoveryTargetInclusive;
+extern int recoveryTargetAction;
+extern int recovery_min_apply_delay;
+extern char *PrimaryConnInfo;
+extern char *PrimarySlotName;
+extern char *recoveryRestoreCommand;
+extern char *recoveryEndCommand;
+extern char *archiveCleanupCommand;
+
+/* indirectly set via GUC system */
+extern TransactionId recoveryTargetXid;
+extern char *recovery_target_time_string;
+extern TimestampTz recoveryTargetTime;
+extern const char *recoveryTargetName;
+extern XLogRecPtr recoveryTargetLSN;
+extern RecoveryTargetType recoveryTarget;
+extern char *PromoteTriggerFile;
+extern bool wal_receiver_create_temp_slot;
+extern RecoveryTargetTimeLineGoal recoveryTargetTimeLineGoal;
+extern TimeLineID recoveryTargetTLIRequested;
+extern TimeLineID recoveryTargetTLI;
+
+/* Have we already reached a consistent database state? */
+extern bool reachedConsistency;
+
+/* Are we currently in standby mode? */
+extern bool StandbyMode;
+
+extern Size XLogRecoveryShmemSize(void);
+extern void XLogRecoveryShmemInit(void);
+
+extern void InitWalRecovery(ControlFileData *ControlFile, bool *wasShutdownPtr, bool *haveBackupLabel, bool *haveTblspcMap);
+extern void PerformWalRecovery(void);
+
+/*
+ * FinishWalRecovery() returns this.  It contains information about the point
+ * where recovery ended, and why it ended.
+ */
+typedef struct
+{
+   /*
+    * Information about the last valid or applied record, after which new WAL
+    * can be appended.  'lastRec' is the position where the last record
+    * starts, and 'endOfLog' is its end.  'lastPage' is a copy of the last
+    * partial page that contains endOfLog (or NULL if endOfLog is exactly at
+    * page boundary).  'lastPageBeginPtr' is the position where the last page
+    * begins.
+    *
+    * endOfLogTLI is the TLI in the filename of the XLOG segment containing
+    * the last applied record.  It could be different from lastRecTLI, if
+    * there was a timeline switch in that segment, and we were reading the
+    * old WAL from a segment belonging to a higher timeline.
+    */
+   XLogRecPtr  lastRec;        /* start of last valid or applied record */
+   TimeLineID  lastRecTLI;
+   XLogRecPtr  endOfLog;       /* end of last valid or applied record */
+   TimeLineID  endOfLogTLI;
+
+   XLogRecPtr  lastPageBeginPtr;   /* LSN of page that contains endOfLog */
+   char       *lastPage;       /* copy of the last page, up to endOfLog */
+
+   /*
+    * abortedRecPtr is the start pointer of a broken record at end of WAL
+    * when recovery completes; missingContrecPtr is the location of the first
+    * contrecord that went missing.  See CreateOverwriteContrecordRecord for
+    * details.
+    */
+   XLogRecPtr  abortedRecPtr;
+   XLogRecPtr  missingContrecPtr;
+
+   /* short human-readable string describing why recovery ended */
+   char       *recoveryStopReason;
+
+   /*
+    * If standby or recovery signal file was found, these flags are set
+    * accordingly.
+    */
+   bool        standby_signal_file_found;
+   bool        recovery_signal_file_found;
+} EndOfWalRecoveryInfo;
+
+extern EndOfWalRecoveryInfo *FinishWalRecovery(void);
+extern void ShutdownWalRecovery(void);
+extern void RemovePromoteSignalFiles(void);
+
+extern bool HotStandbyActive(void);
+extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI);
+extern RecoveryPauseState GetRecoveryPauseState(void);
+extern void SetRecoveryPause(bool recoveryPause);
+extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
+extern TimestampTz GetLatestXTime(void);
+extern TimestampTz GetCurrentChunkReplayStartTime(void);
+extern XLogRecPtr GetCurrentReplayRecPtr(TimeLineID *replayEndTLI);
+
+extern bool PromoteIsTriggered(void);
+extern bool CheckPromoteSignal(void);
+extern void WakeupRecovery(void);
+
+extern void StartupRequestWalReceiverRestart(void);
+extern void XLogRequestWalReceiverReply(void);
+
+extern void RecoveryRequiresIntParameter(const char *param_name, int currValue, int minValue);
+
+extern void xlog_outdesc(StringInfo buf, XLogReaderState *record);
+
+#endif                         /* XLOGRECOVERY_H */
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index bfb7802f2d3cac942d5e79375cf391a228cde49b..15684f53baabf9e16a8fe4e818217aeb9aa3b576 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -607,6 +607,7 @@ EndDirectModify_function
  EndForeignInsert_function
  EndForeignModify_function
  EndForeignScan_function
+EndOfWalRecoveryInfo
  EndSampleScan_function
  EnumItem
  EolType
@@ -2945,6 +2946,7 @@ XLogRecordBlockCompressHeader
  XLogRecordBlockHeader
  XLogRecordBlockImageHeader
  XLogRecordBuffer
+XLogRecoveryCtlData
  XLogRedoAction
  XLogSegNo
  XLogSource
author	Heikki Linnakangas <[email protected]>
	Wed, 16 Feb 2022 07:30:38 +0000 (09:30 +0200)
committer	Heikki Linnakangas <[email protected]>
	Wed, 16 Feb 2022 07:30:38 +0000 (09:30 +0200)
contrib/pg_prewarm/autoprewarm.c		patch | blob | blame | history
src/backend/access/transam/Makefile		patch | blob | blame | history
src/backend/access/transam/xact.c		patch | blob | blame | history
src/backend/access/transam/xlog.c		patch | blob | blame | history
src/backend/access/transam/xlogfuncs.c		patch | blob | blame | history
src/backend/access/transam/xlogrecovery.c	[new file with mode: 0644]	patch | blob
src/backend/access/transam/xlogutils.c		patch | blob | blame | history
src/backend/postmaster/checkpointer.c		patch | blob | blame | history
src/backend/postmaster/postmaster.c		patch | blob | blame | history
src/backend/postmaster/startup.c		patch | blob | blame | history
src/backend/replication/logical/logicalfuncs.c		patch | blob | blame | history
src/backend/replication/slotfuncs.c		patch | blob | blame | history
src/backend/replication/walreceiver.c		patch | blob | blame | history
src/backend/replication/walreceiverfuncs.c		patch | blob | blame | history
src/backend/replication/walsender.c		patch | blob | blame | history
src/backend/storage/ipc/ipci.c		patch | blob | blame | history
src/backend/storage/ipc/standby.c		patch | blob | blame | history
src/backend/storage/sync/sync.c		patch | blob | blame | history
src/backend/utils/misc/guc.c		patch | blob | blame | history
src/include/access/xlog.h		patch | blob | blame | history
src/include/access/xlogrecovery.h	[new file with mode: 0644]	patch | blob
src/tools/pgindent/typedefs.list		patch | blob | blame | history