Refactor WaitForLSNReplay() to return the result of waiting
author Alexander Korotkov <[email protected]>
Thu, 24 Oct 2024 11:38:27 +0000 (14:38 +0300)
committer Alexander Korotkov <[email protected]>
Thu, 24 Oct 2024 11:38:27 +0000 (14:38 +0300)
Currently, WaitForLSNReplay() immediately throws an error if waiting for LSN
replay is not successful.  This commit teaches WaitForLSNReplay() to return
the result of waiting, while making pg_wal_replay_wait() responsible for
throwing an appropriate error.

This is preparation for adding a 'no_error' argument to pg_wal_replay_wait()
and a new function, pg_wal_replay_wait_status(), which returns the last wait
result status.

Additionally, we stop distinguishing situations when we find our instance to
be not in a recovery state before entering the waiting loop and inside
the waiting loop.  Standby promotion may happen at any moment, even between
issuing a procedure call statement and pg_wal_replay_wait() doing a first
check of recovery status.  Thus, there is no point in distinguishing these
situations.

Also, since we may exit the waiting loop and see our instance not in recovery
without throwing an error, we need to call deleteLSNWaiter() in that case.  We
do this unconditionally for the sake of simplicity, even though, if the standby
was promoted after reaching the target LSN, the startup process has surely
already deleted us.

Reported-by: Michael Paquier
Discussion: https://postgr.es/m/ZtUF17gF0pNpwZDI%40paquier.xyz
Reviewed-by: Michael Paquier, Pavel Borisov
src/backend/access/transam/xlogfuncs.c
src/backend/access/transam/xlogwait.c
src/include/access/xlogwait.h
src/tools/pgindent/typedefs.list

index cbf84ef7d8f7478b681bf1c154d5cd8919f96ba0..ddca78d371732b7102b150cf43f2a75e5e4d4005 100644 (file)
@@ -759,6 +759,7 @@ pg_wal_replay_wait(PG_FUNCTION_ARGS)
 {
    XLogRecPtr  target_lsn = PG_GETARG_LSN(0);
    int64       timeout = PG_GETARG_INT64(1);
+   WaitLSNResult result;
 
    if (timeout < 0)
        ereport(ERROR,
@@ -799,7 +800,35 @@ pg_wal_replay_wait(PG_FUNCTION_ARGS)
     */
    Assert(MyProc->xmin == InvalidTransactionId);
 
-   (void) WaitForLSNReplay(target_lsn, timeout);
+   result = WaitForLSNReplay(target_lsn, timeout);
+
+   /*
+    * Process the result of WaitForLSNReplay().  Throw appropriate error if
+    * needed.
+    */
+   switch (result)
+   {
+       case WAIT_LSN_RESULT_SUCCESS:
+           /* Nothing to do on success */
+           break;
+
+       case WAIT_LSN_RESULT_TIMEOUT:
+           ereport(ERROR,
+                   (errcode(ERRCODE_QUERY_CANCELED),
+                    errmsg("timed out while waiting for target LSN %X/%X to be replayed; current replay LSN %X/%X",
+                           LSN_FORMAT_ARGS(target_lsn),
+                           LSN_FORMAT_ARGS(GetXLogReplayRecPtr(NULL)))));
+           break;
+
+       case WAIT_LSN_RESULT_NOT_IN_RECOVERY:
+           ereport(ERROR,
+                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+                    errmsg("recovery is not in progress"),
+                    errdetail("Recovery ended before replaying target LSN %X/%X; last replay LSN %X/%X.",
+                              LSN_FORMAT_ARGS(target_lsn),
+                              LSN_FORMAT_ARGS(GetXLogReplayRecPtr(NULL)))));
+           break;
+   }
 
    PG_RETURN_VOID();
 }
index 353b7854dc86ba6a4548bfdc6544e1820b7e274f..58fb10aa5a88e9526a2a7ac442435f7642eb6aad 100644 (file)
@@ -217,7 +217,7 @@ WaitLSNCleanup(void)
  * Wait using MyLatch till the given LSN is replayed, the postmaster dies or
  * timeout happens.
  */
-void
+WaitLSNResult
 WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
 {
    XLogRecPtr  currentLSN;
@@ -240,17 +240,14 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
         * check the last replay LSN before reporting an error.
         */
        if (targetLSN <= GetXLogReplayRecPtr(NULL))
-           return;
-       ereport(ERROR,
-               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                errmsg("recovery is not in progress"),
-                errhint("Waiting for LSN can only be executed during recovery.")));
+           return WAIT_LSN_RESULT_SUCCESS;
+       return WAIT_LSN_RESULT_NOT_IN_RECOVERY;
    }
    else
    {
        /* If target LSN is already replayed, exit immediately */
        if (targetLSN <= GetXLogReplayRecPtr(NULL))
-           return;
+           return WAIT_LSN_RESULT_SUCCESS;
    }
 
    if (timeout > 0)
@@ -276,17 +273,13 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
        {
            /*
             * Recovery was ended, but recheck if target LSN was already
-            * replayed.
+            * replayed.  See the comment regarding deleteLSNWaiter() below.
             */
+           deleteLSNWaiter();
            currentLSN = GetXLogReplayRecPtr(NULL);
            if (targetLSN <= currentLSN)
-               return;
-           ereport(ERROR,
-                   (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                    errmsg("recovery is not in progress"),
-                    errdetail("Recovery ended before replaying target LSN %X/%X; last replay LSN %X/%X.",
-                              LSN_FORMAT_ARGS(targetLSN),
-                              LSN_FORMAT_ARGS(currentLSN))));
+               return WAIT_LSN_RESULT_SUCCESS;
+           return WAIT_LSN_RESULT_NOT_IN_RECOVERY;
        }
        else
        {
@@ -338,11 +331,7 @@ WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout)
     * If we didn't reach the target LSN, we must be exited by timeout.
     */
    if (targetLSN > currentLSN)
-   {
-       ereport(ERROR,
-               (errcode(ERRCODE_QUERY_CANCELED),
-                errmsg("timed out while waiting for target LSN %X/%X to be replayed; current replay LSN %X/%X",
-                       LSN_FORMAT_ARGS(targetLSN),
-                       LSN_FORMAT_ARGS(currentLSN))));
-   }
+       return WAIT_LSN_RESULT_TIMEOUT;
+
+   return WAIT_LSN_RESULT_SUCCESS;
 }
index 31e208cb7ade41595eaed3a31091dbaa7f48ffe5..eb2260aa2ecff4c85861270c4354ba3cb95090a8 100644 (file)
@@ -70,12 +70,23 @@ typedef struct WaitLSNState
    WaitLSNProcInfo procInfos[FLEXIBLE_ARRAY_MEMBER];
 } WaitLSNState;
 
+/*
+ * Result statuses for WaitForLSNReplay().
+ */
+typedef enum
+{
+   WAIT_LSN_RESULT_SUCCESS,    /* Target LSN is reached */
+   WAIT_LSN_RESULT_TIMEOUT,    /* Timeout occurred */
+   WAIT_LSN_RESULT_NOT_IN_RECOVERY,    /* Recovery ended before or during our
+                                        * wait */
+} WaitLSNResult;
+
 extern PGDLLIMPORT WaitLSNState *waitLSNState;
 
 extern Size WaitLSNShmemSize(void);
 extern void WaitLSNShmemInit(void);
 extern void WaitLSNSetLatches(XLogRecPtr currentLSN);
 extern void WaitLSNCleanup(void);
-extern void WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout);
+extern WaitLSNResult WaitForLSNReplay(XLogRecPtr targetLSN, int64 timeout);
 
 #endif                         /* XLOG_WAIT_H */
index 4b8139c4b47987223a20d045fd95c59c5143a371..110089695e19ddfc5338819305537d54480f6e89 100644 (file)
@@ -3126,6 +3126,7 @@ WaitEventIPC
 WaitEventSet
 WaitEventTimeout
 WaitLSNProcInfo
+WaitLSNResult
 WaitLSNState
 WaitPMResult
 WalCloseMethod