Report progress of startup operations that take a long time.
authorRobert Haas <[email protected]>
Mon, 25 Oct 2021 15:51:57 +0000 (11:51 -0400)
committerRobert Haas <[email protected]>
Mon, 25 Oct 2021 15:51:57 +0000 (11:51 -0400)
Users sometimes get concerned whe they start the server and it
emits a few messages and then doesn't emit any more messages for
a long time. Generally, what's happening is either that the
system is taking a long time to apply WAL, or it's taking a
long time to reset unlogged relations, or it's taking a long
time to fsync the data directory, but it's not easy to tell
which is the case.

To fix that, add a new 'log_startup_progress_interval' setting,
by default 10s. When an operation that is known to be potentially
long-running takes more than this amount of time, we'll log a
status update each time this interval elapses.

To avoid undesirable log chatter, don't log anything about WAL
replay when in standby mode.

Nitin Jadhav and Robert Haas, reviewed by Amul Sul, Bharath
Rupireddy, Justin Pryzby, Michael Paquier, and Álvaro Herrera.

Discussion: https://postgr.es/m/CA+TgmoaHQrgDFOBwgY16XCoMtXxsrVGFB2jNCvb7-ubuEe1MGg@mail.gmail.com
Discussion: https://postgr.es/m/CAMm1aWaHF7VE69572_OLQ+MgpT5RUiUDgF1x5RrtkJBLdpRj3Q@mail.gmail.com

doc/src/sgml/config.sgml
src/backend/access/transam/xlog.c
src/backend/nls.mk
src/backend/postmaster/startup.c
src/backend/storage/file/fd.c
src/backend/storage/file/reinit.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/include/postmaster/startup.h
src/include/utils/timeout.h

index 0bcc6fd3222fd71137325f9879b732f5c10334d8..4c73fe6ce8ee21076f0b17716bfbb9d6769ace5c 100644 (file)
@@ -6549,6 +6549,30 @@ local0.*    /var/log/postgresql
        </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-log-startup-progress-interval" xreflabel="log_startup_progress_interval">
+      <term><varname>log_startup_progress_interval</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>log_startup_progress_interval</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+       <listitem>
+        <para>
+         Sets the amount of time after which the startup process will log
+         a message about a long-running operation that is still in progress,
+         as well as the interval between further progress messages for that
+         operation. This setting is applied separately to each operation.
+         For example, if syncing the data directory takes 25 seconds and
+         thereafter resetting unlogged relations takes 8 seconds, and if this
+         setting has the default value of 10 seconds, then a messages will be
+         logged for syncing the data directory after it has been in progress
+         for 10 seconds and again after it has been in progress for 20 seconds,
+         but nothing will be logged for resetting unlogged operations.
+         A setting of <literal>0</literal> disables the feature. If this value
+         is specified without units, it is taken as milliseconds.
+        </para>
+       </listitem>
+     </varlistentry>
+
      </variablelist>
 
     <para>
index 142506834c49e07dac9f3f27fe055b4109b6ff72..cd553d6e12f5b20813160df90bcd8cc088064827 100644 (file)
@@ -79,6 +79,7 @@
 #include "utils/relmapper.h"
 #include "utils/pg_rusage.h"
 #include "utils/snapmgr.h"
+#include "utils/timeout.h"
 #include "utils/timestamp.h"
 
 extern uint32 bootstrap_data_checksum_version;
@@ -6718,6 +6719,11 @@ StartupXLOG(void)
         */
        ValidateXLOGDirectoryStructure();
 
+       /* Set up timeout handler needed to report startup progress. */
+       if (!IsBootstrapProcessingMode())
+               RegisterTimeout(STARTUP_PROGRESS_TIMEOUT,
+                                               startup_progress_timeout_handler);
+
        /*----------
         * If we previously crashed, perform a couple of actions:
         *
@@ -7491,6 +7497,10 @@ StartupXLOG(void)
                                        (errmsg("redo starts at %X/%X",
                                                        LSN_FORMAT_ARGS(ReadRecPtr))));
 
+                       /* Prepare to report progress of the redo phase. */
+                       if (!StandbyMode)
+                               begin_startup_progress_phase();
+
                        /*
                         * main redo apply loop
                         */
@@ -7498,6 +7508,10 @@ StartupXLOG(void)
                        {
                                bool            switchedTLI = false;
 
+                               if (!StandbyMode)
+                                       ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+                                                                                        LSN_FORMAT_ARGS(ReadRecPtr));
+
 #ifdef WAL_DEBUG
                                if (XLOG_DEBUG ||
                                        (rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
index 771b58d4f45bf8b9d287b645d113d96fda09cc40..355299804f3918fb53ef3233b79aa133705ac188 100644 (file)
@@ -10,13 +10,15 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
     yyerror \
     parser_yyerror \
     scanner_yyerror \
-    report_invalid_record:2
+    report_invalid_record:2 \
+    ereport_startup_progress
 GETTEXT_FLAGS    = $(BACKEND_COMMON_GETTEXT_FLAGS) \
     GUC_check_errmsg:1:c-format \
     GUC_check_errdetail:1:c-format \
     GUC_check_errhint:1:c-format \
     write_stderr:1:c-format \
-    report_invalid_record:2:c-format
+    report_invalid_record:2:c-format \
+    ereport_startup_progress:1:c-format
 
 gettext-files: distprep
        find $(srcdir) $(srcdir)/../common $(srcdir)/../port -name '*.c' -print | LC_ALL=C sort >$@
index 0f4f00d6895194cf2d6e4aa9daeaf0439ddb0473..28e68dd8715ecbff85ae7cbc19785bd2a7caef87 100644 (file)
@@ -59,6 +59,22 @@ static volatile sig_atomic_t promote_signaled = false;
  */
 static volatile sig_atomic_t in_restore_command = false;
 
+/*
+ * Time at which the most recent startup operation started.
+ */
+static TimestampTz startup_progress_phase_start_time;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
+ */
+static volatile sig_atomic_t startup_progress_timer_expired = false;
+
+/*
+ * Time between progress updates for long-running startup operations.
+ */
+int log_startup_progress_interval = 10000;     /* 10 sec */
+
 /* Signal handlers */
 static void StartupProcTriggerHandler(SIGNAL_ARGS);
 static void StartupProcSigHupHandler(SIGNAL_ARGS);
@@ -282,3 +298,58 @@ ResetPromoteSignaled(void)
 {
        promote_signaled = false;
 }
+
+/*
+ * Set a flag indicating that it's time to log a progress report.
+ */
+void
+startup_progress_timeout_handler(void)
+{
+       startup_progress_timer_expired = true;
+}
+
+/*
+ * Set the start timestamp of the current operation and enable the timeout.
+ */
+void
+begin_startup_progress_phase(void)
+{
+       TimestampTz fin_time;
+
+       /* Feature is disabled. */
+       if (log_startup_progress_interval == 0)
+               return;
+
+       startup_progress_phase_start_time = GetCurrentTimestamp();
+       fin_time = TimestampTzPlusMilliseconds(startup_progress_phase_start_time,
+                                                                                  log_startup_progress_interval);
+       enable_timeout_every(STARTUP_PROGRESS_TIMEOUT, fin_time,
+                                                log_startup_progress_interval);
+}
+
+/*
+ * Report whether startup progress timeout has occurred. Reset the timer flag
+ * if it did, set the elapsed time to the out parameters and return true,
+ * otherwise return false.
+ */
+bool
+has_startup_progress_timeout_expired(long *secs, int *usecs)
+{
+       long            seconds;
+       int                     useconds;
+       TimestampTz now;
+
+       /* No timeout has occurred. */
+       if (!startup_progress_timer_expired)
+               return false;
+
+       /* Calculate the elapsed time. */
+       now = GetCurrentTimestamp();
+       TimestampDifference(startup_progress_phase_start_time, now, &seconds, &useconds);
+
+       *secs = seconds;
+       *usecs = useconds;
+       startup_progress_timer_expired = false;
+
+       return true;
+}
index f9cda6906d2ba381d3a17e972149418b718eac44..cb1a8dd34f2f3e79756e4b996e0c7a9a54282ad7 100644 (file)
@@ -96,6 +96,7 @@
 #include "pgstat.h"
 #include "port/pg_iovec.h"
 #include "portability/mem.h"
+#include "postmaster/startup.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "utils/guc.h"
@@ -3381,6 +3382,9 @@ do_syncfs(const char *path)
 {
        int                     fd;
 
+       ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
+                                                        path);
+
        fd = OpenTransientFile(path, O_RDONLY);
        if (fd < 0)
        {
@@ -3465,6 +3469,9 @@ SyncDataDirectory(void)
                 * directories.
                 */
 
+               /* Prepare to report progress syncing the data directory via syncfs. */
+               begin_startup_progress_phase();
+
                /* Sync the top level pgdata directory. */
                do_syncfs(".");
                /* If any tablespaces are configured, sync each of those. */
@@ -3487,18 +3494,24 @@ SyncDataDirectory(void)
        }
 #endif                                                 /* !HAVE_SYNCFS */
 
+#ifdef PG_FLUSH_DATA_WORKS
+       /* Prepare to report progress of the pre-fsync phase. */
+       begin_startup_progress_phase();
+
        /*
         * If possible, hint to the kernel that we're soon going to fsync the data
         * directory and its contents.  Errors in this step are even less
         * interesting than normal, so log them only at DEBUG1.
         */
-#ifdef PG_FLUSH_DATA_WORKS
        walkdir(".", pre_sync_fname, false, DEBUG1);
        if (xlog_is_symlink)
                walkdir("pg_wal", pre_sync_fname, false, DEBUG1);
        walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
 #endif
 
+       /* Prepare to report progress syncing the data directory via fsync. */
+       begin_startup_progress_phase();
+
        /*
         * Now we do the fsync()s in the same order.
         *
@@ -3601,6 +3614,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
        if (isdir)
                return;
 
+       ereport_startup_progress("syncing data directory (pre-fsync), elapsed time: %ld.%02d s, current path: %s",
+                                                        fname);
+
        fd = OpenTransientFile(fname, O_RDONLY | PG_BINARY);
 
        if (fd < 0)
@@ -3630,6 +3646,9 @@ pre_sync_fname(const char *fname, bool isdir, int elevel)
 static void
 datadir_fsync_fname(const char *fname, bool isdir, int elevel)
 {
+       ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
+                                                        fname);
+
        /*
         * We want to silently ignoring errors about unreadable files.  Pass that
         * desire on to fsync_fname_ext().
index 40c758d789ddb0217407716b72cd825417dff2af..0ae3fb6902109bce30ef160b8cba81d16b05db4f 100644 (file)
@@ -17,6 +17,7 @@
 #include <unistd.h>
 
 #include "common/relpath.h"
+#include "postmaster/startup.h"
 #include "storage/copydir.h"
 #include "storage/fd.h"
 #include "storage/reinit.h"
@@ -65,6 +66,9 @@ ResetUnloggedRelations(int op)
                                                                   ALLOCSET_DEFAULT_SIZES);
        oldctx = MemoryContextSwitchTo(tmpctx);
 
+       /* Prepare to report progress resetting unlogged relations. */
+       begin_startup_progress_phase();
+
        /*
         * First process unlogged files in pg_default ($PGDATA/base)
         */
@@ -136,6 +140,14 @@ ResetUnloggedRelationsInTablespaceDir(const char *tsdirname, int op)
 
                snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
                                 tsdirname, de->d_name);
+
+               if (op & UNLOGGED_RELATION_INIT)
+                       ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
+                                                                        dbspace_path);
+               else if (op & UNLOGGED_RELATION_CLEANUP)
+                       ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
+                                                                        dbspace_path);
+
                ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
        }
 
index d2ce4a845065787934c2196bde74884eb19dc5de..e91d5a3cfdabd848c56b95e91a356eddaeccfdb7 100644 (file)
@@ -71,6 +71,7 @@
 #include "postmaster/bgworker_internals.h"
 #include "postmaster/bgwriter.h"
 #include "postmaster/postmaster.h"
+#include "postmaster/startup.h"
 #include "postmaster/syslogger.h"
 #include "postmaster/walwriter.h"
 #include "replication/logicallauncher.h"
@@ -3570,6 +3571,18 @@ static struct config_int ConfigureNamesInt[] =
                check_client_connection_check_interval, NULL, NULL
        },
 
+       {
+               {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+                       gettext_noop("Time between progress updates for "
+                                                "long-running startup operations."),
+                       gettext_noop("0 turns this feature off."),
+                       GUC_UNIT_MS,
+               },
+               &log_startup_progress_interval,
+               10000, 0, INT_MAX,
+               NULL, NULL, NULL
+       },
+
        /* End-of-list marker */
        {
                {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
index 3fe9a53cb3fdeed1763fe0822ca0ca9de1846084..1cbc9feeb6f44ce635d7479e5255eae4cf67cde5 100644 (file)
                                        # are logged regardless of their duration; 1.0 logs all
                                        # statements from all transactions, 0.0 never logs
 
+#log_startup_progress_interval = 10s   # Time between progress updates for
+                                                                               # long-running startup operations.
+                                                                               # 0 disables the feature, > 0 indicates
+                                                                               # the interval in milliseconds.
+
 # - What to Log -
 
 #debug_print_parse = off
index bf6adf1d36e79be1b035a774ecb0bf91d1ee2d6d..2fb208bdb548b58752dd605098145f3e1a9ab0a9 100644 (file)
 #ifndef _STARTUP_H
 #define _STARTUP_H
 
+/*
+ * Log the startup progress message if a timer has expired.
+ */
+#define ereport_startup_progress(msg, ...) \
+       do { \
+               long    secs; \
+               int     usecs; \
+               if (has_startup_progress_timeout_expired(&secs, &usecs)) \
+                       ereport(LOG, errmsg(msg, secs, (usecs / 10000),  __VA_ARGS__ )); \
+       } while(0)
+
+extern int  log_startup_progress_interval;
+
 extern void HandleStartupProcInterrupts(void);
 extern void StartupProcessMain(void) pg_attribute_noreturn();
 extern void PreRestoreCommand(void);
@@ -19,4 +32,8 @@ extern void PostRestoreCommand(void);
 extern bool IsPromoteSignaled(void);
 extern void ResetPromoteSignaled(void);
 
+extern void begin_startup_progress_phase(void);
+extern void startup_progress_timeout_handler(void);
+extern bool has_startup_progress_timeout_expired(long *secs, int *usecs);
+
 #endif                                                 /* _STARTUP_H */
index 1b13ac96e0e4788c35ecbdf78e928b39749a9167..2cbc5de4d99526557279333fb1c13253fbdb8e73 100644 (file)
@@ -33,6 +33,7 @@ typedef enum TimeoutId
        IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
        IDLE_SESSION_TIMEOUT,
        CLIENT_CONNECTION_CHECK_TIMEOUT,
+       STARTUP_PROGRESS_TIMEOUT,
        /* First user-definable timeout reason */
        USER_TIMEOUT,
        /* Maximum number of timeout reasons */