</listitem>
</varlistentry>
+ <varlistentry id="guc-log-startup-progress-interval" xreflabel="log_startup_progress_interval">
+ <term><varname>log_startup_progress_interval</varname> (<type>integer</type>)
+ <indexterm>
+ <primary><varname>log_startup_progress_interval</varname> configuration parameter</primary>
+ </indexterm>
+ </term>
+ <listitem>
+ <para>
+ Sets the amount of time after which the startup process will log
+ a message about a long-running operation that is still in progress,
+ as well as the interval between further progress messages for that
+ operation. This setting is applied separately to each operation.
+ For example, if syncing the data directory takes 25 seconds and
+ thereafter resetting unlogged relations takes 8 seconds, and if this
+ setting has the default value of 10 seconds, then a messages will be
+ logged for syncing the data directory after it has been in progress
+ for 10 seconds and again after it has been in progress for 20 seconds,
+ but nothing will be logged for resetting unlogged operations.
+ A setting of <literal>0</literal> disables the feature. If this value
+ is specified without units, it is taken as milliseconds.
+ </para>
+ </listitem>
+ </varlistentry>
+
</variablelist>
<para>
#include "utils/relmapper.h"
#include "utils/pg_rusage.h"
#include "utils/snapmgr.h"
+#include "utils/timeout.h"
#include "utils/timestamp.h"
extern uint32 bootstrap_data_checksum_version;
*/
ValidateXLOGDirectoryStructure();
+ /* Set up timeout handler needed to report startup progress. */
+ if (!IsBootstrapProcessingMode())
+ RegisterTimeout(STARTUP_PROGRESS_TIMEOUT,
+ startup_progress_timeout_handler);
+
/*----------
* If we previously crashed, perform a couple of actions:
*
(errmsg("redo starts at %X/%X",
LSN_FORMAT_ARGS(ReadRecPtr))));
+ /* Prepare to report progress of the redo phase. */
+ if (!StandbyMode)
+ begin_startup_progress_phase();
+
/*
* main redo apply loop
*/
{
bool switchedTLI = false;
+ if (!StandbyMode)
+ ereport_startup_progress("redo in progress, elapsed time: %ld.%02d s, current LSN: %X/%X",
+ LSN_FORMAT_ARGS(ReadRecPtr));
+
#ifdef WAL_DEBUG
if (XLOG_DEBUG ||
(rmid == RM_XACT_ID && trace_recovery_messages <= DEBUG2) ||
yyerror \
parser_yyerror \
scanner_yyerror \
- report_invalid_record:2
+ report_invalid_record:2 \
+ ereport_startup_progress
GETTEXT_FLAGS = $(BACKEND_COMMON_GETTEXT_FLAGS) \
GUC_check_errmsg:1:c-format \
GUC_check_errdetail:1:c-format \
GUC_check_errhint:1:c-format \
write_stderr:1:c-format \
- report_invalid_record:2:c-format
+ report_invalid_record:2:c-format \
+ ereport_startup_progress:1:c-format
gettext-files: distprep
find $(srcdir) $(srcdir)/../common $(srcdir)/../port -name '*.c' -print | LC_ALL=C sort >$@
*/
static volatile sig_atomic_t in_restore_command = false;
+/*
+ * Time at which the most recent startup operation started.
+ */
+static TimestampTz startup_progress_phase_start_time;
+
+/*
+ * Indicates whether the startup progress interval mentioned by the user is
+ * elapsed or not. TRUE if timeout occurred, FALSE otherwise.
+ */
+static volatile sig_atomic_t startup_progress_timer_expired = false;
+
+/*
+ * Time between progress updates for long-running startup operations.
+ */
+int log_startup_progress_interval = 10000; /* 10 sec */
+
/* Signal handlers */
static void StartupProcTriggerHandler(SIGNAL_ARGS);
static void StartupProcSigHupHandler(SIGNAL_ARGS);
{
promote_signaled = false;
}
+
+/*
+ * Set a flag indicating that it's time to log a progress report.
+ */
+void
+startup_progress_timeout_handler(void)
+{
+ startup_progress_timer_expired = true;
+}
+
+/*
+ * Set the start timestamp of the current operation and enable the timeout.
+ */
+void
+begin_startup_progress_phase(void)
+{
+ TimestampTz fin_time;
+
+ /* Feature is disabled. */
+ if (log_startup_progress_interval == 0)
+ return;
+
+ startup_progress_phase_start_time = GetCurrentTimestamp();
+ fin_time = TimestampTzPlusMilliseconds(startup_progress_phase_start_time,
+ log_startup_progress_interval);
+ enable_timeout_every(STARTUP_PROGRESS_TIMEOUT, fin_time,
+ log_startup_progress_interval);
+}
+
+/*
+ * Report whether startup progress timeout has occurred. Reset the timer flag
+ * if it did, set the elapsed time to the out parameters and return true,
+ * otherwise return false.
+ */
+bool
+has_startup_progress_timeout_expired(long *secs, int *usecs)
+{
+ long seconds;
+ int useconds;
+ TimestampTz now;
+
+ /* No timeout has occurred. */
+ if (!startup_progress_timer_expired)
+ return false;
+
+ /* Calculate the elapsed time. */
+ now = GetCurrentTimestamp();
+ TimestampDifference(startup_progress_phase_start_time, now, &seconds, &useconds);
+
+ *secs = seconds;
+ *usecs = useconds;
+ startup_progress_timer_expired = false;
+
+ return true;
+}
#include "pgstat.h"
#include "port/pg_iovec.h"
#include "portability/mem.h"
+#include "postmaster/startup.h"
#include "storage/fd.h"
#include "storage/ipc.h"
#include "utils/guc.h"
{
int fd;
+ ereport_startup_progress("syncing data directory (syncfs), elapsed time: %ld.%02d s, current path: %s",
+ path);
+
fd = OpenTransientFile(path, O_RDONLY);
if (fd < 0)
{
* directories.
*/
+ /* Prepare to report progress syncing the data directory via syncfs. */
+ begin_startup_progress_phase();
+
/* Sync the top level pgdata directory. */
do_syncfs(".");
/* If any tablespaces are configured, sync each of those. */
}
#endif /* !HAVE_SYNCFS */
+#ifdef PG_FLUSH_DATA_WORKS
+ /* Prepare to report progress of the pre-fsync phase. */
+ begin_startup_progress_phase();
+
/*
* If possible, hint to the kernel that we're soon going to fsync the data
* directory and its contents. Errors in this step are even less
* interesting than normal, so log them only at DEBUG1.
*/
-#ifdef PG_FLUSH_DATA_WORKS
walkdir(".", pre_sync_fname, false, DEBUG1);
if (xlog_is_symlink)
walkdir("pg_wal", pre_sync_fname, false, DEBUG1);
walkdir("pg_tblspc", pre_sync_fname, true, DEBUG1);
#endif
+ /* Prepare to report progress syncing the data directory via fsync. */
+ begin_startup_progress_phase();
+
/*
* Now we do the fsync()s in the same order.
*
if (isdir)
return;
+ ereport_startup_progress("syncing data directory (pre-fsync), elapsed time: %ld.%02d s, current path: %s",
+ fname);
+
fd = OpenTransientFile(fname, O_RDONLY | PG_BINARY);
if (fd < 0)
static void
datadir_fsync_fname(const char *fname, bool isdir, int elevel)
{
+ ereport_startup_progress("syncing data directory (fsync), elapsed time: %ld.%02d s, current path: %s",
+ fname);
+
/*
* We want to silently ignoring errors about unreadable files. Pass that
* desire on to fsync_fname_ext().
#include <unistd.h>
#include "common/relpath.h"
+#include "postmaster/startup.h"
#include "storage/copydir.h"
#include "storage/fd.h"
#include "storage/reinit.h"
ALLOCSET_DEFAULT_SIZES);
oldctx = MemoryContextSwitchTo(tmpctx);
+ /* Prepare to report progress resetting unlogged relations. */
+ begin_startup_progress_phase();
+
/*
* First process unlogged files in pg_default ($PGDATA/base)
*/
snprintf(dbspace_path, sizeof(dbspace_path), "%s/%s",
tsdirname, de->d_name);
+
+ if (op & UNLOGGED_RELATION_INIT)
+ ereport_startup_progress("resetting unlogged relations (init), elapsed time: %ld.%02d s, current path: %s",
+ dbspace_path);
+ else if (op & UNLOGGED_RELATION_CLEANUP)
+ ereport_startup_progress("resetting unlogged relations (cleanup), elapsed time: %ld.%02d s, current path: %s",
+ dbspace_path);
+
ResetUnloggedRelationsInDbspaceDir(dbspace_path, op);
}
#include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h"
#include "postmaster/postmaster.h"
+#include "postmaster/startup.h"
#include "postmaster/syslogger.h"
#include "postmaster/walwriter.h"
#include "replication/logicallauncher.h"
check_client_connection_check_interval, NULL, NULL
},
+ {
+ {"log_startup_progress_interval", PGC_SIGHUP, LOGGING_WHEN,
+ gettext_noop("Time between progress updates for "
+ "long-running startup operations."),
+ gettext_noop("0 turns this feature off."),
+ GUC_UNIT_MS,
+ },
+ &log_startup_progress_interval,
+ 10000, 0, INT_MAX,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
# are logged regardless of their duration; 1.0 logs all
# statements from all transactions, 0.0 never logs
+#log_startup_progress_interval = 10s # Time between progress updates for
+ # long-running startup operations.
+ # 0 disables the feature, > 0 indicates
+ # the interval in milliseconds.
+
# - What to Log -
#debug_print_parse = off
#ifndef _STARTUP_H
#define _STARTUP_H
+/*
+ * Log the startup progress message if a timer has expired.
+ */
+#define ereport_startup_progress(msg, ...) \
+ do { \
+ long secs; \
+ int usecs; \
+ if (has_startup_progress_timeout_expired(&secs, &usecs)) \
+ ereport(LOG, errmsg(msg, secs, (usecs / 10000), __VA_ARGS__ )); \
+ } while(0)
+
+extern int log_startup_progress_interval;
+
extern void HandleStartupProcInterrupts(void);
extern void StartupProcessMain(void) pg_attribute_noreturn();
extern void PreRestoreCommand(void);
extern bool IsPromoteSignaled(void);
extern void ResetPromoteSignaled(void);
+extern void begin_startup_progress_phase(void);
+extern void startup_progress_timeout_handler(void);
+extern bool has_startup_progress_timeout_expired(long *secs, int *usecs);
+
#endif /* _STARTUP_H */
IDLE_IN_TRANSACTION_SESSION_TIMEOUT,
IDLE_SESSION_TIMEOUT,
CLIENT_CONNECTION_CHECK_TIMEOUT,
+ STARTUP_PROGRESS_TIMEOUT,
/* First user-definable timeout reason */
USER_TIMEOUT,
/* Maximum number of timeout reasons */