Skip to content

Commit 2bc0aff

Browse files
Álvaro Herrera authored and Commitfest Bot committed
LogChildExit / HandleChildCrash support
(Didn't actually test that bgworkers are doing the expected thing!)
1 parent 8b74686 commit 2bc0aff

File tree

3 files changed

+63
-62
lines changed

3 files changed

+63
-62
lines changed

src/backend/postmaster/postmaster.c

Lines changed: 61 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,8 @@ static void process_pm_reload_request(void);
428428
static void process_pm_shutdown_request(void);
429429
static void dummy_handler(SIGNAL_ARGS);
430430
static void CleanupBackend(PMChild *bp, int exitstatus);
431-
static void HandleChildCrash(int pid, int exitstatus, const char *procname);
432-
static void LogChildExit(int lev, const char *procname,
431+
static void HandleChildCrash(int pid, int exitstatus, BackendType proctype, const char *addtype);
432+
static void LogChildExit(int lev, BackendType proctype, const char *addtype,
433433
int pid, int exitstatus);
434434
static void PostmasterStateMachine(void);
435435
static void UpdatePMState(PMState newState);
@@ -2285,8 +2285,7 @@ process_pm_child_exit(void)
22852285
StartupStatus != STARTUP_SIGNALED &&
22862286
!EXIT_STATUS_0(exitstatus))
22872287
{
2288-
LogChildExit(LOG, _("startup process"),
2289-
pid, exitstatus);
2288+
LogChildExit(LOG, B_STARTUP, NULL, pid, exitstatus);
22902289
ereport(LOG,
22912290
(errmsg("aborting startup due to startup process failure")));
22922291
ExitPostmaster(1);
@@ -2320,8 +2319,7 @@ process_pm_child_exit(void)
23202319
}
23212320
else
23222321
StartupStatus = STARTUP_CRASHED;
2323-
HandleChildCrash(pid, exitstatus,
2324-
_("startup process"));
2322+
HandleChildCrash(pid, exitstatus, B_STARTUP, NULL);
23252323
continue;
23262324
}
23272325

@@ -2365,8 +2363,7 @@ process_pm_child_exit(void)
23652363
ReleasePostmasterChildSlot(BgWriterPMChild);
23662364
BgWriterPMChild = NULL;
23672365
if (!EXIT_STATUS_0(exitstatus))
2368-
HandleChildCrash(pid, exitstatus,
2369-
_("background writer process"));
2366+
HandleChildCrash(pid, exitstatus, B_BG_WRITER, NULL);
23702367
continue;
23712368
}
23722369

@@ -2398,8 +2395,7 @@ process_pm_child_exit(void)
23982395
* Any unexpected exit of the checkpointer (including FATAL
23992396
* exit) is treated as a crash.
24002397
*/
2401-
HandleChildCrash(pid, exitstatus,
2402-
_("checkpointer process"));
2398+
HandleChildCrash(pid, exitstatus, B_CHECKPOINTER, NULL);
24032399
}
24042400

24052401
continue;
@@ -2415,8 +2411,7 @@ process_pm_child_exit(void)
24152411
ReleasePostmasterChildSlot(WalWriterPMChild);
24162412
WalWriterPMChild = NULL;
24172413
if (!EXIT_STATUS_0(exitstatus))
2418-
HandleChildCrash(pid, exitstatus,
2419-
_("WAL writer process"));
2414+
HandleChildCrash(pid, exitstatus, B_WAL_WRITER, NULL);
24202415
continue;
24212416
}
24222417

@@ -2431,8 +2426,7 @@ process_pm_child_exit(void)
24312426
ReleasePostmasterChildSlot(WalReceiverPMChild);
24322427
WalReceiverPMChild = NULL;
24332428
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2434-
HandleChildCrash(pid, exitstatus,
2435-
_("WAL receiver process"));
2429+
HandleChildCrash(pid, exitstatus, B_WAL_RECEIVER, NULL);
24362430
continue;
24372431
}
24382432

@@ -2446,8 +2440,7 @@ process_pm_child_exit(void)
24462440
ReleasePostmasterChildSlot(WalSummarizerPMChild);
24472441
WalSummarizerPMChild = NULL;
24482442
if (!EXIT_STATUS_0(exitstatus))
2449-
HandleChildCrash(pid, exitstatus,
2450-
_("WAL summarizer process"));
2443+
HandleChildCrash(pid, exitstatus, B_WAL_SUMMARIZER, NULL);
24512444
continue;
24522445
}
24532446

@@ -2462,8 +2455,7 @@ process_pm_child_exit(void)
24622455
ReleasePostmasterChildSlot(AutoVacLauncherPMChild);
24632456
AutoVacLauncherPMChild = NULL;
24642457
if (!EXIT_STATUS_0(exitstatus))
2465-
HandleChildCrash(pid, exitstatus,
2466-
_("autovacuum launcher process"));
2458+
HandleChildCrash(pid, exitstatus, B_AUTOVAC_LAUNCHER, NULL);
24672459
continue;
24682460
}
24692461

@@ -2478,8 +2470,7 @@ process_pm_child_exit(void)
24782470
ReleasePostmasterChildSlot(PgArchPMChild);
24792471
PgArchPMChild = NULL;
24802472
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2481-
HandleChildCrash(pid, exitstatus,
2482-
_("archiver process"));
2473+
HandleChildCrash(pid, exitstatus, B_ARCHIVER, NULL);
24832474
continue;
24842475
}
24852476

@@ -2494,8 +2485,7 @@ process_pm_child_exit(void)
24942485
StartSysLogger();
24952486

24962487
if (!EXIT_STATUS_0(exitstatus))
2497-
LogChildExit(LOG, _("system logger process"),
2498-
pid, exitstatus);
2488+
LogChildExit(LOG, B_LOGGER, NULL, pid, exitstatus);
24992489
continue;
25002490
}
25012491

@@ -2511,16 +2501,15 @@ process_pm_child_exit(void)
25112501
ReleasePostmasterChildSlot(SlotSyncWorkerPMChild);
25122502
SlotSyncWorkerPMChild = NULL;
25132503
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2514-
HandleChildCrash(pid, exitstatus,
2515-
_("slot sync worker process"));
2504+
HandleChildCrash(pid, exitstatus, B_SLOTSYNC_WORKER, NULL);
25162505
continue;
25172506
}
25182507

25192508
/* Was it an IO worker? */
25202509
if (maybe_reap_io_worker(pid))
25212510
{
25222511
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2523-
HandleChildCrash(pid, exitstatus, _("io worker"));
2512+
HandleChildCrash(pid, exitstatus, B_IO_WORKER, NULL);
25242513

25252514
maybe_adjust_io_workers();
25262515
continue;
@@ -2542,9 +2531,9 @@ process_pm_child_exit(void)
25422531
else
25432532
{
25442533
if (!EXIT_STATUS_0(exitstatus) && !EXIT_STATUS_1(exitstatus))
2545-
HandleChildCrash(pid, exitstatus, _("untracked child process"));
2534+
HandleChildCrash(pid, exitstatus, B_INVALID, NULL);
25462535
else
2547-
LogChildExit(LOG, _("untracked child process"), pid, exitstatus);
2536+
LogChildExit(LOG, B_INVALID, NULL, pid, exitstatus);
25482537
}
25492538
} /* loop over pending child-death reports */
25502539

@@ -2565,8 +2554,8 @@ static void
25652554
CleanupBackend(PMChild *bp,
25662555
int exitstatus) /* child's exit status. */
25672556
{
2568-
char namebuf[MAXPGPATH];
2569-
const char *procname;
2557+
char namebuf[MAXPGPATH];
2558+
char *procname;
25702559
bool crashed = false;
25712560
bool logged = false;
25722561
pid_t bp_pid;
@@ -2575,14 +2564,13 @@ CleanupBackend(PMChild *bp,
25752564
RegisteredBgWorker *rw;
25762565

25772566
/* Construct a process name for the log message */
2578-
if (bp->bkend_type == B_BG_WORKER)
2567+
if (bp && bp->bkend_type == B_BG_WORKER && bp->rw)
25792568
{
2580-
snprintf(namebuf, MAXPGPATH, _("background worker \"%s\""),
2581-
bp->rw->rw_worker.bgw_type);
2569+
strlcpy(namebuf, bp->rw->rw_worker.bgw_type, MAXPGPATH);
25822570
procname = namebuf;
25832571
}
25842572
else
2585-
procname = _(GetBackendTypeDesc(bp->bkend_type));
2573+
procname = NULL;
25862574

25872575
/*
25882576
* If a backend dies in an ugly way then we must signal all other backends
@@ -2604,7 +2592,7 @@ CleanupBackend(PMChild *bp,
26042592
*/
26052593
if (exitstatus == ERROR_WAIT_NO_CHILDREN)
26062594
{
2607-
LogChildExit(LOG, procname, bp->pid, exitstatus);
2595+
LogChildExit(LOG, bp->bkend_type, procname, bp->pid, exitstatus);
26082596
logged = true;
26092597
crashed = false;
26102598
}
@@ -2639,7 +2627,7 @@ CleanupBackend(PMChild *bp,
26392627
*/
26402628
if (crashed)
26412629
{
2642-
HandleChildCrash(bp_pid, exitstatus, procname);
2630+
HandleChildCrash(bp_pid, exitstatus, bp_bkend_type, procname);
26432631
return;
26442632
}
26452633

@@ -2677,7 +2665,7 @@ CleanupBackend(PMChild *bp,
26772665
if (!logged)
26782666
{
26792667
LogChildExit(EXIT_STATUS_0(exitstatus) ? DEBUG1 : LOG,
2680-
procname, bp_pid, exitstatus);
2668+
bp_bkend_type, procname, bp_pid, exitstatus);
26812669
logged = true;
26822670
}
26832671

@@ -2686,7 +2674,7 @@ CleanupBackend(PMChild *bp,
26862674
}
26872675

26882676
if (!logged)
2689-
LogChildExit(DEBUG2, procname, bp_pid, exitstatus);
2677+
LogChildExit(DEBUG2, bp_bkend_type, procname, bp_pid, exitstatus);
26902678
}
26912679

26922680
/*
@@ -2778,16 +2766,16 @@ HandleFatalError(QuitSignalReason reason, bool consider_sigabrt)
27782766
}
27792767

27802768
/*
2781-
* HandleChildCrash -- cleanup after failed backend, bgwriter, checkpointer,
2782-
* walwriter, autovacuum, archiver, slot sync worker, or background worker.
2769+
* HandleChildCrash -- cleanup after failed backend or certain auxiliary
2770+
* processes.
27832771
*
27842772
* The objectives here are to clean up our local state about the child
27852773
* process, and to signal all other remaining children to quickdie.
27862774
*
27872775
* The caller has already released its PMChild slot.
27882776
*/
27892777
static void
2790-
HandleChildCrash(int pid, int exitstatus, const char *procname)
2778+
HandleChildCrash(int pid, int exitstatus, BackendType proctype, const char *addtype)
27912779
{
27922780
/*
27932781
* We only log messages and send signals if this is the first process
@@ -2799,7 +2787,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
27992787
if (FatalError || Shutdown == ImmediateShutdown)
28002788
return;
28012789

2802-
LogChildExit(LOG, procname, pid, exitstatus);
2790+
LogChildExit(LOG, proctype, addtype, pid, exitstatus);
28032791
ereport(LOG,
28042792
(errmsg("terminating any other active server processes")));
28052793

@@ -2812,9 +2800,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
28122800

28132801
/*
28142802
* Log the death of a child process.
2803+
*
2804+
* 'addtype' is an additional word or short phrase that describes the process,
2805+
* such as a background worker 'type'.
28152806
*/
28162807
static void
2817-
LogChildExit(int lev, const char *procname, int pid, int exitstatus)
2808+
LogChildExit(int lev, BackendType proctype, const char *addtype, int pid,
2809+
int exitstatus)
28182810
{
28192811
/*
28202812
* size of activity_buffer is arbitrary, but set equal to default
@@ -2829,14 +2821,13 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
28292821
sizeof(activity_buffer));
28302822

28312823
if (WIFEXITED(exitstatus))
2832-
ereport(lev,
2824+
ereport(lev, addtype ?
2825+
errmsg("\"%s\" process of type \"%s\" (PID %d) exited with exit code %d",
2826+
GetBackendTypeDesc(proctype), addtype, pid, WEXITSTATUS(exitstatus)) :
2827+
errmsg("process of type \"%s\" (PID %d) exited with exit code %d",
2828+
GetBackendTypeDesc(proctype), pid, WEXITSTATUS(exitstatus)),
28332829

2834-
/*------
2835-
translator: %s is a noun phrase describing a child process, such as
2836-
"server process" */
2837-
(errmsg("%s (PID %d) exited with exit code %d",
2838-
procname, pid, WEXITSTATUS(exitstatus)),
2839-
activity ? errdetail("Failed process was running: %s", activity) : 0));
2830+
activity ? errdetail("Failed process was running: %s", activity) : 0);
28402831
else if (WIFSIGNALED(exitstatus))
28412832
{
28422833
#if defined(WIN32)
@@ -2845,20 +2836,27 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
28452836
/*------
28462837
translator: %s is a noun phrase describing a child process, such as
28472838
"server process" */
2848-
(errmsg("%s (PID %d) was terminated by exception 0x%X",
2849-
procname, pid, WTERMSIG(exitstatus)),
2850-
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
2851-
activity ? errdetail("Failed process was running: %s", activity) : 0));
2839+
addtype ?
2840+
errmsg("\"%s\" process of type \"%s\" (PID %d) was terminated by exception 0x%X",
2841+
GetBackendTypeDesc(proctype), addtype, pid, WTERMSIG(exitstatus)) :
2842+
errmsg("\"%s\" process (PID %d) was terminated by exception 0x%X",
2843+
GetBackendTypeDesc(proctype), addtype, pid, WTERMSIG(exitstatus)),
2844+
errhint("See C include file \"ntstatus.h\" for a description of the hexadecimal value."),
2845+
activity ? errdetail("Failed process was running: %s", activity) : 0);
28522846
#else
28532847
ereport(lev,
28542848

28552849
/*------
28562850
translator: %s is a noun phrase describing a child process, such as
28572851
"server process" */
2858-
(errmsg("%s (PID %d) was terminated by signal %d: %s",
2859-
procname, pid, WTERMSIG(exitstatus),
2860-
pg_strsignal(WTERMSIG(exitstatus))),
2861-
activity ? errdetail("Failed process was running: %s", activity) : 0));
2852+
addtype ?
2853+
errmsg("\"%s\" process of type \"%s\" (PID %d) was terminated by signal %d: %s",
2854+
GetBackendTypeDesc(proctype), addtype, pid, WTERMSIG(exitstatus),
2855+
pg_strsignal(WTERMSIG(exitstatus))) :
2856+
errmsg("\"%s\" process (PID %d) was terminated by signal %d: %s",
2857+
GetBackendTypeDesc(proctype), pid, WTERMSIG(exitstatus),
2858+
pg_strsignal(WTERMSIG(exitstatus))),
2859+
activity ? errdetail("Failed process was running: %s", activity) : 0);
28622860
#endif
28632861
}
28642862
else
@@ -2867,9 +2865,12 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
28672865
/*------
28682866
translator: %s is a noun phrase describing a child process, such as
28692867
"server process" */
2870-
(errmsg("%s (PID %d) exited with unrecognized status %d",
2871-
procname, pid, exitstatus),
2872-
activity ? errdetail("Failed process was running: %s", activity) : 0));
2868+
addtype ?
2869+
errmsg("\"%s\" process of type \"%s\" (PID %d) exited with unrecognized status %d",
2870+
GetBackendTypeDesc(proctype), addtype, pid, exitstatus) :
2871+
errmsg("\"%s\" process (PID %d) exited with unrecognized status %d",
2872+
GetBackendTypeDesc(proctype), pid, exitstatus),
2873+
activity ? errdetail("Failed process was running: %s", activity) : 0);
28732874
}
28742875

28752876
/*

src/test/perl/PostgreSQL/Test/Cluster.pm

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2696,7 +2696,7 @@ sub connect_fails
26962696
if (defined($params{log_like}) or defined($params{log_unlike}))
26972697
{
26982698
$self->wait_for_log(
2699-
qr/DEBUG: (?:00000: )?forked new client backend, pid=(\d+) socket.*DEBUG: (?:00000: )?client backend \(PID \1\) exited with exit code \d/s,
2699+
qr/DEBUG: (?:00000: )?forked new client backend, pid=(\d+) socket.*DEBUG: (?:00000: )?process of type \"client backend\" \(PID \1\) exited with exit code \d/s,
27002700
$log_location);
27012701

27022702
$self->log_check($test_name, $log_location, %params);

src/test/postmaster/t/002_connection_limits.pl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ sub connect_fails_wait
6969

7070
$node->connect_fails($connstr, $test_name, %params);
7171
$node->wait_for_log(
72-
qr/DEBUG: (00000: )?client backend.*exited with exit code 1/,
72+
qr/DEBUG: (00000: )?process of type "client backend".*exited with exit code 1/,
7373
$log_location);
7474
ok(1, "$test_name: client backend process exited");
7575
}

0 commit comments

Comments
 (0)