Skip to content

Commit 4de82f7

Browse files
Wakeup WALWriter as needed for asynchronous commit performance.
Previously we waited for wal_writer_delay before flushing WAL. Now we also wake the WALWriter as soon as a WAL buffer page has filled. Robert Haas observed a significant effect on the performance of asynchronous commits, attributed to the ability to set hint bits on tuples earlier, which reduces contention caused by clog lookups.
1 parent 02d88ef commit 4de82f7

File tree

3 files changed

+44
-18
lines changed

3 files changed

+44
-18
lines changed

src/backend/access/transam/xlog.c

Lines changed: 32 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -432,6 +432,11 @@ typedef struct XLogCtlData
432432
*/
433433
Latch recoveryWakeupLatch;
434434

435+
/*
436+
* WALWriterLatch is used to wake up the WALWriter to write some WAL.
437+
*/
438+
Latch WALWriterLatch;
439+
435440
/*
436441
* During recovery, we keep a copy of the latest checkpoint record here.
437442
* Used by the background writer when it wants to create a restartpoint.
@@ -1916,19 +1921,35 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible, bool xlog_switch)
19161921
}
19171922

19181923
/*
1919-
* Record the LSN for an asynchronous transaction commit/abort.
1924+
* Record the LSN for an asynchronous transaction commit/abort
1925+
* and nudge the WALWriter if there is a complete page to write.
19201926
* (This should not be called for synchronous commits.)
19211927
*/
19221928
void
19231929
XLogSetAsyncXactLSN(XLogRecPtr asyncXactLSN)
19241930
{
1931+
XLogRecPtr WriteRqstPtr = asyncXactLSN;
1932+
19251933
/* use volatile pointer to prevent code rearrangement */
19261934
volatile XLogCtlData *xlogctl = XLogCtl;
19271935

19281936
SpinLockAcquire(&xlogctl->info_lck);
1937+
LogwrtResult = xlogctl->LogwrtResult;
19291938
if (XLByteLT(xlogctl->asyncXactLSN, asyncXactLSN))
19301939
xlogctl->asyncXactLSN = asyncXactLSN;
19311940
SpinLockRelease(&xlogctl->info_lck);
1941+
1942+
/* back off to last completed page boundary */
1943+
WriteRqstPtr.xrecoff -= WriteRqstPtr.xrecoff % XLOG_BLCKSZ;
1944+
1945+
/* if we have already flushed that far, we're done */
1946+
if (XLByteLE(WriteRqstPtr, LogwrtResult.Flush))
1947+
return;
1948+
1949+
/*
1950+
* Nudge the WALWriter if we have a full page of WAL to write.
1951+
*/
1952+
SetLatch(&XLogCtl->WALWriterLatch);
19321953
}
19331954

19341955
/*
@@ -5072,6 +5093,7 @@ XLOGShmemInit(void)
50725093
XLogCtl->Insert.currpage = (XLogPageHeader) (XLogCtl->pages);
50735094
SpinLockInit(&XLogCtl->info_lck);
50745095
InitSharedLatch(&XLogCtl->recoveryWakeupLatch);
5096+
InitSharedLatch(&XLogCtl->WALWriterLatch);
50755097

50765098
/*
50775099
* If we are not in bootstrap mode, pg_control should already exist. Read
@@ -10013,3 +10035,12 @@ WakeupRecovery(void)
1001310035
{
1001410036
SetLatch(&XLogCtl->recoveryWakeupLatch);
1001510037
}
10038+
10039+
/*
10040+
* Return a pointer to the WALWriterLatch stored in XLogCtl
10041+
*/
10042+
Latch *
10043+
WALWriterLatch(void)
10044+
{
10045+
return &XLogCtl->WALWriterLatch;
10046+
}

src/backend/postmaster/walwriter.c

Lines changed: 10 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,8 @@
1111
*
1212
* Note that as with the bgwriter for shared buffers, regular backends are
1313
* still empowered to issue WAL writes and fsyncs when the walwriter doesn't
14-
* keep up.
14+
* keep up. This means that the WALWriter is not an essential process and
15+
* can shut down quickly when requested.
1516
*
1617
* Because the walwriter's cycle is directly linked to the maximum delay
1718
* before async-commit transactions are guaranteed committed, it's probably
@@ -76,7 +77,6 @@ static void wal_quickdie(SIGNAL_ARGS);
7677
static void WalSigHupHandler(SIGNAL_ARGS);
7778
static void WalShutdownHandler(SIGNAL_ARGS);
7879

79-
8080
/*
8181
* Main entry point for walwriter process
8282
*
@@ -89,6 +89,8 @@ WalWriterMain(void)
8989
sigjmp_buf local_sigjmp_buf;
9090
MemoryContext walwriter_context;
9191

92+
InitLatch(WALWriterLatch()); /* initialize latch used in main loop */
93+
9294
/*
9395
* If possible, make this process a group leader, so that the postmaster
9496
* can signal any child processes too. (walwriter probably never has any
@@ -220,7 +222,7 @@ WalWriterMain(void)
220222
*/
221223
for (;;)
222224
{
223-
long udelay;
225+
ResetLatch(WALWriterLatch());
224226

225227
/*
226228
* Emergency bailout if postmaster has died. This is to avoid the
@@ -248,20 +250,9 @@ WalWriterMain(void)
248250
*/
249251
XLogBackgroundFlush();
250252

251-
/*
252-
* Delay until time to do something more, but fall out of delay
253-
* reasonably quickly if signaled.
254-
*/
255-
udelay = WalWriterDelay * 1000L;
256-
while (udelay > 999999L)
257-
{
258-
if (got_SIGHUP || shutdown_requested)
259-
break;
260-
pg_usleep(1000000L);
261-
udelay -= 1000000L;
262-
}
263-
if (!(got_SIGHUP || shutdown_requested))
264-
pg_usleep(udelay);
253+
(void) WaitLatch(WALWriterLatch(),
254+
WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH,
255+
WalWriterDelay /* ms */);
265256
}
266257
}
267258

@@ -308,11 +299,13 @@ static void
308299
WalSigHupHandler(SIGNAL_ARGS)
309300
{
310301
got_SIGHUP = true;
302+
SetLatch(WALWriterLatch());
311303
}
312304

313305
/* SIGTERM: set flag to exit normally */
314306
static void
315307
WalShutdownHandler(SIGNAL_ARGS)
316308
{
317309
shutdown_requested = true;
310+
SetLatch(WALWriterLatch());
318311
}

src/include/access/xlog.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
#include "datatype/timestamp.h"
1717
#include "lib/stringinfo.h"
1818
#include "storage/buf.h"
19+
#include "storage/latch.h"
1920
#include "utils/pg_crc.h"
2021

2122
/*
@@ -319,6 +320,7 @@ extern TimeLineID GetRecoveryTargetTLI(void);
319320

320321
extern bool CheckPromoteSignal(void);
321322
extern void WakeupRecovery(void);
323+
extern Latch *WALWriterLatch(void);
322324

323325
/*
324326
* Starting/stopping a base backup

0 commit comments

Comments
 (0)