Skip to content

Commit 2cdf131

Browse files
committed
Use larger segment file names for pg_notify
This avoids the wraparound in async.c and removes the corresponding code complexity. The maximum number of SLRU pages allocated for the NOTIFY / LISTEN queue is now determined by the max_notify_queue_pages GUC. The default value is 1048576, which allows the queue to consume up to 8 GB of disk space; this is exactly the limit we had previously.
Author: Maxim Orlov, Aleksander Alekseev, Alexander Korotkov, Teodor Sigaev
Author: Nikita Glukhov, Pavel Borisov, Yura Sokolov
Reviewed-by: Jacob Champion, Heikki Linnakangas, Alexander Korotkov
Reviewed-by: Japin Li, Pavel Borisov, Tom Lane, Peter Eisentraut, Andres Freund
Reviewed-by: Andrey Borodin, Dilip Kumar, Aleksander Alekseev
Discussion: https://postgr.es/m/CACG%3DezZe1NQSCnfHOr78AtAZxJZeCvxrts0ygrxYwe%3DpyyjVWA%40mail.gmail.com
Discussion: https://postgr.es/m/CAJ7c6TPDOYBYrnCAeyndkBktO0WG2xSdYduTF0nxq%2BvfkmTF5Q%40mail.gmail.com
1 parent 4ed8f09 commit 2cdf131

File tree

7 files changed

+62
-92
lines changed

7 files changed

+62
-92
lines changed

doc/src/sgml/config.sgml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2151,6 +2151,22 @@ include_dir 'conf.d'
21512151
</listitem>
21522152
</varlistentry>
21532153

2154+
<varlistentry id="guc-max-notify-queue-pages" xreflabel="max_notify_queue_pages">
2155+
<term><varname>max_notify_queue_pages</varname> (<type>integer</type>)
2156+
<indexterm>
2157+
<primary><varname>max_notify_queue_pages</varname> configuration parameter</primary>
2158+
</indexterm>
2159+
</term>
2160+
<listitem>
2161+
<para>
2162+
Specifies the maximum number of pages allocated for the
2163+
<xref linkend="sql-notify"/> / <xref linkend="sql-listen"/> queue.
2164+
The default value is 1048576. With 8 KB pages, this allows the queue to consume
2165+
up to 8 GB of disk space.
2166+
</para>
2167+
</listitem>
2168+
</varlistentry>
2169+
21542170
</variablelist>
21552171
</sect2>
21562172

doc/src/sgml/ref/listen.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,7 @@ Asynchronous notification "virtual" received from server process with PID 8448.
148148
<simplelist type="inline">
149149
<member><xref linkend="sql-notify"/></member>
150150
<member><xref linkend="sql-unlisten"/></member>
151+
<member><xref linkend="guc-max-notify-queue-pages"/></member>
151152
</simplelist>
152153
</refsect1>
153154
</refentry>

doc/src/sgml/ref/notify.sgml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ Asynchronous notification "foo" with payload "payload" received from server proc
228228
<simplelist type="inline">
229229
<member><xref linkend="sql-listen"/></member>
230230
<member><xref linkend="sql-unlisten"/></member>
231+
<member><xref linkend="guc-max-notify-queue-pages"/></member>
231232
</simplelist>
232233
</refsect1>
233234
</refentry>

src/backend/commands/async.c

Lines changed: 30 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -103,12 +103,11 @@
103103
* until we reach either a notification from an uncommitted transaction or
104104
* the head pointer's position.
105105
*
106-
* 6. To avoid SLRU wraparound and limit disk space consumption, the tail
107-
* pointer needs to be advanced so that old pages can be truncated.
108-
* This is relatively expensive (notably, it requires an exclusive lock),
109-
* so we don't want to do it often. We make sending backends do this work
110-
* if they advanced the queue head into a new page, but only once every
111-
* QUEUE_CLEANUP_DELAY pages.
106+
* 6. To limit disk space consumption, the tail pointer needs to be advanced
107+
* so that old pages can be truncated. This is relatively expensive
108+
* (notably, it requires an exclusive lock), so we don't want to do it
109+
* often. We make sending backends do this work if they advanced the queue
110+
* head into a new page, but only once every QUEUE_CLEANUP_DELAY pages.
112111
*
113112
* An application that listens on the same channel it notifies will get
114113
* NOTIFY messages for its own NOTIFYs. These can be ignored, if not useful,
@@ -120,7 +119,7 @@
120119
* The amount of shared memory used for notify management (NUM_NOTIFY_BUFFERS)
121120
* can be varied without affecting anything but performance. The maximum
122121
* amount of notification data that can be queued at one time is determined
123-
* by slru.c's wraparound limit; see QUEUE_MAX_PAGE below.
122+
* by the max_notify_queue_pages GUC.
124123
*-------------------------------------------------------------------------
125124
*/
126125

@@ -312,23 +311,8 @@ static SlruCtlData NotifyCtlData;
312311

313312
#define NotifyCtl (&NotifyCtlData)
314313
#define QUEUE_PAGESIZE BLCKSZ
315-
#define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */
316314

317-
/*
318-
* Use segments 0000 through FFFF. Each contains SLRU_PAGES_PER_SEGMENT pages
319-
* which gives us the pages from 0 to SLRU_PAGES_PER_SEGMENT * 0x10000 - 1.
320-
* We could use as many segments as SlruScanDirectory() allows, but this gives
321-
* us so much space already that it doesn't seem worth the trouble.
322-
*
323-
* The most data we can have in the queue at a time is QUEUE_MAX_PAGE/2
324-
* pages, because more than that would confuse slru.c into thinking there
325-
* was a wraparound condition. With the default BLCKSZ this means there
326-
* can be up to 8GB of queued-and-not-read data.
327-
*
328-
* Note: it's possible to redefine QUEUE_MAX_PAGE with a smaller multiple of
329-
* SLRU_PAGES_PER_SEGMENT, for easier testing of queue-full behaviour.
330-
*/
331-
#define QUEUE_MAX_PAGE (SLRU_PAGES_PER_SEGMENT * 0x10000 - 1)
315+
#define QUEUE_FULL_WARN_INTERVAL 5000 /* warn at most once every 5s */
332316

333317
/*
334318
* listenChannels identifies the channels we are actually listening to
@@ -439,12 +423,15 @@ static bool amRegisteredListener = false;
439423
/* have we advanced to a page that's a multiple of QUEUE_CLEANUP_DELAY? */
440424
static bool tryAdvanceTail = false;
441425

442-
/* GUC parameter */
426+
/* GUC parameters */
443427
bool Trace_notify = false;
444428

429+
/* For 8 KB pages this gives 8 GB of disk space */
430+
int max_notify_queue_pages = 1048576;
431+
445432
/* local function prototypes */
446-
static int64 asyncQueuePageDiff(int64 p, int64 q);
447-
static bool asyncQueuePagePrecedes(int64 p, int64 q);
433+
static inline int64 asyncQueuePageDiff(int64 p, int64 q);
434+
static inline bool asyncQueuePagePrecedes(int64 p, int64 q);
448435
static void queue_listen(ListenActionKind action, const char *channel);
449436
static void Async_UnlistenOnExit(int code, Datum arg);
450437
static void Exec_ListenPreCommit(void);
@@ -474,39 +461,23 @@ static int notification_match(const void *key1, const void *key2, Size keysize);
474461
static void ClearPendingActionsAndNotifies(void);
475462

476463
/*
477-
* Compute the difference between two queue page numbers (i.e., p - q),
478-
* accounting for wraparound.
464+
* Compute the difference between two queue page numbers.
465+
* Previously this function accounted for a wraparound.
479466
*/
480-
static int64
467+
static inline int64
481468
asyncQueuePageDiff(int64 p, int64 q)
482469
{
483-
int64 diff;
484-
485-
/*
486-
* We have to compare modulo (QUEUE_MAX_PAGE+1)/2. Both inputs should be
487-
* in the range 0..QUEUE_MAX_PAGE.
488-
*/
489-
Assert(p >= 0 && p <= QUEUE_MAX_PAGE);
490-
Assert(q >= 0 && q <= QUEUE_MAX_PAGE);
491-
492-
diff = p - q;
493-
if (diff >= ((QUEUE_MAX_PAGE + 1) / 2))
494-
diff -= QUEUE_MAX_PAGE + 1;
495-
else if (diff < -((QUEUE_MAX_PAGE + 1) / 2))
496-
diff += QUEUE_MAX_PAGE + 1;
497-
return diff;
470+
return p - q;
498471
}
499472

500473
/*
501-
* Is p < q, accounting for wraparound?
502-
*
503-
* Since asyncQueueIsFull() blocks creation of a page that could precede any
504-
* extant page, we need not assess entries within a page.
474+
* Determines whether p precedes q.
475+
* Previously this function accounted for a wraparound.
505476
*/
506-
static bool
477+
static inline bool
507478
asyncQueuePagePrecedes(int64 p, int64 q)
508479
{
509-
return asyncQueuePageDiff(p, q) < 0;
480+
return p < q;
510481
}
511482

512483
/*
@@ -566,12 +537,13 @@ AsyncShmemInit(void)
566537
}
567538

568539
/*
569-
* Set up SLRU management of the pg_notify data.
540+
* Set up SLRU management of the pg_notify data. Note that long segment
541+
* names are used in order to avoid wraparound.
570542
*/
571543
NotifyCtl->PagePrecedes = asyncQueuePagePrecedes;
572544
SimpleLruInit(NotifyCtl, "Notify", NUM_NOTIFY_BUFFERS, 0,
573545
NotifySLRULock, "pg_notify", LWTRANCHE_NOTIFY_BUFFER,
574-
SYNC_HANDLER_NONE, false);
546+
SYNC_HANDLER_NONE, true);
575547

576548
if (!found)
577549
{
@@ -1305,27 +1277,11 @@ asyncQueueUnregister(void)
13051277
static bool
13061278
asyncQueueIsFull(void)
13071279
{
1308-
int nexthead;
1309-
int boundary;
1280+
int headPage = QUEUE_POS_PAGE(QUEUE_HEAD);
1281+
int tailPage = QUEUE_POS_PAGE(QUEUE_TAIL);
1282+
int occupied = headPage - tailPage;
13101283

1311-
/*
1312-
* The queue is full if creating a new head page would create a page that
1313-
* logically precedes the current global tail pointer, ie, the head
1314-
* pointer would wrap around compared to the tail. We cannot create such
1315-
* a head page for fear of confusing slru.c. For safety we round the tail
1316-
* pointer back to a segment boundary (truncation logic in
1317-
* asyncQueueAdvanceTail does not do this, so doing it here is optional).
1318-
*
1319-
* Note that this test is *not* dependent on how much space there is on
1320-
* the current head page. This is necessary because asyncQueueAddEntries
1321-
* might try to create the next head page in any case.
1322-
*/
1323-
nexthead = QUEUE_POS_PAGE(QUEUE_HEAD) + 1;
1324-
if (nexthead > QUEUE_MAX_PAGE)
1325-
nexthead = 0; /* wrap around */
1326-
boundary = QUEUE_STOP_PAGE;
1327-
boundary -= boundary % SLRU_PAGES_PER_SEGMENT;
1328-
return asyncQueuePagePrecedes(nexthead, boundary);
1284+
return occupied >= max_notify_queue_pages;
13291285
}
13301286

13311287
/*
@@ -1355,8 +1311,6 @@ asyncQueueAdvance(volatile QueuePosition *position, int entryLength)
13551311
if (offset + QUEUEALIGN(AsyncQueueEntryEmptySize) > QUEUE_PAGESIZE)
13561312
{
13571313
pageno++;
1358-
if (pageno > QUEUE_MAX_PAGE)
1359-
pageno = 0; /* wrap around */
13601314
offset = 0;
13611315
pageJump = true;
13621316
}
@@ -1433,9 +1387,6 @@ asyncQueueAddEntries(ListCell *nextNotify)
14331387
* If this is the first write since the postmaster started, we need to
14341388
* initialize the first page of the async SLRU. Otherwise, the current
14351389
* page should be initialized already, so just fetch it.
1436-
*
1437-
* (We could also take the first path when the SLRU position has just
1438-
* wrapped around, but re-zeroing the page is harmless in that case.)
14391390
*/
14401391
pageno = QUEUE_POS_PAGE(queue_head);
14411392
if (QUEUE_POS_IS_ZERO(queue_head))
@@ -1548,20 +1499,12 @@ asyncQueueUsage(void)
15481499
{
15491500
int headPage = QUEUE_POS_PAGE(QUEUE_HEAD);
15501501
int tailPage = QUEUE_POS_PAGE(QUEUE_TAIL);
1551-
int occupied;
1552-
1553-
occupied = headPage - tailPage;
1502+
int occupied = headPage - tailPage;
15541503

15551504
if (occupied == 0)
15561505
return (double) 0; /* fast exit for common case */
15571506

1558-
if (occupied < 0)
1559-
{
1560-
/* head has wrapped around, tail not yet */
1561-
occupied += QUEUE_MAX_PAGE + 1;
1562-
}
1563-
1564-
return (double) occupied / (double) ((QUEUE_MAX_PAGE + 1) / 2);
1507+
return (double) occupied / (double) max_notify_queue_pages;
15651508
}
15661509

15671510
/*
@@ -2209,11 +2152,6 @@ asyncQueueAdvanceTail(void)
22092152
*/
22102153
SimpleLruTruncate(NotifyCtl, newtailpage);
22112154

2212-
/*
2213-
* Update QUEUE_STOP_PAGE. This changes asyncQueueIsFull()'s verdict
2214-
* for the segment immediately prior to the old tail, allowing fresh
2215-
* data into that segment.
2216-
*/
22172155
LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
22182156
QUEUE_STOP_PAGE = newtailpage;
22192157
LWLockRelease(NotifyQueueLock);

src/backend/utils/misc/guc_tables.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2687,6 +2687,16 @@ struct config_int ConfigureNamesInt[] =
26872687
NULL, NULL, NULL
26882688
},
26892689

2690+
{
2691+
{"max_notify_queue_pages", PGC_POSTMASTER, RESOURCES_DISK,
2692+
gettext_noop("Sets the maximum number of allocated pages for NOTIFY / LISTEN queue."),
2693+
NULL,
2694+
},
2695+
&max_notify_queue_pages,
2696+
1048576, 64, INT_MAX,
2697+
NULL, NULL, NULL
2698+
},
2699+
26902700
{
26912701
{"wal_decode_buffer_size", PGC_POSTMASTER, WAL_RECOVERY,
26922702
gettext_noop("Buffer size for reading ahead in the WAL during recovery."),

src/backend/utils/misc/postgresql.conf.sample

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,9 @@
166166
#temp_file_limit = -1 # limits per-process temp file space
167167
# in kilobytes, or -1 for no limit
168168

169+
#max_notify_queue_pages = 1048576 # limits the number of SLRU pages allocated
170+
# for NOTIFY / LISTEN queue
171+
169172
# - Kernel Resources -
170173

171174
#max_files_per_process = 1000 # min 64

src/include/commands/async.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#define NUM_NOTIFY_BUFFERS 8
2222

2323
extern PGDLLIMPORT bool Trace_notify;
24+
extern PGDLLIMPORT int max_notify_queue_pages;
2425
extern PGDLLIMPORT volatile sig_atomic_t notifyInterruptPending;
2526

2627
extern Size AsyncShmemSize(void);

0 commit comments

Comments
 (0)