Add support for LZ4 with compression of full-page writes in WAL
author Michael Paquier <[email protected]>
Tue, 29 Jun 2021 02:17:55 +0000 (11:17 +0900)
committer Michael Paquier <[email protected]>
Tue, 29 Jun 2021 02:17:55 +0000 (11:17 +0900)
The logic is implemented so that the compression method can be chosen
when building a WAL record, and an extra per-record bit is used to
track whether a block is compressed with PGLZ, LZ4 or nothing.

wal_compression, the existing parameter, is changed to an enum with
support for the following backward-compatible values:
- "off", the default, to not use compression.
- "pglz" or "on", to compress FPWs with PGLZ.
- "lz4", the new mode, to compress FPWs with LZ4.

Benchmarking has shown that LZ4 easily outclasses PGLZ.  ZSTD would
also be an interesting choice, but going with just LZ4 for now keeps the
patch minimal: toast compression is already able to use LZ4, so
there is no need to worry about any build-related needs for this
implementation.

Author: Andrey Borodin, Justin Pryzby
Reviewed-by: Dilip Kumar, Michael Paquier
Discussion: https://postgr.es/m/3037310D-ECB7-4BF1-AF20-01C10BB33A33@yandex-team.ru

13 files changed:
doc/src/sgml/config.sgml
doc/src/sgml/install-windows.sgml
doc/src/sgml/installation.sgml
doc/src/sgml/standalone-profile.xsl
src/backend/access/transam/xlog.c
src/backend/access/transam/xloginsert.c
src/backend/access/transam/xlogreader.c
src/backend/utils/misc/guc.c
src/backend/utils/misc/postgresql.conf.sample
src/bin/pg_waldump/pg_waldump.c
src/include/access/xlog.h
src/include/access/xlogrecord.h
src/tools/pgindent/typedefs.list

index 3eee9883595ff2eca518893d375f526f6e46657f..6098f6b0202ea8d863105bd6889a62e0e4b1f27b 100644 (file)
@@ -3128,23 +3128,27 @@ include_dir 'conf.d'
      </varlistentry>
 
      <varlistentry id="guc-wal-compression" xreflabel="wal_compression">
-      <term><varname>wal_compression</varname> (<type>boolean</type>)
+      <term><varname>wal_compression</varname> (<type>enum</type>)
       <indexterm>
        <primary><varname>wal_compression</varname> configuration parameter</primary>
       </indexterm>
       </term>
       <listitem>
        <para>
-        When this parameter is <literal>on</literal>, the <productname>PostgreSQL</productname>
+        This parameter enables compression of WAL using the specified 
+        compression method.
+        When enabled, the <productname>PostgreSQL</productname>
         server compresses full page images written to WAL when
         <xref linkend="guc-full-page-writes"/> is on or during a base backup.
         A compressed page image will be decompressed during WAL replay.
-        The default value is <literal>off</literal>.
-        Only superusers can change this setting.
+        The supported methods are <literal>pglz</literal> and
+        <literal>lz4</literal> (if <productname>PostgreSQL</productname> was
+        compiled with <option>--with-lz4</option>). The default value is
+        <literal>off</literal>. Only superusers can change this setting.
        </para>
 
        <para>
-        Turning this parameter on can reduce the WAL volume without
+        Enabling compression can reduce the WAL volume without
         increasing the risk of unrecoverable data corruption,
         but at the cost of some extra CPU spent on the compression during
         WAL logging and on the decompression during WAL replay.
index 312edc6f7aa3a214cab80efd327adb42e9cec561..ba794b8c934b33f5f4bbeabec9142678c26d18d2 100644 (file)
@@ -299,7 +299,7 @@ $ENV{MSBFLAGS}="/m";
      <term><productname>LZ4</productname></term>
      <listitem><para>
       Required for supporting <productname>LZ4</productname> compression
-      method for compressing the table data. Binaries and source can be
+      method for compressing table or WAL data. Binaries and source can be
       downloaded from
       <ulink url="https://github.com/lz4/lz4/releases"></ulink>.
      </para></listitem>
index 3c0aa118c76bfa8ed473a7cc1ad02eabc0e0e7bb..61d0bc8c43f3378befae315dfe7fc0f53796e8f9 100644 (file)
@@ -270,7 +270,8 @@ su - postgres
      <para>
       You need <productname>LZ4</productname>, if you want to support
       compression of data with this method; see
-      <xref linkend="guc-default-toast-compression"/>.
+      <xref linkend="guc-default-toast-compression"/> and
+      <xref linkend="guc-wal-compression"/>.
      </para>
     </listitem>
 
@@ -980,7 +981,7 @@ build-postgresql:
         <para>
          Build with <productname>LZ4</productname> compression support.
          This allows the use of <productname>LZ4</productname> for
-         compression of table data.
+         compression of table and WAL data.
         </para>
        </listitem>
       </varlistentry>
index 8bdf58632cd1149130d160775491437c1fffcb39..d748076a058e2c83dc8ccdd014d9ba8767ea3324 100644 (file)
@@ -52,6 +52,10 @@ variant without links and references to the main documentation.
   <xsl:text>the configuration parameter default_toast_compression</xsl:text>
 </xsl:template>
 
+<xsl:template match="xref[@linkend='guc-wal-compression']">
+  <xsl:text>the configuration parameter wal_compression</xsl:text>
+</xsl:template>
+
 <xsl:template match="xref[@linkend='install-windows']">
   <xsl:text>the documentation</xsl:text>
 </xsl:template>
index 2c6e21bea5a811de134b1e48f9db1784ae411704..9cbca6392d3378fc237f47886e677b9bf57fc831 100644 (file)
@@ -98,7 +98,7 @@ char     *XLogArchiveCommand = NULL;
 bool       EnableHotStandby = false;
 bool       fullPageWrites = true;
 bool       wal_log_hints = false;
-bool       wal_compression = false;
+int            wal_compression = WAL_COMPRESSION_NONE;
 char      *wal_consistency_checking_string = NULL;
 bool      *wal_consistency_checking = NULL;
 bool       wal_init_zero = true;
index 32b4cc84e79aaae8fb77266078a4a7bb9cf63072..10b3b090535192f692eb13c6c8f5aaa340707a10 100644 (file)
 #include "storage/proc.h"
 #include "utils/memutils.h"
 
-/* Buffer size required to store a compressed version of backup block image */
-#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)
+/*
+ * Guess the maximum buffer size required to store a compressed version of
+ * backup block image.
+ */
+#ifdef USE_LZ4
+#include <lz4.h>
+#define    LZ4_MAX_BLCKSZ      LZ4_COMPRESSBOUND(BLCKSZ)
+#else
+#define LZ4_MAX_BLCKSZ     0
+#endif
+
+#define PGLZ_MAX_BLCKSZ        PGLZ_MAX_OUTPUT(BLCKSZ)
+
+#define COMPRESS_BUFSIZE   Max(PGLZ_MAX_BLCKSZ, LZ4_MAX_BLCKSZ)
 
 /*
  * For each block reference registered with XLogRegisterBuffer, we fill in
@@ -58,7 +70,7 @@ typedef struct
                                 * backup block data in XLogRecordAssemble() */
 
    /* buffer to store a compressed version of backup block image */
-   char        compressed_page[PGLZ_MAX_BLCKSZ];
+   char        compressed_page[COMPRESS_BUFSIZE];
 } registered_buffer;
 
 static registered_buffer *registered_buffers;
@@ -628,7 +640,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
            /*
             * Try to compress a block image if wal_compression is enabled
             */
-           if (wal_compression)
+           if (wal_compression != WAL_COMPRESSION_NONE)
            {
                is_compressed =
                    XLogCompressBackupBlock(page, bimg.hole_offset,
@@ -665,8 +677,29 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 
            if (is_compressed)
            {
+               /* The current compression is stored in the WAL record */
                bimg.length = compressed_len;
-               bimg.bimg_info |= BKPIMAGE_IS_COMPRESSED;
+
+               /* Set the compression method used for this block */
+               switch ((WalCompression) wal_compression)
+               {
+                   case WAL_COMPRESSION_PGLZ:
+                       bimg.bimg_info |= BKPIMAGE_COMPRESS_PGLZ;
+                       break;
+
+                   case WAL_COMPRESSION_LZ4:
+#ifdef USE_LZ4
+                       bimg.bimg_info |= BKPIMAGE_COMPRESS_LZ4;
+#else
+                       elog(ERROR, "LZ4 is not supported by this build");
+#endif
+                       break;
+
+                   case WAL_COMPRESSION_NONE:
+                       Assert(false);  /* cannot happen */
+                       break;
+                       /* no default case, so that compiler will warn */
+               }
 
                rdt_datas_last->data = regbuf->compressed_page;
                rdt_datas_last->len = compressed_len;
@@ -853,12 +886,34 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
    else
        source = page;
 
+   switch ((WalCompression) wal_compression)
+   {
+       case WAL_COMPRESSION_PGLZ:
+           len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
+           break;
+
+       case WAL_COMPRESSION_LZ4:
+#ifdef USE_LZ4
+           len = LZ4_compress_default(source, dest, orig_len,
+                                      COMPRESS_BUFSIZE);
+           if (len <= 0)
+               len = -1;       /* failure */
+#else
+           elog(ERROR, "LZ4 is not supported by this build");
+#endif
+           break;
+
+       case WAL_COMPRESSION_NONE:
+           Assert(false);      /* cannot happen */
+           break;
+           /* no default case, so that compiler will warn */
+   }
+
    /*
-    * We recheck the actual size even if pglz_compress() reports success and
-    * see if the number of bytes saved by compression is larger than the
-    * length of extra data needed for the compressed version of block image.
+    * We recheck the actual size even if compression reports success and see
+    * if the number of bytes saved by compression is larger than the length
+    * of extra data needed for the compressed version of block image.
     */
-   len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default);
    if (len >= 0 &&
        len + extra_bytes < orig_len)
    {
index 42738eb940c2caf39bc9aa7920e62f39a081b17f..9a2cdf888e281d9dd7c11168b74f2137b838ef84 100644 (file)
@@ -18,6 +18,9 @@
 #include "postgres.h"
 
 #include <unistd.h>
+#ifdef USE_LZ4
+#include <lz4.h>
+#endif
 
 #include "access/transam.h"
 #include "access/xlog_internal.h"
@@ -1290,7 +1293,7 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
 
                blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0);
 
-               if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED)
+               if (BKPIMAGE_COMPRESSED(blk->bimg_info))
                {
                    if (blk->bimg_info & BKPIMAGE_HAS_HOLE)
                        COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
@@ -1335,29 +1338,28 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
                }
 
                /*
-                * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED
-                * flag is set.
+                * Cross-check that bimg_len < BLCKSZ if it is compressed.
                 */
-               if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
+               if (BKPIMAGE_COMPRESSED(blk->bimg_info) &&
                    blk->bimg_len == BLCKSZ)
                {
                    report_invalid_record(state,
-                                         "BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X",
+                                         "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X",
                                          (unsigned int) blk->bimg_len,
                                          LSN_FORMAT_ARGS(state->ReadRecPtr));
                    goto err;
                }
 
                /*
-                * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor
-                * IS_COMPRESSED flag is set.
+                * Cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor
+                * COMPRESSED() is set.
                 */
                if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) &&
-                   !(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) &&
+                   !BKPIMAGE_COMPRESSED(blk->bimg_info) &&
                    blk->bimg_len != BLCKSZ)
                {
                    report_invalid_record(state,
-                                         "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X",
+                                         "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X",
                                          (unsigned int) blk->data_len,
                                          LSN_FORMAT_ARGS(state->ReadRecPtr));
                    goto err;
@@ -1555,17 +1557,49 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
    bkpb = &record->blocks[block_id];
    ptr = bkpb->bkp_image;
 
-   if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
+   if (BKPIMAGE_COMPRESSED(bkpb->bimg_info))
    {
        /* If a backup block image is compressed, decompress it */
-       if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
-                           BLCKSZ - bkpb->hole_length, true) < 0)
+       bool        decomp_success = true;
+
+       if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
+       {
+           if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
+                               BLCKSZ - bkpb->hole_length, true) < 0)
+               decomp_success = false;
+       }
+       else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
+       {
+#ifdef USE_LZ4
+           if (LZ4_decompress_safe(ptr, tmp.data,
+                                   bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0)
+               decomp_success = false;
+#else
+           report_invalid_record(record, "image at %X/%X compressed with %s not supported by build, block %d",
+                                 (uint32) (record->ReadRecPtr >> 32),
+                                 (uint32) record->ReadRecPtr,
+                                 "LZ4",
+                                 block_id);
+           return false;
+#endif
+       }
+       else
+       {
+           report_invalid_record(record, "image at %X/%X compressed with unknown method, block %d",
+                                 (uint32) (record->ReadRecPtr >> 32),
+                                 (uint32) record->ReadRecPtr,
+                                 block_id);
+           return false;
+       }
+
+       if (!decomp_success)
        {
            report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
                                  LSN_FORMAT_ARGS(record->ReadRecPtr),
                                  block_id);
            return false;
        }
+
        ptr = tmp.data;
    }
 
index 297e705b806a6c0ad069bd1fda7d03b55810d0eb..480e8cd19914e373233c3f69f6e0e82953d559c0 100644 (file)
@@ -540,6 +540,22 @@ static struct config_enum_entry default_toast_compression_options[] = {
    {NULL, 0, false}
 };
 
+static const struct config_enum_entry wal_compression_options[] = {
+   {"pglz", WAL_COMPRESSION_PGLZ, false},
+#ifdef USE_LZ4
+   {"lz4", WAL_COMPRESSION_LZ4, false},
+#endif
+   {"on", WAL_COMPRESSION_PGLZ, false},
+   {"off", WAL_COMPRESSION_NONE, false},
+   {"true", WAL_COMPRESSION_PGLZ, true},
+   {"false", WAL_COMPRESSION_NONE, true},
+   {"yes", WAL_COMPRESSION_PGLZ, true},
+   {"no", WAL_COMPRESSION_NONE, true},
+   {"1", WAL_COMPRESSION_PGLZ, true},
+   {"0", WAL_COMPRESSION_NONE, true},
+   {NULL, 0, false}
+};
+
 /*
  * Options for enum values stored in other modules
  */
@@ -1304,16 +1320,6 @@ static struct config_bool ConfigureNamesBool[] =
        NULL, NULL, NULL
    },
 
-   {
-       {"wal_compression", PGC_SUSET, WAL_SETTINGS,
-           gettext_noop("Compresses full-page writes written in WAL file."),
-           NULL
-       },
-       &wal_compression,
-       false,
-       NULL, NULL, NULL
-   },
-
    {
        {"wal_init_zero", PGC_SUSET, WAL_SETTINGS,
            gettext_noop("Writes zeroes to new WAL files before first use."),
@@ -4816,6 +4822,16 @@ static struct config_enum ConfigureNamesEnum[] =
        NULL, NULL, NULL
    },
 
+   {
+       {"wal_compression", PGC_SUSET, WAL_SETTINGS,
+           gettext_noop("Compresses full-page writes written in WAL file with specified method."),
+           NULL
+       },
+       &wal_compression,
+       WAL_COMPRESSION_NONE, wal_compression_options,
+       NULL, NULL, NULL
+   },
+
    {
        {"wal_level", PGC_POSTMASTER, WAL_SETTINGS,
            gettext_noop("Sets the level of information written to the WAL."),
index af04ec3c744e76dbdc7eb585e0ea3684cc46183c..b696abfe54112dc79453ca7b9918f5d8ff1f447e 100644 (file)
 #full_page_writes = on         # recover from partial page writes
 #wal_log_hints = off           # also do full page writes of non-critical updates
                    # (change requires restart)
-#wal_compression = off         # enable compression of full-page writes
+#wal_compression = off         # enables compression of full-page writes;
+                   # off, pglz, lz4, or on
 #wal_init_zero = on            # zero-fill new WAL files
 #wal_recycle = on          # recycle WAL files
 #wal_buffers = -1          # min 32kB, -1 sets based on shared_buffers
index f8b8afe4a7bebb2278c4ff07cd6bafca83cb75b9..d83847b276deb6263cb82ed61864aae5b5786d5e 100644 (file)
@@ -537,18 +537,29 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
                   blk);
            if (XLogRecHasBlockImage(record, block_id))
            {
-               if (record->blocks[block_id].bimg_info &
-                   BKPIMAGE_IS_COMPRESSED)
+               uint8       bimg_info = record->blocks[block_id].bimg_info;
+
+               if (BKPIMAGE_COMPRESSED(bimg_info))
                {
+                   const char *method;
+
+                   if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0)
+                       method = "pglz";
+                   else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0)
+                       method = "lz4";
+                   else
+                       method = "unknown";
+
                    printf(" (FPW%s); hole: offset: %u, length: %u, "
-                          "compression saved: %u",
+                          "compression saved: %u, method: %s",
                           XLogRecBlockImageApply(record, block_id) ?
                           "" : " for WAL verification",
                           record->blocks[block_id].hole_offset,
                           record->blocks[block_id].hole_length,
                           BLCKSZ -
                           record->blocks[block_id].hole_length -
-                          record->blocks[block_id].bimg_len);
+                          record->blocks[block_id].bimg_len,
+                          method);
                }
                else
                {
index 7510e882287207a6f230777b0c104d2bb592713c..ccfcf43d62a72b50675ae3cc89d8065a306f6cec 100644 (file)
@@ -116,7 +116,7 @@ extern char *XLogArchiveCommand;
 extern bool EnableHotStandby;
 extern bool fullPageWrites;
 extern bool wal_log_hints;
-extern bool wal_compression;
+extern int wal_compression;
 extern bool wal_init_zero;
 extern bool wal_recycle;
 extern bool *wal_consistency_checking;
@@ -167,6 +167,14 @@ typedef enum WalLevel
    WAL_LEVEL_LOGICAL
 } WalLevel;
 
+/* Compression algorithms for WAL */
+typedef enum WalCompression
+{
+   WAL_COMPRESSION_NONE = 0,
+   WAL_COMPRESSION_PGLZ,
+   WAL_COMPRESSION_LZ4
+} WalCompression;
+
 /* Recovery states */
 typedef enum RecoveryState
 {
index 80c92a2498a32b1528e529edf8f18ac4a4cba61e..e06ee92a5e54e40a7958207d0d52eee4715a0b0c 100644 (file)
@@ -114,8 +114,8 @@ typedef struct XLogRecordBlockHeader
  * present is (BLCKSZ - <length of "hole" bytes>).
  *
  * Additionally, when wal_compression is enabled, we will try to compress full
- * page images using the PGLZ compression algorithm, after removing the "hole".
- * This can reduce the WAL volume, but at some extra cost of CPU spent
+ * page images using one of the supported algorithms, after removing the
+ * "hole". This can reduce the WAL volume, but at some extra cost of CPU spent
  * on the compression during WAL logging. In this case, since the "hole"
  * length cannot be calculated by subtracting the number of page image bytes
  * from BLCKSZ, basically it needs to be stored as an extra information.
@@ -134,7 +134,7 @@ typedef struct XLogRecordBlockImageHeader
    uint8       bimg_info;      /* flag bits, see below */
 
    /*
-    * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an
+    * If BKPIMAGE_HAS_HOLE and BKPIMAGE_COMPRESSED(), an
     * XLogRecordBlockCompressHeader struct follows.
     */
 } XLogRecordBlockImageHeader;
@@ -144,9 +144,13 @@ typedef struct XLogRecordBlockImageHeader
 
 /* Information stored in bimg_info */
 #define BKPIMAGE_HAS_HOLE      0x01    /* page image has "hole" */
-#define BKPIMAGE_IS_COMPRESSED     0x02    /* page image is compressed */
-#define BKPIMAGE_APPLY     0x04    /* page image should be restored during
-                                    * replay */
+#define BKPIMAGE_APPLY         0x02    /* page image should be restored
+                                        * during replay */
+/* compression methods supported */
+#define BKPIMAGE_COMPRESS_PGLZ 0x04
+#define BKPIMAGE_COMPRESS_LZ4  0x08
+#define    BKPIMAGE_COMPRESSED(info) \
+   ((info & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4)) != 0)
 
 /*
  * Extra header information used when page image has "hole" and
index 1b3da854214ce35a7861fe74a11f34ed671f699b..64c06cf95235951db5688b38338a71436cff0502 100644 (file)
@@ -2843,6 +2843,7 @@ WaitEventSet
 WaitEventTimeout
 WaitPMResult
 WalCloseMethod
+WalCompression
 WalLevel
 WalRcvData
 WalRcvExecResult