diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index 4963e9245cb5..db539943f468 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -1786,12 +1786,13 @@ check_tuple_attribute(HeapCheckContext *ctx) bool valid = false; /* Compressed attributes should have a valid compression method */ - cmid = TOAST_COMPRESS_METHOD(&toast_pointer); + cmid = VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer); switch (cmid) { /* List of all valid compression method IDs */ case TOAST_PGLZ_COMPRESSION_ID: case TOAST_LZ4_COMPRESSION_ID: + case TOAST_ZSTD_COMPRESSION_ID: valid = true; break; diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index aa5b8772436c..ac00b2c67b8d 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -1240,19 +1240,21 @@ - - attcompression char - - - The current compression method of the column. Typically this is - '\0' to specify use of the current default setting - (see ). Otherwise, - 'p' selects pglz compression, while - 'l' selects LZ4 - compression. However, this field is ignored - whenever attstorage does not allow - compression. - + + + attcompression char + + + The current compression method of the column. Typically this is + '\0' to specify use of the current default setting + (see ). Otherwise, + 'p' selects pglz compression, while + 'l' selects LZ4 compression, + and 'z' selects ZSTD compression. + However, this field is ignored whenever + attstorage does not allow compression. + + diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index c7acc0f182f3..072041d35dab 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -3404,8 +3404,8 @@ include_dir 'conf.d' A compressed page image will be decompressed during WAL replay. The supported methods are pglz, lz4 (if PostgreSQL - was compiled with ) and - zstd (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL was compiled with ). The default value is off. 
Only superusers and users with the appropriate SET @@ -9824,9 +9824,11 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; the COMPRESSION column option in CREATE TABLE or ALTER TABLE.) - The supported compression methods are pglz and - (if PostgreSQL was compiled with - ) lz4. + The supported compression methods are pglz, + lz4 (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL + was compiled with ). The default is pglz. diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 6b327d4fd81c..95dd0484660f 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -25298,6 +25298,26 @@ SELECT * FROM pg_ls_dir('.') WITH ORDINALITY AS t(ls,n); This is equivalent to current_user. + + + + + pg_compression_available + + pg_compression_available ( text ) + boolean + + + Returns true if the given compression method name is supported in this PostgreSQL build. + The built-in compression method pglz is always available because it is included + in the core server code and does not require any additional compile-time option or external library. + In contrast, lz4 will return true only if PostgreSQL was + compiled with the --with-lz4 flag, and zstd will return + true only if compiled with the --with-zstd flag. + If the input name does not match any supported method, the function returns false. + + + diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml index 1e4f26c13f65..48bdde4f2289 100644 --- a/doc/src/sgml/ref/alter_table.sgml +++ b/doc/src/sgml/ref/alter_table.sgml @@ -443,10 +443,10 @@ WITH ( MODULUS numeric_literal, REM its existing compression method, rather than being recompressed with the compression method of the target column. The supported compression - methods are pglz and lz4. - (lz4 is available only if - was used when building PostgreSQL.) In - addition, compression_method + methods are pglz, + lz4 (if PostgreSQL was compiled with ), + and zstd (if PostgreSQL was compiled with ). 
+ In addition, compression_method can be default, which selects the default behavior of consulting the setting at the time of data insertion to determine the method to use. diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml index dc000e913c14..6af08276e8a9 100644 --- a/doc/src/sgml/ref/create_table.sgml +++ b/doc/src/sgml/ref/create_table.sgml @@ -337,16 +337,19 @@ WITH ( MODULUS numeric_literal, REM The COMPRESSION clause sets the compression method for the column. Compression is supported only for variable-width data types, and is used only when the column's storage mode is main or extended. (See for information on column storage modes.) Setting this property for a partitioned table has no direct effect, because such tables have no storage of their own, but the configured value will be inherited by newly-created partitions. - The supported compression methods are pglz and - lz4. (lz4 is available only if - was used when building - PostgreSQL.) In addition, + The supported compression methods are + pglz, + lz4 (if PostgreSQL + was compiled with ), + and zstd (if PostgreSQL + was compiled with ). + In addition, compression_method can be default to explicitly specify the default behavior, which is to consult the diff --git a/src/backend/access/common/detoast.c b/src/backend/access/common/detoast.c index 626517877422..6a2e6c9683d2 100644 --- a/src/backend/access/common/detoast.c +++ b/src/backend/access/common/detoast.c @@ -246,10 +246,10 @@ detoast_attr_slice(struct varlena *attr, * Determine maximum amount of compressed data needed for a prefix * of a given length (after decompression). * - * At least for now, if it's LZ4 data, we'll have to fetch the - * whole thing, because there doesn't seem to be an API call to - * determine how much compressed data we need to be sure of being - * able to decompress the required slice.
+ * At least for now, if it's LZ4 or Zstandard data, we'll have to + * fetch the whole thing, because there doesn't seem to be an API + * call to determine how much compressed data we need to be sure + * of being able to decompress the required slice. */ if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) == TOAST_PGLZ_COMPRESSION_ID) @@ -478,13 +478,15 @@ toast_decompress_datum(struct varlena *attr) * Fetch the compression method id stored in the compression header and * decompress the data using the appropriate decompression routine. */ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: return pglz_decompress_datum(attr); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum(attr); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum(attr); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ @@ -514,20 +516,22 @@ toast_decompress_datum_slice(struct varlena *attr, int32 slicelength) * have been seen to give wrong results if passed an output size that is * more than the data's true decompressed size. */ - if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr)) + if ((uint32) slicelength >= VARDATA_COMPRESSED_GET_EXTSIZE(attr)) return toast_decompress_datum(attr); /* * Fetch the compression method id stored in the compression header and * decompress the data slice using the appropriate decompression routine. 
*/ - cmid = TOAST_COMPRESS_METHOD(attr); + cmid = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(attr); switch (cmid) { case TOAST_PGLZ_COMPRESSION_ID: return pglz_decompress_datum_slice(attr, slicelength); case TOAST_LZ4_COMPRESSION_ID: return lz4_decompress_datum_slice(attr, slicelength); + case TOAST_ZSTD_COMPRESSION_ID: + return zstd_decompress_datum_slice(attr, slicelength); default: elog(ERROR, "invalid compression method id %d", cmid); return NULL; /* keep compiler quiet */ diff --git a/src/backend/access/common/toast_compression.c b/src/backend/access/common/toast_compression.c index 21f2f4af97e3..37c85c7fb18f 100644 --- a/src/backend/access/common/toast_compression.c +++ b/src/backend/access/common/toast_compression.c @@ -17,19 +17,34 @@ #include #endif +#ifdef USE_ZSTD +#include +#endif + #include "access/detoast.h" #include "access/toast_compression.h" #include "common/pg_lzcompress.h" #include "varatt.h" +#include "fmgr.h" +#include "parser/scansup.h" +#include "utils/builtins.h" /* GUC */ int default_toast_compression = TOAST_PGLZ_COMPRESSION; -#define NO_LZ4_SUPPORT() \ +#ifdef USE_ZSTD +#define ZSTD_CHECK_ERROR(zstd_ret, msg) \ + do { \ + if (ZSTD_isError(zstd_ret)) \ + ereport(ERROR, (errmsg("%s: %s", (msg), ZSTD_getErrorName(zstd_ret)))); \ + } while (0) +#endif + +#define COMPRESSION_METHOD_NOT_SUPPORTED(method) \ ereport(ERROR, \ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \ - errmsg("compression method lz4 not supported"), \ - errdetail("This functionality requires the server to be built with lz4 support."))) + errmsg("compression method %s not supported", method), \ + errdetail("This functionality requires the server to be built with %s support.", method))) /* * Compress a varlena using PGLZ. 
@@ -139,7 +154,7 @@ struct varlena *
 lz4_compress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		valsize;
@@ -182,7 +197,7 @@ struct varlena *
 lz4_decompress_datum(const struct varlena *value)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -215,7 +230,7 @@ struct varlena *
 lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
 {
 #ifndef USE_LZ4
-	NO_LZ4_SUPPORT();
+	COMPRESSION_METHOD_NOT_SUPPORTED("lz4");
 	return NULL;				/* keep compiler quiet */
 #else
 	int32		rawsize;
@@ -245,6 +260,170 @@ lz4_decompress_datum_slice(const struct varlena *value, int32 slicelength)
 #endif
 }
 
+/*
+ * Compress a varlena using ZSTD (no dictionary).
+ *
+ * Returns the compressed varlena, or NULL if the compressed payload is
+ * larger than the input.
+ */
+struct varlena *
+zstd_compress_datum(const struct varlena *value)
+{
+#ifdef USE_ZSTD
+	uint32		valsize = VARSIZE_ANY_EXHDR(value);
+	size_t		max_size = ZSTD_compressBound(valsize);
+	struct varlena *compressed;
+	size_t		cmp_size;
+
+	/* Allocate space for the compressed varlena (header + data) */
+	compressed = (struct varlena *) palloc(max_size + VARHDRSZ_4BCE);
+
+	cmp_size = ZSTD_compress(VARDATA_4BCE(compressed),
+							 max_size,
+							 VARDATA_ANY(value),
+							 valsize,
+							 ZSTD_CLEVEL_DEFAULT);
+
+	if (ZSTD_isError(cmp_size))
+	{
+		pfree(compressed);
+		ZSTD_CHECK_ERROR(cmp_size, "ZSTD compression failed");
+	}
+
+	/*
+	 * If the compressed payload is larger than the input, return NULL so
+	 * that the uncompressed data is stored.
+	 */
+	if (cmp_size > valsize)
+	{
+		pfree(compressed);
+		return NULL;
+	}
+
+	/* Set the compressed size in the varlena header */
+	SET_VARSIZE_COMPRESSED(compressed, cmp_size + VARHDRSZ_4BCE);
+
+	return compressed;
+
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;
+#endif
+}
+
+/*
+ * Decompress a varlena that was compressed using ZSTD.
+ */
+struct varlena *
+zstd_decompress_datum(const struct varlena *value)
+{
+#ifdef USE_ZSTD
+	/* ZSTD no dictionary compression */
+	uint32		actual_size_exhdr = VARDATA_COMPRESSED_GET_EXTSIZE(value);
+	uint32		cmplen;
+	struct varlena *result;
+	size_t		ucmplen;
+
+	cmplen = VARSIZE_ANY(value) - VARHDRSZ_4BCE;
+
+	/* Allocate space for the uncompressed data */
+	result = (struct varlena *) palloc(actual_size_exhdr + VARHDRSZ);
+
+	ucmplen = ZSTD_decompress(VARDATA(result),
+							  actual_size_exhdr,
+							  VARDATA_4BCE(value),
+							  cmplen);
+
+	if (ZSTD_isError(ucmplen))
+	{
+		pfree(result);
+		ZSTD_CHECK_ERROR(ucmplen, "ZSTD decompression failed");
+	}
+
+	/* Set final size in the varlena header */
+	SET_VARSIZE(result, ucmplen + VARHDRSZ);
+	return result;
+
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;
+#endif
+}
+
+/*
+ * Decompress the first slicelength bytes of a ZSTD-compressed varlena.
+ *
+ * The result may be shorter than slicelength if the compressed stream ends
+ * before that many bytes have been produced.
+ */
+struct varlena *
+zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength)
+{
+#ifdef USE_ZSTD
+	/* ZSTD no dictionary compression */
+	struct varlena *result;
+	ZSTD_inBuffer inBuf;
+	ZSTD_outBuffer outBuf;
+	ZSTD_DCtx  *zstdDctx;
+	size_t		ret;
+
+	/*
+	 * Allocate the result before creating the decompression context.  The
+	 * context is allocated by libzstd outside of any memory context, so it
+	 * would be leaked if palloc() raised an error after it was created.
+	 */
+	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
+
+	zstdDctx = ZSTD_createDCtx();
+	if (zstdDctx == NULL)
+	{
+		pfree(result);
+		ereport(ERROR,
+				(errcode(ERRCODE_OUT_OF_MEMORY),
+				 errmsg("out of memory"),
+				 errdetail("Could not allocate a ZSTD decompression context.")));
+	}
+
+	inBuf.src = VARDATA_4BCE(value);
+	inBuf.size = VARSIZE_ANY(value) - VARHDRSZ_4BCE;
+	inBuf.pos = 0;
+
+	outBuf.dst = (char *) result + VARHDRSZ;
+	outBuf.size = slicelength;
+	outBuf.pos = 0;
+
+	/* Decompress until the slice is full or the input is used up */
+	while (inBuf.pos < inBuf.size && outBuf.pos < outBuf.size)
+	{
+		ret = ZSTD_decompressStream(zstdDctx, &outBuf, &inBuf);
+		if (ZSTD_isError(ret))
+		{
+			pfree(result);
+			ZSTD_freeDCtx(zstdDctx);
+			ZSTD_CHECK_ERROR(ret, "zstd decompression failed");
+		}
+
+		/* Zero means the frame is fully decoded; nothing more to read */
+		if (ret == 0)
+			break;
+	}
+
+	ZSTD_freeDCtx(zstdDctx);
+
+	/*
+	 * Report what was actually produced.  A damaged or truncated stream can
+	 * yield fewer bytes than requested, which must not trip an assertion.
+	 */
+	Assert(outBuf.pos <= outBuf.size);
+	SET_VARSIZE(result, outBuf.pos + VARHDRSZ);
+
+	return result;
+#else
+	COMPRESSION_METHOD_NOT_SUPPORTED("zstd");
+	return NULL;
+#endif
+}
+
 /*
  * Extract compression ID from a varlena.
* @@ -289,10 +431,17 @@ CompressionNameToMethod(const char *compression) else if (strcmp(compression, "lz4") == 0) { #ifndef USE_LZ4 - NO_LZ4_SUPPORT(); + COMPRESSION_METHOD_NOT_SUPPORTED("lz4"); #endif return TOAST_LZ4_COMPRESSION; } + else if (strcmp(compression, "zstd") == 0) + { +#ifndef USE_ZSTD + COMPRESSION_METHOD_NOT_SUPPORTED("zstd"); +#endif + return TOAST_ZSTD_COMPRESSION; + } return InvalidCompressionMethod; } @@ -309,8 +458,39 @@ GetCompressionMethodName(char method) return "pglz"; case TOAST_LZ4_COMPRESSION: return "lz4"; + case TOAST_ZSTD_COMPRESSION: + return "zstd"; default: elog(ERROR, "invalid compression method %c", method); return NULL; /* keep compiler quiet */ } } + +/* + * pg_compression_available(text) → bool + * + * True if the named TOAST compressor method was compiled into this server. + */ +Datum +pg_compression_available(PG_FUNCTION_ARGS) +{ + text *name = PG_GETARG_TEXT_PP(0); + char *cname = downcase_truncate_identifier(text_to_cstring(name), + NAMEDATALEN, false); + + /* pglz is always there */ + if (strcmp(cname, "pglz") == 0) + PG_RETURN_BOOL(true); + +#ifdef USE_LZ4 + if (strcmp(cname, "lz4") == 0) + PG_RETURN_BOOL(true); +#endif + +#ifdef USE_ZSTD + if (strcmp(cname, "zstd") == 0) + PG_RETURN_BOOL(true); +#endif + + PG_RETURN_BOOL(false); +} diff --git a/src/backend/access/common/toast_internals.c b/src/backend/access/common/toast_internals.c index 7d8be8346ce5..500443a3535b 100644 --- a/src/backend/access/common/toast_internals.c +++ b/src/backend/access/common/toast_internals.c @@ -71,6 +71,10 @@ toast_compress_datum(Datum value, char cmethod) tmp = lz4_compress_datum((const struct varlena *) value); cmid = TOAST_LZ4_COMPRESSION_ID; break; + case TOAST_ZSTD_COMPRESSION: + tmp = zstd_compress_datum((const struct varlena *) value); + cmid = TOAST_ZSTD_COMPRESSION_ID; + break; default: elog(ERROR, "invalid compression method %c", cmethod); } @@ -143,6 +147,7 @@ toast_save_datum(Relation rel, Datum value, Pointer dval = 
DatumGetPointer(value); int num_indexes; int validIndex; + ToastCompressionId cm = TOAST_INVALID_COMPRESSION_ID; Assert(!VARATT_IS_EXTERNAL(value)); @@ -183,10 +188,11 @@ toast_save_datum(Relation rel, Datum value, data_todo = VARSIZE(dval) - VARHDRSZ; /* rawsize in a compressed datum is just the size of the payload */ toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ; + cm = VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval); /* set external size and compression method */ - VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, - VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval)); + VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo, cm); + /* Assert that the numbers look like it's compressed */ Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer)); } @@ -368,9 +374,9 @@ toast_save_datum(Relation rel, Datum value, /* * Create the TOAST pointer value that we'll return */ - result = (struct varlena *) palloc(TOAST_POINTER_SIZE); + result = (struct varlena *) palloc(TOAST_POINTER_SIZE(cm)); SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK); - memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer)); + memcpy(VARDATA_EXTERNAL(result), &toast_pointer, TOAST_POINTER_SIZE(cm) - VARHDRSZ_EXTERNAL); return PointerGetDatum(result); } diff --git a/src/backend/access/table/toast_helper.c b/src/backend/access/table/toast_helper.c index b60fab0a4d29..5a52bb1b67f2 100644 --- a/src/backend/access/table/toast_helper.c +++ b/src/backend/access/table/toast_helper.c @@ -171,7 +171,7 @@ toast_tuple_init(ToastTupleContext *ttc) * The column must have attstorage EXTERNAL or EXTENDED if check_main is * false, and must have attstorage MAIN if check_main is true. * - * The column must have a minimum size of MAXALIGN(TOAST_POINTER_SIZE); + * The column must have a minimum size of MAXALIGN(TOAST_POINTER_NOEXT_SIZE); * if not, no benefit is to be expected by compressing it. 
* * The return value is the index of the biggest suitable column, or @@ -184,7 +184,7 @@ toast_tuple_find_biggest_attribute(ToastTupleContext *ttc, TupleDesc tupleDesc = ttc->ttc_rel->rd_att; int numAttrs = tupleDesc->natts; int biggest_attno = -1; - int32 biggest_size = MAXALIGN(TOAST_POINTER_SIZE); + int32 biggest_size = MAXALIGN(TOAST_POINTER_NOEXT_SIZE); int32 skip_colflags = TOASTCOL_IGNORE; int i; diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index ffae8c23abfa..fda7c766d7ec 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -4203,6 +4203,9 @@ pg_column_compression(PG_FUNCTION_ARGS) case TOAST_LZ4_COMPRESSION_ID: result = "lz4"; break; + case TOAST_ZSTD_COMPRESSION_ID: + result = "zstd"; + break; default: elog(ERROR, "invalid compression method id %d", cmid); } diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index d14b1678e7fe..6877973c13a1 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -460,6 +460,9 @@ static const struct config_enum_entry default_toast_compression_options[] = { {"pglz", TOAST_PGLZ_COMPRESSION, false}, #ifdef USE_LZ4 {"lz4", TOAST_LZ4_COMPRESSION, false}, +#endif +#ifdef USE_ZSTD + {"zstd", TOAST_ZSTD_COMPRESSION, false}, #endif {NULL, 0, false} }; diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index a9d8293474af..82a8daaba488 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -752,7 +752,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #row_security = on #default_table_access_method = 'heap' #default_tablespace = '' # a tablespace name, '' uses the default -#default_toast_compression = 'pglz' # 'pglz' or 'lz4' +#default_toast_compression = 'pglz' # 'pglz' or 'lz4' or 'zstd' #temp_tablespaces = '' # a list of tablespace names, '' uses # only 
default tablespace #check_function_bodies = on diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c index 1937997ea674..e1f7025c997b 100644 --- a/src/bin/pg_dump/pg_dump.c +++ b/src/bin/pg_dump/pg_dump.c @@ -17590,6 +17590,9 @@ dumpTableSchema(Archive *fout, const TableInfo *tbinfo) case 'l': cmname = "lz4"; break; + case 'z': + cmname = "zstd"; + break; default: cmname = NULL; break; diff --git a/src/bin/psql/describe.c b/src/bin/psql/describe.c index dd25d2fe7b8a..e073f6766e8a 100644 --- a/src/bin/psql/describe.c +++ b/src/bin/psql/describe.c @@ -2172,8 +2172,9 @@ describeOneTableDetails(const char *schemaname, /* these strings are literal in our syntax, so not translated. */ printTableAddCell(&cont, (compression[0] == 'p' ? "pglz" : (compression[0] == 'l' ? "lz4" : - (compression[0] == '\0' ? "" : - "???"))), + (compression[0] == 'z' ? "zstd" : + (compression[0] == '\0' ? "" : + "???")))), false, false); } diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c index 6872653c6c82..e4bd51575ad2 100644 --- a/src/bin/psql/tab-complete.in.c +++ b/src/bin/psql/tab-complete.in.c @@ -2913,7 +2913,7 @@ match_previous_words(int pattern_id, /* ALTER TABLE ALTER [COLUMN] SET COMPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "COMPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "COMPRESSION")) - COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4"); + COMPLETE_WITH("DEFAULT", "PGLZ", "LZ4", "ZSTD"); /* ALTER TABLE ALTER [COLUMN] SET EXPRESSION */ else if (Matches("ALTER", "TABLE", MatchAny, "ALTER", "COLUMN", MatchAny, "SET", "EXPRESSION") || Matches("ALTER", "TABLE", MatchAny, "ALTER", MatchAny, "SET", "EXPRESSION")) diff --git a/src/include/access/detoast.h b/src/include/access/detoast.h index e603a2276c38..ca8abaad6447 100644 --- a/src/include/access/detoast.h +++ b/src/include/access/detoast.h @@ -23,12 +23,16 @@ do { \ varattrib_1b_e *attre = (varattrib_1b_e *) (attr); \ 
Assert(VARATT_IS_EXTERNAL(attre)); \ - Assert(VARSIZE_EXTERNAL(attre) == sizeof(toast_pointer) + VARHDRSZ_EXTERNAL); \ - memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), sizeof(toast_pointer)); \ + memset(&(toast_pointer), 0, sizeof(toast_pointer)); \ + memcpy(&(toast_pointer), VARDATA_EXTERNAL(attre), VARSIZE_EXTERNAL(attre) - VARHDRSZ_EXTERNAL); \ } while (0) /* Size of an EXTERNAL datum that contains a standard TOAST pointer */ -#define TOAST_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_external)) +#define TOAST_POINTER_NOEXT_SIZE (VARHDRSZ_EXTERNAL + offsetof(varatt_external, extended)) +#define TOAST_POINTER_EXT_SIZE (TOAST_POINTER_NOEXT_SIZE + MEMBER_SIZE(varatt_external, extended.cmp)) + +#define TOAST_POINTER_SIZE(cm) \ + (TOAST_CMPID_EXTENDED(cm) ? TOAST_POINTER_EXT_SIZE : TOAST_POINTER_NOEXT_SIZE) /* Size of an EXTERNAL datum that contains an indirection pointer */ #define INDIRECT_POINTER_SIZE (VARHDRSZ_EXTERNAL + sizeof(varatt_indirect)) diff --git a/src/include/access/toast_compression.h b/src/include/access/toast_compression.h index 13c4612ceedc..0d7b521c4816 100644 --- a/src/include/access/toast_compression.h +++ b/src/include/access/toast_compression.h @@ -13,6 +13,10 @@ #ifndef TOAST_COMPRESSION_H #define TOAST_COMPRESSION_H +#ifdef USE_ZSTD +#include +#endif + /* * GUC support. * @@ -23,22 +27,28 @@ extern PGDLLIMPORT int default_toast_compression; /* - * Built-in compression method ID. The toast compression header will store - * this in the first 2 bits of the raw length. These built-in compression - * method IDs are directly mapped to the built-in compression methods. + * Built-in compression method ID. + * + * For TOAST-compressed values: + * - If using a non-extended method, the first 2 bits of the raw length + * field store this ID. + * - If using an extended method, it is stored in the extended 1-byte header. 
+ *
+ * For varlena attributes using extended compression (varatt_external and varattrib_4b):
+ *	 - The upper seven bits of va_ecinfo store the compression method ID minus 2.
  *
- * Don't use these values for anything other than understanding the meaning
- * of the raw bits from a varlena; in particular, if the goal is to identify
- * a compression method, use the constants TOAST_PGLZ_COMPRESSION, etc.
- * below. We might someday support more than 4 compression methods, but
- * we can never have more than 4 values in this enum, because there are
- * only 2 bits available in the places where this is stored.
+ * These IDs map directly to the built-in compression methods.
+ *
+ * Note: Do not use these values for anything other than interpreting the
+ * raw bits from a varlena. To identify a compression method in code, use
+ * the named constants (e.g., TOAST_PGLZ_COMPRESSION) instead.
  */
 typedef enum ToastCompressionId
 {
 	TOAST_PGLZ_COMPRESSION_ID = 0,
 	TOAST_LZ4_COMPRESSION_ID = 1,
-	TOAST_INVALID_COMPRESSION_ID = 2,
+	TOAST_ZSTD_COMPRESSION_ID = 2,
+	TOAST_INVALID_COMPRESSION_ID = 3,
 } ToastCompressionId;
 
 /*
@@ -48,9 +58,19 @@ typedef enum ToastCompressionId
  */
 #define TOAST_PGLZ_COMPRESSION			'p'
 #define TOAST_LZ4_COMPRESSION			'l'
+#define TOAST_ZSTD_COMPRESSION			'z'
 #define InvalidCompressionMethod		'\0'
 
 #define CompressionMethodIsValid(cm)  ((cm) != InvalidCompressionMethod)
+
+/*
+ * True for any compression method ID other than pglz, lz4, or the invalid
+ * marker.  Such IDs are stored in the extended compression info byte rather
+ * than in the 2-bit method field.
+ */
+#define TOAST_CMPID_EXTENDED(cmpid) (!((cmpid) == TOAST_PGLZ_COMPRESSION_ID || \
+									   (cmpid) == TOAST_LZ4_COMPRESSION_ID || \
+									   (cmpid) == TOAST_INVALID_COMPRESSION_ID))
 
 
 /* pglz compression/decompression routines */
@@ -65,6 +85,11 @@ extern struct varlena *lz4_decompress_datum(const struct varlena *value);
 extern struct varlena *lz4_decompress_datum_slice(const struct varlena *value,
 												  int32 slicelength);
 
+/* zstd nodict compression/decompression routines */
+extern struct varlena *zstd_compress_datum(const struct varlena *value);
+extern struct varlena *zstd_decompress_datum(const struct varlena *value);
+extern
struct varlena *zstd_decompress_datum_slice(const struct varlena *value, int32 slicelength); + /* other stuff */ extern ToastCompressionId toast_get_compression_id(struct varlena *attr); extern char CompressionNameToMethod(const char *compression); diff --git a/src/include/access/toast_internals.h b/src/include/access/toast_internals.h index 06ae8583c1e1..35277086f52f 100644 --- a/src/include/access/toast_internals.h +++ b/src/include/access/toast_internals.h @@ -17,32 +17,25 @@ #include "utils/relcache.h" #include "utils/snapshot.h" -/* - * The information at the start of the compressed toast data. - */ -typedef struct toast_compress_header -{ - int32 vl_len_; /* varlena header (do not touch directly!) */ - uint32 tcinfo; /* 2 bits for compression method and 30 bits - * external size; see va_extinfo */ -} toast_compress_header; - /* * Utilities for manipulation of header information for compressed * toast entries. */ -#define TOAST_COMPRESS_EXTSIZE(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo & VARLENA_EXTSIZE_MASK) -#define TOAST_COMPRESS_METHOD(ptr) \ - (((toast_compress_header *) (ptr))->tcinfo >> VARLENA_EXTSIZE_BITS) - -#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ - do { \ - Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ - Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm_method) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_compress_header *) (ptr))->tcinfo = \ - (len) | ((uint32) (cm_method) << VARLENA_EXTSIZE_BITS); \ +#define TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(ptr, len, cm_method) \ + do { \ + Assert((len) > 0 && (len) <= VARLENA_EXTSIZE_MASK); \ + Assert((cm_method) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm_method) == TOAST_LZ4_COMPRESSION_ID || \ + (cm_method) == TOAST_ZSTD_COMPRESSION_ID); \ + if (!TOAST_CMPID_EXTENDED((cm_method))) \ + ((varattrib_4b *)(ptr))->va_compressed.va_tcinfo = ((uint32)(len)) | ((uint32)(cm_method) << VARLENA_EXTSIZE_BITS); \ + else \ + { \ + /* extended path: mark EXT 
flag in tcinfo */ \ + ((varattrib_4b *)(ptr))->va_compressed_ext.va_tcinfo = \ + ((uint32)(len)) | ((uint32)(VARATT_4BCE_EXTFLAG) << VARLENA_EXTSIZE_BITS); \ + VARATT_4BCE_SET_COMPRESS_METHOD(((varattrib_4b *)(ptr))->va_compressed_ext.va_ecinfo, (cm_method)); \ + } \ } while (0) extern Datum toast_compress_datum(Datum value, char cmethod); diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 1fc19146f467..12326199c732 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12572,4 +12572,9 @@ proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', prosrc => 'pg_get_aios' }, +{ oid => 9474, descr => 'is toast compression method available?', + proname => 'pg_compression_available', prokind => 'f', + provolatile => 'i', prorettype => 'bool', proargtypes => 'text', + prosrc => 'pg_compression_available' }, + ] diff --git a/src/include/varatt.h b/src/include/varatt.h index 2e8564d49980..4ca8dac814f5 100644 --- a/src/include/varatt.h +++ b/src/include/varatt.h @@ -28,14 +28,28 @@ * you need to memcpy from the tuple into a local struct variable before * you can look at these fields! (The reason we use memcmp is to avoid * having to do that just to detect equality of two TOAST pointers...) + * + * Optional trailer (only when va_extinfo top bits = 11): + * extended.cmp.va_ecinfo – 1 byte where: + * 1. Bits 7–1 encode (cmid − 2), so cmid ∈ [2…129]. + * 2. Bit 0 is a flag indicating if the algorithm expects extra metadata. 
*/ typedef struct varatt_external { int32 va_rawsize; /* Original data size (includes header) */ uint32 va_extinfo; /* External saved size (without header) and - * compression method */ + * compression method or VARATT_4BCE_EXTFLAG + * flag */ Oid va_valueid; /* Unique ID of value within TOAST table */ Oid va_toastrelid; /* RelID of TOAST table containing it */ + /* -------- optional trailer -------- */ + union + { + struct /* compression-method trailer */ + { + uint8 va_ecinfo; /* Extended compression methods info */ + } cmp; + } extended; /* "extended" = optional bytes */ } varatt_external; /* @@ -93,11 +107,18 @@ typedef enum vartag_external #define VARTAG_IS_EXPANDED(tag) \ (((tag) & ~1) == VARTAG_EXPANDED_RO) -#define VARTAG_SIZE(tag) \ - ((tag) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ - VARTAG_IS_EXPANDED(tag) ? sizeof(varatt_expanded) : \ - (tag) == VARTAG_ONDISK ? sizeof(varatt_external) : \ - (AssertMacro(false), 0)) +#define MEMBER_SIZE(type, member) sizeof( ((type *)0)->member ) + +#define VARTAG_SIZE(PTR) \ +( \ + VARTAG_EXTERNAL(PTR) == VARTAG_INDIRECT ? sizeof(varatt_indirect) : \ + VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)) ? sizeof(varatt_expanded) : \ + VARTAG_EXTERNAL(PTR) == VARTAG_ONDISK ? \ + (offsetof(varatt_external, extended) + \ + ((READ_U32_UNALIGNED((const uint8 *)(PTR) + VARHDRSZ_EXTERNAL + \ + offsetof(varatt_external, va_extinfo)) >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_EXTFLAG \ + ? MEMBER_SIZE(varatt_external, extended.cmp) : 0)) : (AssertMacro(false), 0) \ +) /* * These structs describe the header of a varlena object that may have been @@ -122,6 +143,17 @@ typedef union * compression method; see va_extinfo */ char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */ } va_compressed; + struct + { + uint32 va_header; + uint32 va_tcinfo; /* Original data size (excludes header) and + * compression method or VARATT_4BCE_EXTFLAG + * flag; see va_extinfo */ + uint8 va_ecinfo; /** va_ecinfo – 1 byte where: + * 1. 
Bits 7–1 encode (cmid − 2), so cmid ∈ [2…129]. + * 2. Bit 0 is a flag indicating if the algorithm expects extra metadata. */ + char va_data[FLEXIBLE_ARRAY_MEMBER]; + } va_compressed_ext; } varattrib_4b; typedef struct @@ -206,6 +238,18 @@ typedef struct (((varattrib_1b_e *) (PTR))->va_header = 0x80, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) +/** + * Safely read a 32-bit unsigned integer from *any* address, even when + * that address is **not** naturally aligned to 4 bytes. We do the load + * one byte at a time and re-assemble the word in *host* byte order. + * For BIG ENDIAN systems. + */ +#define READ_U32_UNALIGNED(ptr) \ + ( (uint32) (((const uint8 *)(ptr))[3]) \ + | ((uint32)(((const uint8 *)(ptr))[2]) << 8) \ + | ((uint32)(((const uint8 *)(ptr))[1]) << 16) \ + | ((uint32)(((const uint8 *)(ptr))[0]) << 24) ) + #else /* !WORDS_BIGENDIAN */ #define VARATT_IS_4B(PTR) \ @@ -238,6 +282,17 @@ typedef struct #define SET_VARTAG_1B_E(PTR,tag) \ (((varattrib_1b_e *) (PTR))->va_header = 0x01, \ ((varattrib_1b_e *) (PTR))->va_tag = (tag)) +/** + * Safely read a 32-bit unsigned integer from *any* address, even when + * that address is **not** naturally aligned to 4 bytes. We do the load + * one byte at a time and re-assemble the word in *host* byte order. 
+ * For LITTLE ENDIAN systems + */ +#define READ_U32_UNALIGNED(ptr) \ + ( (uint32) (((const uint8 *)(ptr))[0]) \ + | ((uint32)(((const uint8 *)(ptr))[1]) << 8) \ + | ((uint32)(((const uint8 *)(ptr))[2]) << 16) \ + | ((uint32)(((const uint8 *)(ptr))[3]) << 24) ) #endif /* WORDS_BIGENDIAN */ @@ -282,7 +337,7 @@ typedef struct #define VARDATA_SHORT(PTR) VARDATA_1B(PTR) #define VARTAG_EXTERNAL(PTR) VARTAG_1B_E(PTR) -#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(VARTAG_EXTERNAL(PTR))) +#define VARSIZE_EXTERNAL(PTR) (VARHDRSZ_EXTERNAL + VARTAG_SIZE(PTR)) #define VARDATA_EXTERNAL(PTR) VARDATA_1B_E(PTR) #define VARATT_IS_COMPRESSED(PTR) VARATT_IS_4B_C(PTR) @@ -325,23 +380,39 @@ typedef struct (VARATT_IS_1B(PTR) ? VARDATA_1B(PTR) : VARDATA_4B(PTR)) /* Decompressed size and compression method of a compressed-in-line Datum */ -#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK) +#define VARDATA_COMPRESSED_GET_EXTSIZE(PTR) \ + ( \ + (VARATT_IS_4BCE(PTR)) \ + ? ( ((varattrib_4b *)(PTR))->va_compressed_ext.va_tcinfo & VARLENA_EXTSIZE_MASK ) \ + : ( ((varattrib_4b *)(PTR))->va_compressed.va_tcinfo & VARLENA_EXTSIZE_MASK ) \ + ) #define VARDATA_COMPRESSED_GET_COMPRESS_METHOD(PTR) \ - (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS) + ( (VARATT_IS_4BCE(PTR)) ? 
VARATT_4BCE_GET_COMPRESS_METHOD(((varattrib_4b *) (PTR))->va_compressed_ext.va_ecinfo) \ + : (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_EXTSIZE_BITS)) /* Same for external Datums; but note argument is a struct varatt_external */ #define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \ ((toast_pointer).va_extinfo & VARLENA_EXTSIZE_MASK) -#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ - ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) - -#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ - do { \ - Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ - (cm) == TOAST_LZ4_COMPRESSION_ID); \ - ((toast_pointer).va_extinfo = \ - (len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS)); \ +#define VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) \ + ( ((toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS) == VARATT_4BCE_EXTFLAG \ + ? VARATT_4BCE_GET_COMPRESS_METHOD((toast_pointer).extended.cmp.va_ecinfo) \ + : (toast_pointer).va_extinfo >> VARLENA_EXTSIZE_BITS ) + +#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, len, cm) \ + do { \ + Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \ + (cm) == TOAST_LZ4_COMPRESSION_ID || \ + (cm) == TOAST_ZSTD_COMPRESSION_ID); \ + if (!TOAST_CMPID_EXTENDED((cm))) \ + /* method fits in the low bits of va_extinfo */ \ + (toast_pointer).va_extinfo = (uint32)(len) | ((uint32) (cm) << VARLENA_EXTSIZE_BITS); \ + else \ + { \ + /* set “extended” flag and store the extra byte */ \ + (toast_pointer).va_extinfo = (uint32)(len) | \ + (VARATT_4BCE_EXTFLAG << VARLENA_EXTSIZE_BITS); \ + VARATT_4BCE_SET_COMPRESS_METHOD((toast_pointer).extended.cmp.va_ecinfo, (cm)); \ + } \ } while (0) /* @@ -355,4 +426,41 @@ typedef struct (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \ (toast_pointer).va_rawsize - VARHDRSZ) +/* Upper-two-bit pattern 0b11 marks “extended compression methods used. 
*/ +#define VARATT_4BCE_EXTFLAG 0x3 + +/* + * Layout of the extra 1-byte trailer for extended compression info: + * + * bit 7 6 5 4 3 2 1 0 + * +---+---+---+---+---+---+---+---+ + * | cmid_minus2 | F | + * +---+---+---+---+---+---+---+---+ + * + * • Bits 7–1 (cmid_minus2): + * 7-bit field holding (cmid − 2). The actual compression‐method ID (cmid) + * is (raw + 2), so raw ∈ [0…127] maps to cmid ∈ [2…129]. + * + * • Bit 0 (F): + * Single flag bit reserved for indicating whether this compression method has associated metadata. + */ +#define VARATT_4BCE_SET_COMPRESS_METHOD(va_ecinfo, cmid) \ + do { \ + bool meta = false; \ + (va_ecinfo) = (uint8)((((cmid) - 2) << 1) | ((meta) & 0x01)); \ + } while (0) + +#define VARATT_4BCE_GET_COMPRESS_METHOD(raw) ((((raw) >> 1) & 0x7F) + 2) + +/* Does this varattrib use the “compressed-extended” format? */ +#define VARATT_IS_4BCE(ptr) \ + ((((varattrib_4b *)(ptr))->va_compressed_ext.va_tcinfo >> VARLENA_EXTSIZE_BITS) \ + == VARATT_4BCE_EXTFLAG) + +/* Access the start of the compressed payload */ +#define VARDATA_4BCE(ptr) \ + (((varattrib_4b *)(ptr))->va_compressed_ext.va_data) + +#define VARHDRSZ_4BCE (offsetof(varattrib_4b, va_compressed_ext.va_data)) + #endif diff --git a/src/test/regress/expected/compression.out b/src/test/regress/expected/compression.out index 4dd9ee7200d1..84b4d0b39d46 100644 --- a/src/test/regress/expected/compression.out +++ b/src/test/regress/expected/compression.out @@ -1,3 +1,5 @@ +CREATE SCHEMA pglz; +SET search_path TO pglz, public; \set HIDE_TOAST_COMPRESSION false -- ensure we get stable results regardless of installation's default SET default_toast_compression = 'pglz'; @@ -6,20 +8,20 @@ CREATE TABLE cmdata(f1 text COMPRESSION pglz); CREATE INDEX idx ON cmdata(f1); INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); \d+ cmdata - Table "public.cmdata" + Table "pglz.cmdata" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description 
--------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | pglz | | Indexes: "idx" btree (f1) -CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); +CREATE TABLE cmdata1(f1 TEXT COMPRESSION pglz); INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); \d+ cmdata1 - Table "public.cmdata1" + Table "pglz.cmdata1" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | + f1 | text | | | | extended | pglz | | -- verify stored compression method in the data SELECT pg_column_compression(f1) FROM cmdata; @@ -31,7 +33,7 @@ SELECT pg_column_compression(f1) FROM cmdata; SELECT pg_column_compression(f1) FROM cmdata1; pg_column_compression ----------------------- - lz4 + pglz (1 row) -- decompress data slice @@ -50,7 +52,7 @@ SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; -- copy with table creation SELECT * INTO cmmove1 FROM cmdata; \d+ cmmove1 - Table "public.cmmove1" + Table "pglz.cmmove1" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | | | @@ -69,16 +71,16 @@ SELECT pg_column_compression(f1) FROM cmmove3; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) -- test LIKE INCLUDING COMPRESSION CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | + f1 | text | | | | extended | pglz | | DROP TABLE cmdata2; -- try setting 
compression for incompressible data type @@ -97,7 +99,7 @@ UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; SELECT pg_column_compression(f1) FROM cmmove2; pg_column_compression ----------------------- - lz4 + pglz (1 row) -- test externally stored compressed data @@ -115,8 +117,8 @@ INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); SELECT pg_column_compression(f1) FROM cmdata1; pg_column_compression ----------------------- - lz4 - lz4 + pglz + pglz (2 rows) SELECT SUBSTR(f1, 200, 5) FROM cmdata1; @@ -136,21 +138,21 @@ DROP TABLE cmdata2; --test column type update varlena/non-varlena CREATE TABLE cmdata2 (f1 int); \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | @@ -160,14 +162,14 @@ ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats 
target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | pglz | | ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | pglz | | @@ -182,7 +184,7 @@ SELECT pg_column_compression(f1) FROM cmdata2; -- test compression with materialized view CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; \d+ compressmv - Materialized view "public.compressmv" + Materialized view "pglz.compressmv" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- x | text | | | | extended | | | @@ -193,19 +195,19 @@ View definition: SELECT pg_column_compression(f1) FROM cmdata1; pg_column_compression ----------------------- - lz4 - lz4 + pglz + pglz (2 rows) SELECT pg_column_compression(x) FROM compressmv; pg_column_compression ----------------------- - lz4 - lz4 + pglz + pglz (2 rows) -- test compression with partition -CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart(f1 text COMPRESSION pglz) PARTITION BY HASH(f1); CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); CREATE TABLE cmpart2(f1 text COMPRESSION pglz); ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); @@ -214,7 +216,7 @@ INSERT INTO cmpart VALUES (repeat('123456789', 4004)); SELECT pg_column_compression(f1) FROM cmpart1; pg_column_compression ----------------------- - lz4 + pglz (1 row) SELECT pg_column_compression(f1) FROM cmpart2; @@ 
-224,34 +226,19 @@ SELECT pg_column_compression(f1) FROM cmpart2; (1 row) -- test compression with inheritance -CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -NOTICE: merging multiple inherited definitions of column "f1" -ERROR: column "f1" has a compression method conflict -DETAIL: pglz versus lz4 -CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error -NOTICE: merging column "f1" with inherited definition -ERROR: column "f1" has a compression method conflict -DETAIL: pglz versus lz4 CREATE TABLE cmdata3(f1 text); CREATE TABLE cminh() INHERITS (cmdata, cmdata3); NOTICE: merging multiple inherited definitions of column "f1" -- test default_toast_compression GUC -SET default_toast_compression = ''; -ERROR: invalid value for parameter "default_toast_compression": "" -HINT: Available values: pglz, lz4. -SET default_toast_compression = 'I do not exist compression'; -ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" -HINT: Available values: pglz, lz4. 
-SET default_toast_compression = 'lz4'; SET default_toast_compression = 'pglz'; -- test alter compression method -ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION pglz; INSERT INTO cmdata VALUES (repeat('123456789', 4004)); \d+ cmdata - Table "public.cmdata" + Table "pglz.cmdata" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | lz4 | | + f1 | text | | | | extended | pglz | | Indexes: "idx" btree (f1) Child tables: cminh @@ -260,37 +247,37 @@ SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; \d+ cmdata2 - Table "public.cmdata2" + Table "pglz.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | | | -- test alter compression method for materialized views -ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION pglz; \d+ compressmv - Materialized view "public.compressmv" + Materialized view "pglz.compressmv" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- - x | text | | | | extended | lz4 | | + x | text | | | | extended | pglz | | View definition: SELECT f1 AS x FROM cmdata1; -- test alter compression method for partitioned tables ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +ALTER TABLE 
cmpart2 ALTER COLUMN f1 SET COMPRESSION pglz; -- new data should be compressed with the current compression method INSERT INTO cmpart VALUES (repeat('123456789', 1004)); INSERT INTO cmpart VALUES (repeat('123456789', 4004)); SELECT pg_column_compression(f1) FROM cmpart1; pg_column_compression ----------------------- - lz4 + pglz pglz (2 rows) @@ -298,7 +285,7 @@ SELECT pg_column_compression(f1) FROM cmpart2; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) -- VACUUM FULL does not recompress @@ -306,7 +293,7 @@ SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) VACUUM FULL cmdata; @@ -314,12 +301,12 @@ SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - lz4 + pglz (2 rows) -- test expression index DROP TABLE cmdata2; -CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION pglz); CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM generate_series(1, 50) g), VERSION()); diff --git a/src/test/regress/expected/compression_1.out b/src/test/regress/expected/compression_lz4.out similarity index 66% rename from src/test/regress/expected/compression_1.out rename to src/test/regress/expected/compression_lz4.out index 7bd7642b4b94..d4970ffe5d43 100644 --- a/src/test/regress/expected/compression_1.out +++ b/src/test/regress/expected/compression_lz4.out @@ -1,12 +1,19 @@ +SELECT NOT(pg_compression_available('lz4')) AS skip_test \gset +\if :skip_test + \echo '*** skipping lz4 tests (lz4 not available) ***' + \quit +\endif +CREATE SCHEMA lz4; +SET search_path TO lz4, public; \set HIDE_TOAST_COMPRESSION false -- ensure we get stable results regardless of installation's default -SET default_toast_compression = 'pglz'; +SET default_toast_compression = 'lz4'; -- test creating table with 
compression method CREATE TABLE cmdata(f1 text COMPRESSION pglz); CREATE INDEX idx ON cmdata(f1); INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); \d+ cmdata - Table "public.cmdata" + Table "lz4.cmdata" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | pglz | | @@ -14,13 +21,13 @@ Indexes: "idx" btree (f1) CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); - ^ \d+ cmdata1 + Table "lz4.cmdata1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + -- verify stored compression method in the data SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression @@ -29,9 +36,11 @@ SELECT pg_column_compression(f1) FROM cmdata; (1 row) SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ + pg_column_compression +----------------------- + lz4 +(1 row) + -- decompress data slice SELECT SUBSTR(f1, 200, 5) FROM cmdata; substr @@ -40,13 +49,15 @@ SELECT SUBSTR(f1, 200, 5) FROM cmdata; (1 row) SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; - ^ + substr +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 +(1 row) + -- copy with table creation SELECT * INTO cmmove1 FROM cmdata; \d+ cmmove1 - 
Table "public.cmmove1" + Table "lz4.cmmove1" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | text | | | | extended | | | @@ -61,23 +72,22 @@ SELECT pg_column_compression(f1) FROM cmmove1; CREATE TABLE cmmove3(f1 text COMPRESSION pglz); INSERT INTO cmmove3 SELECT * FROM cmdata; INSERT INTO cmmove3 SELECT * FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT INTO cmmove3 SELECT * FROM cmdata1; - ^ SELECT pg_column_compression(f1) FROM cmmove3; pg_column_compression ----------------------- pglz -(1 row) + lz4 +(2 rows) -- test LIKE INCLUDING COMPRESSION CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); -ERROR: relation "cmdata1" does not exist -LINE 1: CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); - ^ \d+ cmdata2 + Table "lz4.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | lz4 | | + DROP TABLE cmdata2; -ERROR: table "cmdata2" does not exist -- try setting compression for incompressible data type CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); ERROR: column data type integer does not support compression @@ -91,13 +101,10 @@ SELECT pg_column_compression(f1) FROM cmmove2; (1 row) UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; - ^ SELECT pg_column_compression(f1) FROM cmmove2; pg_column_compression ----------------------- - pglz + lz4 (1 row) -- test externally stored compressed data @@ -112,17 +119,20 @@ SELECT pg_column_compression(f1) FROM cmdata2; (1 row) INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); -ERROR: relation "cmdata1" does not exist -LINE 1: INSERT 
INTO cmdata1 SELECT large_val() || repeat('a', 4000); - ^ SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + SELECT SUBSTR(f1, 200, 5) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT SUBSTR(f1, 200, 5) FROM cmdata1; - ^ + substr +-------- + 01234 + 79026 +(2 rows) + SELECT SUBSTR(f1, 200, 5) FROM cmdata2; substr -------- @@ -133,21 +143,21 @@ DROP TABLE cmdata2; --test column type update varlena/non-varlena CREATE TABLE cmdata2 (f1 int); \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | | | ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+---------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | integer | | | | plain | | | @@ -157,14 +167,14 @@ ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats 
target | Description --------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- f1 | character varying | | | | extended | pglz | | ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | pglz | | @@ -178,47 +188,53 @@ SELECT pg_column_compression(f1) FROM cmdata2; -- test compression with materialized view CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: ...TE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; - ^ \d+ compressmv + Materialized view "lz4.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | | | +View definition: + SELECT f1 AS x + FROM cmdata1; + SELECT pg_column_compression(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmdata1; - ^ + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + SELECT pg_column_compression(x) FROM compressmv; -ERROR: relation "compressmv" does not exist -LINE 1: SELECT pg_column_compression(x) FROM compressmv; - ^ + pg_column_compression +----------------------- + lz4 + lz4 +(2 rows) + -- test compression with partition CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. 
CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); -ERROR: relation "cmpart" does not exist CREATE TABLE cmpart2(f1 text COMPRESSION pglz); ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); -ERROR: relation "cmpart" does not exist INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); - ^ INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); - ^ SELECT pg_column_compression(f1) FROM cmpart1; -ERROR: relation "cmpart1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; - ^ + pg_column_compression +----------------------- + lz4 +(1 row) + SELECT pg_column_compression(f1) FROM cmpart2; pg_column_compression ----------------------- -(0 rows) + pglz +(1 row) -- test compression with inheritance CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -ERROR: relation "cmdata1" does not exist +NOTICE: merging multiple inherited definitions of column "f1" +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus lz4 CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error NOTICE: merging column "f1" with inherited definition ERROR: column "f1" has a compression method conflict @@ -227,26 +243,15 @@ CREATE TABLE cmdata3(f1 text); CREATE TABLE cminh() INHERITS (cmdata, cmdata3); NOTICE: merging multiple inherited definitions of column "f1" -- test default_toast_compression GUC -SET default_toast_compression = ''; -ERROR: invalid value for parameter "default_toast_compression": "" -HINT: Available values: pglz. -SET default_toast_compression = 'I do not exist compression'; -ERROR: invalid value for parameter "default_toast_compression": "I do not exist compression" -HINT: Available values: pglz. 
SET default_toast_compression = 'lz4'; -ERROR: invalid value for parameter "default_toast_compression": "lz4" -HINT: Available values: pglz. -SET default_toast_compression = 'pglz'; -- test alter compression method ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. INSERT INTO cmdata VALUES (repeat('123456789', 4004)); \d+ cmdata - Table "public.cmdata" + Table "lz4.cmdata" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+------+-----------+----------+---------+----------+-------------+--------------+------------- - f1 | text | | | | extended | pglz | | + f1 | text | | | | extended | lz4 | | Indexes: "idx" btree (f1) Child tables: cminh @@ -255,50 +260,53 @@ SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - pglz + lz4 (2 rows) ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; \d+ cmdata2 - Table "public.cmdata2" + Table "lz4.cmdata2" Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description --------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- f1 | character varying | | | | plain | | | -- test alter compression method for materialized views ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; -ERROR: relation "compressmv" does not exist \d+ compressmv + Materialized view "lz4.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | lz4 | | +View definition: + SELECT f1 AS x + FROM cmdata1; + -- test alter compression method for partitioned tables ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ERROR: 
relation "cmpart1" does not exist ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. -- new data should be compressed with the current compression method INSERT INTO cmpart VALUES (repeat('123456789', 1004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 1004)); - ^ INSERT INTO cmpart VALUES (repeat('123456789', 4004)); -ERROR: relation "cmpart" does not exist -LINE 1: INSERT INTO cmpart VALUES (repeat('123456789', 4004)); - ^ SELECT pg_column_compression(f1) FROM cmpart1; -ERROR: relation "cmpart1" does not exist -LINE 1: SELECT pg_column_compression(f1) FROM cmpart1; - ^ + pg_column_compression +----------------------- + lz4 + pglz +(2 rows) + SELECT pg_column_compression(f1) FROM cmpart2; pg_column_compression ----------------------- -(0 rows) + pglz + lz4 +(2 rows) -- VACUUM FULL does not recompress SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - pglz + lz4 (2 rows) VACUUM FULL cmdata; @@ -306,21 +314,15 @@ SELECT pg_column_compression(f1) FROM cmdata; pg_column_compression ----------------------- pglz - pglz + lz4 (2 rows) -- test expression index DROP TABLE cmdata2; CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); -ERROR: compression method lz4 not supported -DETAIL: This functionality requires the server to be built with lz4 support. CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); -ERROR: relation "cmdata2" does not exist INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM generate_series(1, 50) g), VERSION()); -ERROR: relation "cmdata2" does not exist -LINE 1: INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEX... 
- ^ -- check data is ok SELECT length(f1) FROM cmdata; length @@ -330,9 +332,12 @@ SELECT length(f1) FROM cmdata; (2 rows) SELECT length(f1) FROM cmdata1; -ERROR: relation "cmdata1" does not exist -LINE 1: SELECT length(f1) FROM cmdata1; - ^ + length +-------- + 10040 + 12449 +(2 rows) + SELECT length(f1) FROM cmmove1; length -------- @@ -349,7 +354,8 @@ SELECT length(f1) FROM cmmove3; length -------- 10000 -(1 row) + 10040 +(2 rows) CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails ERROR: invalid compression method "i_do_not_exist_compression" diff --git a/src/test/regress/expected/compression_lz4_1.out b/src/test/regress/expected/compression_lz4_1.out new file mode 100644 index 000000000000..199b4c4abd07 --- /dev/null +++ b/src/test/regress/expected/compression_lz4_1.out @@ -0,0 +1,5 @@ +SELECT NOT(pg_compression_available('lz4')) AS skip_test \gset +\if :skip_test + \echo '*** skipping lz4 tests (lz4 not available) ***' +*** skipping lz4 tests (lz4 not available) *** + \quit diff --git a/src/test/regress/expected/compression_zstd.out b/src/test/regress/expected/compression_zstd.out new file mode 100644 index 000000000000..97222b20a283 --- /dev/null +++ b/src/test/regress/expected/compression_zstd.out @@ -0,0 +1,376 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' + \quit +\endif +CREATE SCHEMA zstd; +SET search_path TO zstd, public; +\set HIDE_TOAST_COMPRESSION false +-- ensure we get stable results regardless of installation's default +SET default_toast_compression = 'zstd'; +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata + Table "zstd.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description 
+--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | pglz | | +Indexes: + "idx" btree (f1) + +CREATE TABLE cmdata1(f1 TEXT COMPRESSION zstd); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -- inline +INSERT INTO cmdata1 VALUES (repeat('1234567890', 2500000)); -- externally stored +\d+ cmdata1 + Table "zstd.cmdata1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz +(1 row) + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd +(2 rows) + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; + substr +-------- + 01234 +(1 row) + +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + substr +---------------------------------------------------- + 01234567890123456789012345678901234567890123456789 + 01234567890123456789012345678901234567890123456789 +(2 rows) + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 + Table "zstd.cmmove1" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | | | + +SELECT pg_column_compression(f1) FROM cmmove1; + pg_column_compression +----------------------- + pglz +(1 row) + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + pg_column_compression +----------------------- + pglz + 
zstd + zstd +(3 rows) + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | + +DROP TABLE cmdata2; +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); +ERROR: column data type integer does not support compression +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + pglz +(1 row) + +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + pg_column_compression +----------------------- + zstd +(1 row) + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + pglz +(1 row) + +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd + zstd +(3 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; + substr +-------- + 01234 + 01234 + 79026 +(3 rows) + +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; + substr +-------- + 79026 +(1 row) + +DROP TABLE cmdata2; +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | 
Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+---------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | integer | | | | plain | | | + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | character varying | | | | extended | pglz | | + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | pglz | | + +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + pg_column_compression +----------------------- + +(1 row) + +-- test compression with materialized view 
+CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; +\d+ compressmv + Materialized view "zstd.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | | | +View definition: + SELECT f1 AS x + FROM cmdata1; + +SELECT pg_column_compression(f1) FROM cmdata1; + pg_column_compression +----------------------- + zstd + zstd + zstd +(3 rows) + +SELECT pg_column_compression(x) FROM compressmv; + pg_column_compression +----------------------- + zstd + zstd + zstd +(3 rows) + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION zstd) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + zstd +(1 row) + +SELECT pg_column_compression(f1) FROM cmpart2; + pg_column_compression +----------------------- + pglz +(1 row) + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error +NOTICE: merging multiple inherited definitions of column "f1" +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus zstd +CREATE TABLE cminh(f1 TEXT COMPRESSION zstd) INHERITS(cmdata); -- error +NOTICE: merging column "f1" with inherited definition +ERROR: column "f1" has a compression method conflict +DETAIL: pglz versus zstd +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata, cmdata3); +NOTICE: merging multiple inherited definitions of column "f1" +-- test default_toast_compression GUC +SET 
default_toast_compression = 'zstd'; +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION zstd; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata + Table "zstd.cmdata" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + f1 | text | | | | extended | zstd | | +Indexes: + "idx" btree (f1) +Child tables: cminh + +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; +\d+ cmdata2 + Table "zstd.cmdata2" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+-------------------+-----------+----------+---------+---------+-------------+--------------+------------- + f1 | character varying | | | | plain | | | + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION zstd; +\d+ compressmv + Materialized view "zstd.compressmv" + Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description +--------+------+-----------+----------+---------+----------+-------------+--------------+------------- + x | text | | | | extended | zstd | | +View definition: + SELECT f1 AS x + FROM cmdata1; + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION zstd; +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; + pg_column_compression +----------------------- + zstd + pglz +(2 rows) + +SELECT pg_column_compression(f1) 
FROM cmpart2; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +-- VACUUM FULL does not recompress +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + pg_column_compression +----------------------- + pglz + zstd +(2 rows) + +-- test expression index +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION zstd); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); +-- check data is ok +SELECT length(f1) FROM cmdata; + length +-------- + 10000 + 36036 +(2 rows) + +SELECT length(f1) FROM cmdata1; + length +---------- + 10040 + 25000000 + 12449 +(3 rows) + +SELECT length(f1) FROM cmmove1; + length +-------- + 10000 +(1 row) + +SELECT length(f1) FROM cmmove2; + length +-------- + 10040 +(1 row) + +SELECT length(f1) FROM cmmove3; + length +---------- + 10000 + 10040 + 25000000 +(3 rows) + +CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails +ERROR: invalid compression method "i_do_not_exist_compression" +CREATE TABLE badcompresstbl (a text); +ALTER TABLE badcompresstbl ALTER a SET COMPRESSION I_Do_Not_Exist_Compression; -- fails +ERROR: invalid compression method "i_do_not_exist_compression" +DROP TABLE badcompresstbl; +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/expected/compression_zstd_1.out b/src/test/regress/expected/compression_zstd_1.out new file mode 100644 index 000000000000..6ad1a812533c --- /dev/null +++ b/src/test/regress/expected/compression_zstd_1.out @@ -0,0 +1,5 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' +*** skipping zstd tests (zstd not available) *** + \quit diff --git 
a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index a424be2a6bf0..1ef4797cd104 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -123,7 +123,7 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare conversion tr # The stats test resets stats, so nothing else needing stats access can be in # this group. # ---------- -test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression memoize stats predicate numa +test: partition_join partition_prune reloptions hash_part indexing partition_aggregate partition_info tuplesort explain compression compression_lz4 compression_zstd memoize stats predicate numa # event_trigger depends on create_am and cannot run concurrently with # any test that runs DDL diff --git a/src/test/regress/sql/compression.sql b/src/test/regress/sql/compression.sql index 490595fcfb26..a21491456a7f 100644 --- a/src/test/regress/sql/compression.sql +++ b/src/test/regress/sql/compression.sql @@ -1,3 +1,6 @@ +CREATE SCHEMA pglz; +SET search_path TO pglz, public; + \set HIDE_TOAST_COMPRESSION false -- ensure we get stable results regardless of installation's default @@ -8,7 +11,7 @@ CREATE TABLE cmdata(f1 text COMPRESSION pglz); CREATE INDEX idx ON cmdata(f1); INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); \d+ cmdata -CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); +CREATE TABLE cmdata1(f1 TEXT COMPRESSION pglz); INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); \d+ cmdata1 @@ -83,7 +86,7 @@ SELECT pg_column_compression(f1) FROM cmdata1; SELECT pg_column_compression(x) FROM compressmv; -- test compression with partition -CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart(f1 text COMPRESSION pglz) PARTITION BY HASH(f1); CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); CREATE TABLE cmpart2(f1 text COMPRESSION pglz); @@ 
-94,19 +97,14 @@ SELECT pg_column_compression(f1) FROM cmpart1; SELECT pg_column_compression(f1) FROM cmpart2; -- test compression with inheritance -CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error -CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error CREATE TABLE cmdata3(f1 text); CREATE TABLE cminh() INHERITS (cmdata, cmdata3); -- test default_toast_compression GUC -SET default_toast_compression = ''; -SET default_toast_compression = 'I do not exist compression'; -SET default_toast_compression = 'lz4'; SET default_toast_compression = 'pglz'; -- test alter compression method -ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION pglz; INSERT INTO cmdata VALUES (repeat('123456789', 4004)); \d+ cmdata SELECT pg_column_compression(f1) FROM cmdata; @@ -115,12 +113,12 @@ ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; \d+ cmdata2 -- test alter compression method for materialized views -ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION pglz; \d+ compressmv -- test alter compression method for partitioned tables ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; -ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION pglz; -- new data should be compressed with the current compression method INSERT INTO cmpart VALUES (repeat('123456789', 1004)); @@ -135,7 +133,7 @@ SELECT pg_column_compression(f1) FROM cmdata; -- test expression index DROP TABLE cmdata2; -CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION pglz); CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM generate_series(1, 50) g), VERSION()); diff --git a/src/test/regress/sql/compression_lz4.sql 
b/src/test/regress/sql/compression_lz4.sql new file mode 100644 index 000000000000..c801adfa557a --- /dev/null +++ b/src/test/regress/sql/compression_lz4.sql @@ -0,0 +1,161 @@ +SELECT NOT(pg_compression_available('lz4')) AS skip_test \gset +\if :skip_test + \echo '*** skipping lz4 tests (lz4 not available) ***' + \quit +\endif + +CREATE SCHEMA lz4; +SET search_path TO lz4, public; + +\set HIDE_TOAST_COMPRESSION false + +-- ensure we get stable results regardless of installation's default +SET default_toast_compression = 'lz4'; + +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata1(f1 TEXT COMPRESSION lz4); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); +\d+ cmdata1 + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; +SELECT pg_column_compression(f1) FROM cmdata1; + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 +SELECT pg_column_compression(f1) FROM cmmove1; + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 +DROP TABLE cmdata2; + +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); + +-- update using datum from different table +CREATE TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + +-- test 
externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; +DROP TABLE cmdata2; + +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; +\d+ compressmv +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT pg_column_compression(x) FROM compressmv; + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION lz4) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); + +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test compression with inheritance 
+CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error +CREATE TABLE cminh(f1 TEXT COMPRESSION lz4) INHERITS(cmdata); -- error +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata, cmdata3); + +-- test default_toast_compression GUC +SET default_toast_compression = 'lz4'; + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION lz4; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; +\d+ cmdata2 + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION lz4; +\d+ compressmv + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION lz4; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- VACUUM FULL does not recompress +SELECT pg_column_compression(f1) FROM cmdata; +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + +-- test expression index +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION lz4); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); + +-- check data is ok +SELECT length(f1) FROM cmdata; +SELECT length(f1) FROM cmdata1; +SELECT length(f1) FROM cmmove1; +SELECT length(f1) FROM cmmove2; +SELECT length(f1) FROM cmmove3; + +CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails +CREATE TABLE badcompresstbl (a text); +ALTER TABLE badcompresstbl ALTER a SET COMPRESSION 
I_Do_Not_Exist_Compression; -- fails +DROP TABLE badcompresstbl; + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/test/regress/sql/compression_zstd.sql b/src/test/regress/sql/compression_zstd.sql new file mode 100644 index 000000000000..ec709387a17f --- /dev/null +++ b/src/test/regress/sql/compression_zstd.sql @@ -0,0 +1,162 @@ +SELECT NOT(pg_compression_available('zstd')) AS skip_test \gset +\if :skip_test + \echo '*** skipping zstd tests (zstd not available) ***' + \quit +\endif + +CREATE SCHEMA zstd; +SET search_path TO zstd, public; + +\set HIDE_TOAST_COMPRESSION false + +-- ensure we get stable results regardless of installation's default +SET default_toast_compression = 'zstd'; + +-- test creating table with compression method +CREATE TABLE cmdata(f1 text COMPRESSION pglz); +CREATE INDEX idx ON cmdata(f1); +INSERT INTO cmdata VALUES(repeat('1234567890', 1000)); +\d+ cmdata +CREATE TABLE cmdata1(f1 TEXT COMPRESSION zstd); +INSERT INTO cmdata1 VALUES(repeat('1234567890', 1004)); -- inline +INSERT INTO cmdata1 VALUES (repeat('1234567890', 2500000)); -- externally stored +\d+ cmdata1 + +-- verify stored compression method in the data +SELECT pg_column_compression(f1) FROM cmdata; +SELECT pg_column_compression(f1) FROM cmdata1; + +-- decompress data slice +SELECT SUBSTR(f1, 200, 5) FROM cmdata; +SELECT SUBSTR(f1, 2000, 50) FROM cmdata1; + +-- copy with table creation +SELECT * INTO cmmove1 FROM cmdata; +\d+ cmmove1 +SELECT pg_column_compression(f1) FROM cmmove1; + +-- copy to existing table +CREATE TABLE cmmove3(f1 text COMPRESSION pglz); +INSERT INTO cmmove3 SELECT * FROM cmdata; +INSERT INTO cmmove3 SELECT * FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove3; + +-- test LIKE INCLUDING COMPRESSION +CREATE TABLE cmdata2 (LIKE cmdata1 INCLUDING COMPRESSION); +\d+ cmdata2 +DROP TABLE cmdata2; + +-- try setting compression for incompressible data type +CREATE TABLE cmdata2 (f1 int COMPRESSION pglz); + +-- update using datum from different table +CREATE 
TABLE cmmove2(f1 text COMPRESSION pglz); +INSERT INTO cmmove2 VALUES (repeat('1234567890', 1004)); +SELECT pg_column_compression(f1) FROM cmmove2; +UPDATE cmmove2 SET f1 = cmdata1.f1 FROM cmdata1; +SELECT pg_column_compression(f1) FROM cmmove2; + +-- test externally stored compressed data +CREATE OR REPLACE FUNCTION large_val() RETURNS TEXT LANGUAGE SQL AS +'select array_agg(fipshash(g::text))::text from generate_series(1, 256) g'; +CREATE TABLE cmdata2 (f1 text COMPRESSION pglz); +INSERT INTO cmdata2 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata2; +INSERT INTO cmdata1 SELECT large_val() || repeat('a', 4000); +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata1; +SELECT SUBSTR(f1, 200, 5) FROM cmdata2; +DROP TABLE cmdata2; + +--test column type update varlena/non-varlena +CREATE TABLE cmdata2 (f1 int); +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE int USING f1::integer; +\d+ cmdata2 + +--changing column storage should not impact the compression method +--but the data should not be compressed +ALTER TABLE cmdata2 ALTER COLUMN f1 TYPE varchar; +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION pglz; +\d+ cmdata2 +ALTER TABLE cmdata2 ALTER COLUMN f1 SET STORAGE plain; +\d+ cmdata2 +INSERT INTO cmdata2 VALUES (repeat('123456789', 800)); +SELECT pg_column_compression(f1) FROM cmdata2; + +-- test compression with materialized view +CREATE MATERIALIZED VIEW compressmv(x) AS SELECT * FROM cmdata1; +\d+ compressmv +SELECT pg_column_compression(f1) FROM cmdata1; +SELECT pg_column_compression(x) FROM compressmv; + +-- test compression with partition +CREATE TABLE cmpart(f1 text COMPRESSION zstd) PARTITION BY HASH(f1); +CREATE TABLE cmpart1 PARTITION OF cmpart FOR VALUES WITH (MODULUS 2, REMAINDER 0); +CREATE TABLE cmpart2(f1 text COMPRESSION pglz); + +ALTER TABLE cmpart ATTACH PARTITION cmpart2 FOR VALUES WITH (MODULUS 2, 
REMAINDER 1); +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- test compression with inheritance +CREATE TABLE cminh() INHERITS(cmdata, cmdata1); -- error +CREATE TABLE cminh(f1 TEXT COMPRESSION zstd) INHERITS(cmdata); -- error +CREATE TABLE cmdata3(f1 text); +CREATE TABLE cminh() INHERITS (cmdata, cmdata3); + +-- test default_toast_compression GUC +SET default_toast_compression = 'zstd'; + +-- test alter compression method +ALTER TABLE cmdata ALTER COLUMN f1 SET COMPRESSION zstd; +INSERT INTO cmdata VALUES (repeat('123456789', 4004)); +\d+ cmdata +SELECT pg_column_compression(f1) FROM cmdata; + +ALTER TABLE cmdata2 ALTER COLUMN f1 SET COMPRESSION default; +\d+ cmdata2 + +-- test alter compression method for materialized views +ALTER MATERIALIZED VIEW compressmv ALTER COLUMN x SET COMPRESSION zstd; +\d+ compressmv + +-- test alter compression method for partitioned tables +ALTER TABLE cmpart1 ALTER COLUMN f1 SET COMPRESSION pglz; +ALTER TABLE cmpart2 ALTER COLUMN f1 SET COMPRESSION zstd; + +-- new data should be compressed with the current compression method +INSERT INTO cmpart VALUES (repeat('123456789', 1004)); +INSERT INTO cmpart VALUES (repeat('123456789', 4004)); +SELECT pg_column_compression(f1) FROM cmpart1; +SELECT pg_column_compression(f1) FROM cmpart2; + +-- VACUUM FULL does not recompress +SELECT pg_column_compression(f1) FROM cmdata; +VACUUM FULL cmdata; +SELECT pg_column_compression(f1) FROM cmdata; + +-- test expression index +DROP TABLE cmdata2; +CREATE TABLE cmdata2 (f1 TEXT COMPRESSION pglz, f2 TEXT COMPRESSION zstd); +CREATE UNIQUE INDEX idx1 ON cmdata2 ((f1 || f2)); +INSERT INTO cmdata2 VALUES((SELECT array_agg(fipshash(g::TEXT))::TEXT FROM +generate_series(1, 50) g), VERSION()); + +-- check data is ok +SELECT length(f1) FROM cmdata; +SELECT length(f1) FROM cmdata1; +SELECT length(f1) 
FROM cmmove1; +SELECT length(f1) FROM cmmove2; +SELECT length(f1) FROM cmmove3; + +CREATE TABLE badcompresstbl (a text COMPRESSION I_Do_Not_Exist_Compression); -- fails +CREATE TABLE badcompresstbl (a text); +ALTER TABLE badcompresstbl ALTER a SET COMPRESSION I_Do_Not_Exist_Compression; -- fails +DROP TABLE badcompresstbl; + +\set HIDE_TOAST_COMPRESSION true diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ff050e93a507..c1bdf10f29a9 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -4137,7 +4137,6 @@ timeout_handler_proc timeout_params timerCA tlist_vinfo -toast_compress_header tokenize_error_callback_arg transferMode transfer_thread_arg