From 87ce27de6963091f4a365f80bcdb06b9da098f00 Mon Sep 17 00:00:00 2001 From: Amit Kapila Date: Wed, 4 Dec 2024 09:45:18 +0530 Subject: [PATCH] Ensure stored generated columns must be published when required. Ensure stored generated columns that are part of REPLICA IDENTITY must be published explicitly for UPDATE and DELETE operations to be published. We can publish generated columns by listing them in the column list or by enabling the publish_generated_columns option. This commit changes the behavior of the test added in commit adedf54e65 by giving an ERROR for the UPDATE operation in such cases. There is no way to trigger the bug reported in commit adedf54e65 but we didn't remove the corresponding code change because it is still relevant when replicating changes from a publisher with version less than 18. We decided not to backpatch this behavior change to avoid the risk of breaking existing output plugins that may be sending generated columns by default although we are not aware of any such plugin. Also, we didn't see any reports related to this on STABLE branches which is another reason not to backpatch this change. Author: Shlok Kyal, Hou Zhijie Reviewed-by: Vignesh C, Amit Kapila Discussion: https://postgr.es/m/CANhcyEVw4V2Awe2AB6i0E5AJLNdASShGfdBLbUd1XtWDboymCA@mail.gmail.com --- doc/src/sgml/ref/create_publication.sgml | 8 ++ src/backend/commands/publicationcmds.c | 156 +++++++++++++--------- src/backend/executor/execReplication.c | 39 ++++-- src/backend/utils/cache/relcache.c | 57 ++++++-- src/include/catalog/pg_publication.h | 7 + src/include/commands/publicationcmds.h | 7 +- src/test/regress/expected/publication.out | 26 ++++ src/test/regress/sql/publication.sql | 27 ++++ src/test/subscription/t/100_bugs.pl | 16 +-- 9 files changed, 243 insertions(+), 100 deletions(-) diff --git a/doc/src/sgml/ref/create_publication.sgml b/doc/src/sgml/ref/create_publication.sgml index f8e217d6610..5e25536554a 100644 --- a/doc/src/sgml/ref/create_publication.sgml +++ b/doc/src/sgml/ref/create_publication.sgml @@ -311,6 +311,14 @@ CREATE PUBLICATION name system columns. + + The generated columns that are part of REPLICA IDENTITY + must be published explicitly either by listing them in the column list or + by enabling the publish_generated_columns option, in + order for UPDATE and DELETE operations + to be published. + + The row filter on a table becomes redundant if FOR TABLES IN SCHEMA is specified and the table diff --git a/src/backend/commands/publicationcmds.c b/src/backend/commands/publicationcmds.c index 031c84ec29f..5050057a7e4 100644 --- a/src/backend/commands/publicationcmds.c +++ b/src/backend/commands/publicationcmds.c @@ -336,21 +336,36 @@ pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, } /* - * Check if all columns referenced in the REPLICA IDENTITY are covered by - * the column list. + * Check for invalid columns in the publication table definition. * - * Returns true if any replica identity column is not covered by column list. + * This function evaluates two conditions: + * + * 1. Ensures that all columns referenced in the REPLICA IDENTITY are covered + * by the column list. If any column is missing, *invalid_column_list is set + * to true. + * 2. Ensures that all the generated columns referenced in the REPLICA IDENTITY + * are published either by listing them in the column list or by enabling + * publish_generated_columns option. If any unpublished generated column is + * found, *invalid_gen_col is set to true. + * + * Returns true if any of the above conditions are not met. */ bool -pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, - bool pubviaroot) +pub_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, + bool pubviaroot, bool pubgencols, + bool *invalid_column_list, + bool *invalid_gen_col) { - HeapTuple tuple; Oid relid = RelationGetRelid(relation); Oid publish_as_relid = RelationGetRelid(relation); - bool result = false; - Datum datum; - bool isnull; + Bitmapset *idattrs; + Bitmapset *columns = NULL; + TupleDesc desc = RelationGetDescr(relation); + Publication *pub; + int x; + + *invalid_column_list = false; + *invalid_gen_col = false; /* * For a partition, if pubviaroot is true, find the topmost ancestor that @@ -368,80 +383,91 @@ pub_collist_contains_invalid_column(Oid pubid, Relation relation, List *ancestor publish_as_relid = relid; } - tuple = SearchSysCache2(PUBLICATIONRELMAP, - ObjectIdGetDatum(publish_as_relid), - ObjectIdGetDatum(pubid)); + /* Fetch the column list */ + pub = GetPublication(pubid); + check_and_fetch_column_list(pub, publish_as_relid, NULL, &columns); - if (!HeapTupleIsValid(tuple)) - return false; + if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) + { + /* With REPLICA IDENTITY FULL, no column list is allowed. */ + *invalid_column_list = (columns != NULL); - datum = SysCacheGetAttr(PUBLICATIONRELMAP, tuple, - Anum_pg_publication_rel_prattrs, - &isnull); + /* + * As we don't allow a column list with REPLICA IDENTITY FULL, the + * publish_generated_columns option must be set to true if the table + * has any stored generated columns. + */ + if (!pubgencols && + relation->rd_att->constr && + relation->rd_att->constr->has_generated_stored) + *invalid_gen_col = true; - if (!isnull) - { - int x; - Bitmapset *idattrs; - Bitmapset *columns = NULL; + if (*invalid_gen_col && *invalid_column_list) + return true; + } - /* With REPLICA IDENTITY FULL, no column list is allowed. */ - if (relation->rd_rel->relreplident == REPLICA_IDENTITY_FULL) - result = true; + /* Remember columns that are part of the REPLICA IDENTITY */ + idattrs = RelationGetIndexAttrBitmap(relation, + INDEX_ATTR_BITMAP_IDENTITY_KEY); - /* Transform the column list datum to a bitmapset. */ - columns = pub_collist_to_bitmapset(NULL, datum, NULL); + /* + * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are offset + * (to handle system columns the usual way), while column list does not + * use offset, so we can't do bms_is_subset(). Instead, we have to loop + * over the idattrs and check all of them are in the list. + */ + x = -1; + while ((x = bms_next_member(idattrs, x)) >= 0) + { + AttrNumber attnum = (x + FirstLowInvalidHeapAttributeNumber); + Form_pg_attribute att = TupleDescAttr(desc, attnum - 1); - /* Remember columns that are part of the REPLICA IDENTITY */ - idattrs = RelationGetIndexAttrBitmap(relation, - INDEX_ATTR_BITMAP_IDENTITY_KEY); + if (columns == NULL) + { + /* + * The publish_generated_columns option must be set to true if the + * REPLICA IDENTITY contains any stored generated column. + */ + if (!pubgencols && att->attgenerated) + { + *invalid_gen_col = true; + break; + } + + /* Skip validating the column list since it is not defined */ + continue; + } /* - * Attnums in the bitmap returned by RelationGetIndexAttrBitmap are - * offset (to handle system columns the usual way), while column list - * does not use offset, so we can't do bms_is_subset(). Instead, we - * have to loop over the idattrs and check all of them are in the - * list. + * If pubviaroot is true, we are validating the column list of the + * parent table, but the bitmap contains the replica identity + * information of the child table. The parent/child attnums may not + * match, so translate them to the parent - get the attname from the + * child, and look it up in the parent. */ - x = -1; - while ((x = bms_next_member(idattrs, x)) >= 0) + if (pubviaroot) { - AttrNumber attnum = (x + FirstLowInvalidHeapAttributeNumber); + /* attribute name in the child table */ + char *colname = get_attname(relid, attnum, false); /* - * If pubviaroot is true, we are validating the column list of the - * parent table, but the bitmap contains the replica identity - * information of the child table. The parent/child attnums may - * not match, so translate them to the parent - get the attname - * from the child, and look it up in the parent. + * Determine the attnum for the attribute name in parent (we are + * using the column list defined on the parent). */ - if (pubviaroot) - { - /* attribute name in the child table */ - char *colname = get_attname(relid, attnum, false); - - /* - * Determine the attnum for the attribute name in parent (we - * are using the column list defined on the parent). - */ - attnum = get_attnum(publish_as_relid, colname); - } - - /* replica identity column, not covered by the column list */ - if (!bms_is_member(attnum, columns)) - { - result = true; - break; - } + attnum = get_attnum(publish_as_relid, colname); } - bms_free(idattrs); - bms_free(columns); + /* replica identity column, not covered by the column list */ + *invalid_column_list |= !bms_is_member(attnum, columns); + + if (*invalid_column_list && *invalid_gen_col) + break; } - ReleaseSysCache(tuple); + bms_free(columns); + bms_free(idattrs); - return result; + return *invalid_column_list || *invalid_gen_col; } /* check_functions_in_node callback */ diff --git a/src/backend/executor/execReplication.c b/src/backend/executor/execReplication.c index 54025c9f150..cfdf2eedf4d 100644 --- a/src/backend/executor/execReplication.c +++ b/src/backend/executor/execReplication.c @@ -785,16 +785,27 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) return; /* - * It is only safe to execute UPDATE/DELETE when all columns, referenced - * in the row filters from publications which the relation is in, are - * valid - i.e. when all referenced columns are part of REPLICA IDENTITY - * or the table does not publish UPDATEs or DELETEs. + * It is only safe to execute UPDATE/DELETE if the relation does not + * publish UPDATEs or DELETEs, or all the following conditions are + * satisfied: + * + * 1. All columns, referenced in the row filters from publications which + * the relation is in, are valid - i.e. when all referenced columns are + * part of REPLICA IDENTITY. + * + * 2. All columns, referenced in the column lists are valid - i.e. when + * all columns referenced in the REPLICA IDENTITY are covered by the + * column list. + * + * 3. All generated columns in REPLICA IDENTITY of the relation, are valid + * - i.e. when all these generated columns are published. * * XXX We could optimize it by first checking whether any of the - * publications have a row filter for this relation. If not and relation - * has replica identity then we can avoid building the descriptor but as - * this happens only one time it doesn't seem worth the additional - * complexity. + * publications have a row filter or column list for this relation, or if + * the relation contains a generated column. If none of these exist and + * the relation has replica identity then we can avoid building the + * descriptor but as this happens only one time it doesn't seem worth the + * additional complexity. */ RelationBuildPublicationDesc(rel, &pubdesc); if (cmd == CMD_UPDATE && !pubdesc.rf_valid_for_update) @@ -809,6 +820,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) errmsg("cannot update table \"%s\"", RelationGetRelationName(rel)), errdetail("Column list used by the publication does not cover the replica identity."))); + else if (cmd == CMD_UPDATE && !pubdesc.gencols_valid_for_update) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot update table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Replica identity consists of an unpublished generated column."))); else if (cmd == CMD_DELETE && !pubdesc.rf_valid_for_delete) ereport(ERROR, (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), @@ -821,6 +838,12 @@ CheckCmdReplicaIdentity(Relation rel, CmdType cmd) errmsg("cannot delete from table \"%s\"", RelationGetRelationName(rel)), errdetail("Column list used by the publication does not cover the replica identity."))); + else if (cmd == CMD_DELETE && !pubdesc.gencols_valid_for_delete) + ereport(ERROR, + (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), + errmsg("cannot delete from table \"%s\"", + RelationGetRelationName(rel)), + errdetail("Replica identity consists of an unpublished generated column."))); /* If relation has replica identity we are always good. */ if (OidIsValid(RelationGetReplicaIndex(rel))) diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index d0892cee24d..422509f18d7 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -5706,12 +5706,19 @@ RelationGetExclusionInfo(Relation indexRelation, * Get the publication information for the given relation. * * Traverse all the publications which the relation is in to get the - * publication actions and validate the row filter expressions for such - * publications if any. We consider the row filter expression as invalid if it - * references any column which is not part of REPLICA IDENTITY. + * publication actions and validate: + * 1. The row filter expressions for such publications if any. We consider the + * row filter expression as invalid if it references any column which is not + * part of REPLICA IDENTITY. + * 2. The column list for such publication if any. We consider the column list + * invalid if REPLICA IDENTITY contains any column that is not part of it. + * 3. The generated columns of the relation for such publications. We consider + * any reference of an unpublished generated column in REPLICA IDENTITY as + * invalid. * * To avoid fetching the publication information repeatedly, we cache the - * publication actions and row filter validation information. + * publication actions, row filter validation information, column list + * validation information, and generated column validation information. */ void RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) @@ -5734,6 +5741,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->rf_valid_for_delete = true; pubdesc->cols_valid_for_update = true; pubdesc->cols_valid_for_delete = true; + pubdesc->gencols_valid_for_update = true; + pubdesc->gencols_valid_for_delete = true; return; } @@ -5748,6 +5757,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->rf_valid_for_delete = true; pubdesc->cols_valid_for_update = true; pubdesc->cols_valid_for_delete = true; + pubdesc->gencols_valid_for_update = true; + pubdesc->gencols_valid_for_delete = true; /* Fetch the publication membership info. */ puboids = GetRelationPublications(relid); @@ -5777,6 +5788,8 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) Oid pubid = lfirst_oid(lc); HeapTuple tup; Form_pg_publication pubform; + bool invalid_column_list; + bool invalid_gen_col; tup = SearchSysCache1(PUBLICATIONOID, ObjectIdGetDatum(pubid)); @@ -5811,18 +5824,27 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) /* * Check if all columns are part of the REPLICA IDENTITY index or not. * - * If the publication is FOR ALL TABLES then it means the table has no - * column list and we can skip the validation. + * Check if all generated columns included in the REPLICA IDENTITY are + * published. */ - if (!pubform->puballtables && - (pubform->pubupdate || pubform->pubdelete) && - pub_collist_contains_invalid_column(pubid, relation, ancestors, - pubform->pubviaroot)) + if ((pubform->pubupdate || pubform->pubdelete) && + pub_contains_invalid_column(pubid, relation, ancestors, + pubform->pubviaroot, + pubform->pubgencols, + &invalid_column_list, + &invalid_gen_col)) { if (pubform->pubupdate) - pubdesc->cols_valid_for_update = false; + { + pubdesc->cols_valid_for_update = !invalid_column_list; + pubdesc->gencols_valid_for_update = !invalid_gen_col; + } + if (pubform->pubdelete) - pubdesc->cols_valid_for_delete = false; + { + pubdesc->cols_valid_for_delete = !invalid_column_list; + pubdesc->gencols_valid_for_delete = !invalid_gen_col; + } } ReleaseSysCache(tup); @@ -5846,6 +5868,17 @@ RelationBuildPublicationDesc(Relation relation, PublicationDesc *pubdesc) pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate && !pubdesc->cols_valid_for_update && !pubdesc->cols_valid_for_delete) break; + + /* + * If we know everything is replicated and replica identity has an + * unpublished generated column, there is no point to check for other + * publications. + */ + if (pubdesc->pubactions.pubinsert && pubdesc->pubactions.pubupdate && + pubdesc->pubactions.pubdelete && pubdesc->pubactions.pubtruncate && + !pubdesc->gencols_valid_for_update && + !pubdesc->gencols_valid_for_delete) + break; } if (relation->rd_pubdesc) diff --git a/src/include/catalog/pg_publication.h b/src/include/catalog/pg_publication.h index 9a83a72d6b2..e2d894a2ff5 100644 --- a/src/include/catalog/pg_publication.h +++ b/src/include/catalog/pg_publication.h @@ -98,6 +98,13 @@ typedef struct PublicationDesc */ bool cols_valid_for_update; bool cols_valid_for_delete; + + /* + * true if all generated columns that are part of replica identity are + * published or the publication actions do not include UPDATE or DELETE. + */ + bool gencols_valid_for_update; + bool gencols_valid_for_delete; } PublicationDesc; typedef struct Publication diff --git a/src/include/commands/publicationcmds.h b/src/include/commands/publicationcmds.h index 5487c571f66..19037518e84 100644 --- a/src/include/commands/publicationcmds.h +++ b/src/include/commands/publicationcmds.h @@ -33,7 +33,10 @@ extern void AlterPublicationOwner_oid(Oid subid, Oid newOwnerId); extern void InvalidatePublicationRels(List *relids); extern bool pub_rf_contains_invalid_column(Oid pubid, Relation relation, List *ancestors, bool pubviaroot); -extern bool pub_collist_contains_invalid_column(Oid pubid, Relation relation, - List *ancestors, bool pubviaroot); +extern bool pub_contains_invalid_column(Oid pubid, Relation relation, + List *ancestors, bool pubviaroot, + bool pubgencols, + bool *invalid_column_list, + bool *invalid_gen_col); #endif /* PUBLICATIONCMDS_H */ diff --git a/src/test/regress/expected/publication.out b/src/test/regress/expected/publication.out index 5de2d64d01a..b44ab007de6 100644 --- a/src/test/regress/expected/publication.out +++ b/src/test/regress/expected/publication.out @@ -672,6 +672,32 @@ DROP TABLE rf_tbl_abcd_pk; DROP TABLE rf_tbl_abcd_nopk; DROP TABLE rf_tbl_abcd_part_pk; -- ====================================================== +-- ====================================================== +-- Tests with generated column +SET client_min_messages = 'ERROR'; +CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL); +CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b); +ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx; +-- error - generated column "b" must be published explicitly as it is +-- part of the REPLICA IDENTITY index. +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +ERROR: cannot update table "testpub_gencol" +DETAIL: Replica identity consists of an unpublished generated column. +-- error - generated column "b" must be published explicitly as it is +-- part of the REPLICA IDENTITY. +ALTER TABLE testpub_gencol REPLICA IDENTITY FULL; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +ERROR: cannot update table "testpub_gencol" +DETAIL: Replica identity consists of an unpublished generated column. +DROP PUBLICATION pub_gencol; +-- ok - generated column "b" is published explicitly +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true); +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; +DROP TABLE testpub_gencol; +RESET client_min_messages; +-- ====================================================== -- fail - duplicate tables are not allowed if that table has any column lists SET client_min_messages = 'ERROR'; CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert'); diff --git a/src/test/regress/sql/publication.sql b/src/test/regress/sql/publication.sql index 48e68bcca2d..c4c21a95d0e 100644 --- a/src/test/regress/sql/publication.sql +++ b/src/test/regress/sql/publication.sql @@ -396,6 +396,33 @@ DROP TABLE rf_tbl_abcd_nopk; DROP TABLE rf_tbl_abcd_part_pk; -- ====================================================== +-- ====================================================== +-- Tests with generated column +SET client_min_messages = 'ERROR'; +CREATE TABLE testpub_gencol (a INT, b INT GENERATED ALWAYS AS (a + 1) STORED NOT NULL); +CREATE UNIQUE INDEX testpub_gencol_idx ON testpub_gencol (b); +ALTER TABLE testpub_gencol REPLICA IDENTITY USING index testpub_gencol_idx; + +-- error - generated column "b" must be published explicitly as it is +-- part of the REPLICA IDENTITY index. +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; + +-- error - generated column "b" must be published explicitly as it is +-- part of the REPLICA IDENTITY. +ALTER TABLE testpub_gencol REPLICA IDENTITY FULL; +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; + +-- ok - generated column "b" is published explicitly +CREATE PUBLICATION pub_gencol FOR TABLE testpub_gencol with (publish_generated_columns = true); +UPDATE testpub_gencol SET a = 100 WHERE a = 1; +DROP PUBLICATION pub_gencol; + +DROP TABLE testpub_gencol; +RESET client_min_messages; +-- ====================================================== + -- fail - duplicate tables are not allowed if that table has any column lists SET client_min_messages = 'ERROR'; CREATE PUBLICATION testpub_dups FOR TABLE testpub_tbl1 (a), testpub_tbl1 WITH (publish = 'insert'); diff --git a/src/test/subscription/t/100_bugs.pl b/src/test/subscription/t/100_bugs.pl index cb36ca7b16b..794b928f50c 100644 --- a/src/test/subscription/t/100_bugs.pl +++ b/src/test/subscription/t/100_bugs.pl @@ -377,8 +377,8 @@ $node_publisher->safe_psql('postgres', "DROP PUBLICATION tap_pub_sch"); $node_publisher->stop('fast'); $node_subscriber->stop('fast'); -# The bug was that when the REPLICA IDENTITY FULL is used with dropped or -# generated columns, we fail to apply updates and deletes +# The bug was that when the REPLICA IDENTITY FULL is used with dropped +# we fail to apply updates and deletes $node_publisher->rotate_logfile(); $node_publisher->start(); @@ -389,18 +389,14 @@ $node_publisher->safe_psql( 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); ALTER TABLE dropped_cols REPLICA IDENTITY FULL; - CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); - ALTER TABLE generated_cols REPLICA IDENTITY FULL; - CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols, generated_cols; + CREATE PUBLICATION pub_dropped_cols FOR TABLE dropped_cols; -- some initial data INSERT INTO dropped_cols VALUES (1, 1, 1); - INSERT INTO generated_cols (a, c) VALUES (1, 1); )); $node_subscriber->safe_psql( 'postgres', qq( CREATE TABLE dropped_cols (a int, b_drop int, c int); - CREATE TABLE generated_cols (a int, b_gen int GENERATED ALWAYS AS (5 * a) STORED, c int); )); $publisher_connstr = $node_publisher->connstr . ' dbname=postgres'; @@ -421,7 +417,6 @@ $node_subscriber->safe_psql( $node_publisher->safe_psql( 'postgres', qq( UPDATE dropped_cols SET a = 100; - UPDATE generated_cols SET a = 100; )); $node_publisher->wait_for_catchup('sub_dropped_cols'); @@ -430,11 +425,6 @@ is( $node_subscriber->safe_psql( qq(1), 'replication with RI FULL and dropped columns'); -is( $node_subscriber->safe_psql( - 'postgres', "SELECT count(*) FROM generated_cols WHERE a = 100"), - qq(1), - 'replication with RI FULL and generated columns'); - $node_publisher->stop('fast'); $node_subscriber->stop('fast'); -- 2.30.2