Make RelationFlushRelation() work without ResourceOwner during abort
author: Heikki Linnakangas <[email protected]>
Thu, 6 Jun 2024 15:56:28 +0000 (18:56 +0300)
committer: Heikki Linnakangas <[email protected]>
Thu, 6 Jun 2024 15:56:28 +0000 (18:56 +0300)
ReorderBufferImmediateInvalidation() executes invalidation messages in
an aborted transaction. However, RelationFlushRelation sometimes
required a valid resource owner, to temporarily increment the refcount
of the relcache entry. Commit b8bff07daa worked around that in the main
subtransaction abort function, AbortSubTransaction(), but missed this
similar case in ReorderBufferImmediateInvalidation().

To fix, introduce a separate function to invalidate a relcache
entry. It does the same thing as RelationClearRelation(rebuild==true)
does when outside a transaction, but can be called without
incrementing the refcount.

Add regression test. Before this fix, it failed with:

ERROR: ResourceOwnerEnlarge called after release started

Reported-by: Alexander Lakhin <[email protected]>
Discussion: https://www.postgresql.org/message-id/e56be7d9-14b1-664d-0bfc-00ce9772721c@gmail.com

contrib/test_decoding/expected/decoding_in_xact.out
contrib/test_decoding/sql/decoding_in_xact.sql
src/backend/access/transam/xact.c
src/backend/utils/cache/relcache.c

index b65253f4630916521aad5a572a221ba41a2df265..ac03ff120300d8e5b99286dab041bc68bad0db2a 100644 (file)
@@ -79,6 +79,54 @@ SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'inc
  COMMIT
 (6 rows)
 
+-- Decoding works in transaction that issues DDL
+--
+-- We had issues handling relcache invalidations with these, see
+-- https://www.postgresql.org/message-id/e56be7d9-14b1-664d-0bfc-00ce9772721c@gmail.com
+CREATE TABLE tbl_created_outside_xact(id SERIAL PRIMARY KEY);
+BEGIN;
+  -- TRUNCATE changes the relfilenode and sends relcache invalidation
+  TRUNCATE tbl_created_outside_xact;
+  INSERT INTO tbl_created_outside_xact(id) VALUES('1');
+  -- don't show yet, haven't committed
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+ data 
+------
+(0 rows)
+
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+                             data                             
+--------------------------------------------------------------
+ BEGIN
+ table public.tbl_created_outside_xact: TRUNCATE: (no-flags)
+ table public.tbl_created_outside_xact: INSERT: id[integer]:1
+ COMMIT
+(4 rows)
+
+SET debug_logical_replication_streaming = immediate;
+BEGIN;
+  CREATE TABLE tbl_created_in_xact(id SERIAL PRIMARY KEY);
+  INSERT INTO tbl_created_in_xact VALUES (1);
+  CHECKPOINT; -- Force WAL flush, so that the above changes will be streamed
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');
+                   data                   
+------------------------------------------
+ opening a streamed block for transaction
+ streaming change for transaction
+ closing a streamed block for transaction
+(3 rows)
+
+COMMIT;
+RESET debug_logical_replication_streaming;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+                          data                           
+---------------------------------------------------------
+ BEGIN
+ table public.tbl_created_in_xact: INSERT: id[integer]:1
+ COMMIT
+(3 rows)
+
 SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
  ?column? 
 ----------
index 108782dc2e98d55944aec5982d8f109606289100..233ade5b6ce85f9e2d71ecc8466db9b8c0a539cc 100644 (file)
@@ -38,4 +38,31 @@ COMMIT;
 INSERT INTO nobarf(data) VALUES('3');
 SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
 
+-- Decoding works in transaction that issues DDL
+--
+-- We had issues handling relcache invalidations with these, see
+-- https://www.postgresql.org/message-id/e56be7d9-14b1-664d-0bfc-00ce9772721c@gmail.com
+CREATE TABLE tbl_created_outside_xact(id SERIAL PRIMARY KEY);
+BEGIN;
+  -- TRUNCATE changes the relfilenode and sends relcache invalidation
+  TRUNCATE tbl_created_outside_xact;
+  INSERT INTO tbl_created_outside_xact(id) VALUES('1');
+
+  -- don't show yet, haven't committed
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+COMMIT;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+
+SET debug_logical_replication_streaming = immediate;
+BEGIN;
+  CREATE TABLE tbl_created_in_xact(id SERIAL PRIMARY KEY);
+  INSERT INTO tbl_created_in_xact VALUES (1);
+
+  CHECKPOINT; -- Force WAL flush, so that the above changes will be streamed
+
+  SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1', 'stream-changes', '1');
+COMMIT;
+RESET debug_logical_replication_streaming;
+SELECT data FROM pg_logical_slot_get_changes('regression_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+
 SELECT 'stop' FROM pg_drop_replication_slot('regression_slot');
index 4f4ce757623c9bf84072bd0e238f763cba4f01c5..9bda1aa6bc6dd9c6decc0e88d9a30085e35a9893 100644 (file)
@@ -5279,20 +5279,7 @@ AbortSubTransaction(void)
 
        AtEOSubXact_RelationCache(false, s->subTransactionId,
                                  s->parent->subTransactionId);
-
-
-       /*
-        * AtEOSubXact_Inval sometimes needs to temporarily bump the refcount
-        * on the relcache entries that it processes.  We cannot use the
-        * subtransaction's resource owner anymore, because we've already
-        * started releasing it.  But we can use the parent resource owner.
-        */
-       CurrentResourceOwner = s->parent->curTransactionOwner;
-
        AtEOSubXact_Inval(false);
-
-       CurrentResourceOwner = s->curTransactionOwner;
-
        ResourceOwnerRelease(s->curTransactionOwner,
                             RESOURCE_RELEASE_LOCKS,
                             false, false);
index cc9b0c6524f3bc5201c8adcc9793febc5997871e..35dbb87ae3d5ea2a80c7afab7b88c283459f6076 100644 (file)
@@ -275,6 +275,7 @@ static HTAB *OpClassCache = NULL;
 
 static void RelationCloseCleanup(Relation relation);
 static void RelationDestroyRelation(Relation relation, bool remember_tupdesc);
+static void RelationInvalidateRelation(Relation relation);
 static void RelationClearRelation(Relation relation, bool rebuild);
 
 static void RelationReloadIndexInfo(Relation relation);
@@ -2512,6 +2513,31 @@ RelationDestroyRelation(Relation relation, bool remember_tupdesc)
    pfree(relation);
 }
 
+/*
+ * RelationInvalidateRelation - mark a relation cache entry as invalid
+ *
+ * An entry that's marked as invalid will be reloaded on next access.
+ */
+static void
+RelationInvalidateRelation(Relation relation)
+{
+   /*
+    * Make sure smgr and lower levels close the relation's files, if they
+    * weren't closed already.  If the relation is not getting deleted, the
+    * next smgr access should reopen the files automatically.  This ensures
+    * that the low-level file access state is updated after, say, a vacuum
+    * truncation.
+    */
+   RelationCloseSmgr(relation);
+
+   /* Free AM cached data, if any */
+   if (relation->rd_amcache)
+       pfree(relation->rd_amcache);
+   relation->rd_amcache = NULL;
+
+   relation->rd_isvalid = false;
+}
+
 /*
  * RelationClearRelation
  *
@@ -2846,14 +2872,28 @@ RelationFlushRelation(Relation relation)
         * New relcache entries are always rebuilt, not flushed; else we'd
         * forget the "new" status of the relation.  Ditto for the
         * new-relfilenumber status.
-        *
-        * The rel could have zero refcnt here, so temporarily increment the
-        * refcnt to ensure it's safe to rebuild it.  We can assume that the
-        * current transaction has some lock on the rel already.
         */
-       RelationIncrementReferenceCount(relation);
-       RelationClearRelation(relation, true);
-       RelationDecrementReferenceCount(relation);
+       if (IsTransactionState() && relation->rd_droppedSubid == InvalidSubTransactionId)
+       {
+           /*
+            * The rel could have zero refcnt here, so temporarily increment
+            * the refcnt to ensure it's safe to rebuild it.  We can assume
+            * that the current transaction has some lock on the rel already.
+            */
+           RelationIncrementReferenceCount(relation);
+           RelationClearRelation(relation, true);
+           RelationDecrementReferenceCount(relation);
+       }
+       else
+       {
+           /*
+            * During abort processing, the current resource owner is not
+            * valid and we cannot hold a refcnt.  Without a valid
+            * transaction, RelationClearRelation() would just mark the rel as
+            * invalid anyway, so we can do the same directly.
+            */
+           RelationInvalidateRelation(relation);
+       }
    }
    else
    {