Fix catcache invalidation of a list entry that's being built
Author: Heikki Linnakangas <[email protected]>
Tue, 14 Jan 2025 12:28:49 +0000 (14:28 +0200)
Committer: Heikki Linnakangas <[email protected]>
Tue, 14 Jan 2025 12:28:49 +0000 (14:28 +0200)
If a new catalog tuple is inserted that belongs to a catcache list
entry, and cache invalidation happens while the list entry is being
built, the list entry might miss the newly inserted tuple.

To fix, change the way we detect concurrent invalidations while a
catcache entry is being built. Keep a stack of entries that are being
built, and apply cache invalidation to those entries in addition to
the real catcache entries. This is similar to the in-progress list in
relcache.c.

Back-patch to all supported versions.

Reviewed-by: Noah Misch
Discussion: https://www.postgresql.org/message-id/2234dc98-06fe-42ed-b5db-ac17384dc880@iki.fi
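
The heart of the fix is a process-local stack of "create-in-progress"
entries that cache invalidation walks in addition to the catcache hash
tables.  The following is a minimal sketch of that pattern, condensed
from the catcache.c hunks below (illustrative only, not a drop-in
replacement for the patch):

    /* One stack node per catcache entry or list currently being built. */
    typedef struct CatCInProgress
    {
        CatCache   *cache;          /* cache that the entry belongs to */
        uint32      hash_value;     /* hash of the entry; ignored for lists */
        bool        list;           /* is it a list entry? */
        bool        dead;           /* set when the entry is invalidated */
        struct CatCInProgress *next;
    } CatCInProgress;

    static CatCInProgress *catcache_in_progress_stack = NULL;

    /* Invalidation marks any matching in-progress entries dead... */
    for (CatCInProgress *e = catcache_in_progress_stack; e != NULL; e = e->next)
    {
        if (e->cache == cache && (e->list || e->hash_value == hashValue))
            e->dead = true;
    }

    /* ...and a builder pushes an entry, then retries until it stays alive: */
    CatCInProgress ent = {cache, lHashValue, true /* list */, false /* dead */,
                          catcache_in_progress_stack};
    catcache_in_progress_stack = &ent;
    do
    {
        ent.dead = false;
        /* scan the catalog; AcceptInvalidationMessages() can run during
         * detoasting and set ent.dead via the loop above */
    } while (ent.dead);
    catcache_in_progress_stack = ent.next;  /* pop on the way out */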

src/backend/utils/cache/catcache.c
src/backend/utils/cache/inval.c
src/include/utils/catcache.h
src/test/modules/test_misc/meson.build
src/test/modules/test_misc/t/007_catcache_inval.pl [new file with mode: 0644]
src/tools/pgindent/typedefs.list

diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 96eb68d63ab86a92bcc7587f96049d79ad16d858..9ad7681f1555200fb96b3cf24c9197af2208953c 100644
 #include "utils/catcache.h"
 #include "utils/datum.h"
 #include "utils/fmgroids.h"
+#include "utils/injection_point.h"
 #include "utils/inval.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/resowner.h"
 #include "utils/syscache.h"
 
+/*
+ * If a catcache invalidation is processed while we are in the middle of
+ * creating a catcache entry (or list), it might apply to the entry we're
+ * creating, making it invalid before it's been inserted to the catcache.  To
+ * catch such cases, we have a stack of "create-in-progress" entries.  Cache
+ * invalidation marks any matching entries in the stack as dead, in addition
+ * to the actual CatCTup and CatCList entries.
+ */
+typedef struct CatCInProgress
+{
+       CatCache   *cache;                      /* cache that the entry belongs to */
+       uint32          hash_value;             /* hash of the entry; ignored for lists */
+       bool            list;                   /* is it a list entry? */
+       bool            dead;                   /* set when the entry is invalidated */
+       struct CatCInProgress *next;
+} CatCInProgress;
+
+static CatCInProgress *catcache_in_progress_stack = NULL;
 
  /* #define CACHEDEBUG */      /* turns DEBUG elogs on */
 
@@ -92,8 +111,7 @@ static void CatCacheRemoveCList(CatCache *cache, CatCList *cl);
 static void RehashCatCache(CatCache *cp);
 static void RehashCatCacheLists(CatCache *cp);
 static void CatalogCacheInitializeCache(CatCache *cache);
-static CatCTup *CatalogCacheCreateEntry(CatCache *cache,
-                                                                               HeapTuple ntp, SysScanDesc scandesc,
+static CatCTup *CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp,
                                                                                Datum *arguments,
                                                                                uint32 hashValue, Index hashIndex);
 
@@ -661,6 +679,16 @@ CatCacheInvalidate(CatCache *cache, uint32 hashValue)
                        /* could be multiple matches, so keep looking! */
                }
        }
+
+       /* Also invalidate any entries that are being built */
+       for (CatCInProgress *e = catcache_in_progress_stack; e != NULL; e = e->next)
+       {
+               if (e->cache == cache)
+               {
+                       if (e->list || e->hash_value == hashValue)
+                               e->dead = true;
+               }
+       }
 }
 
 /* ----------------------------------------------------------------
@@ -697,9 +725,15 @@ CreateCacheMemoryContext(void)
  *
  * This is not very efficient if the target cache is nearly empty.
  * However, it shouldn't need to be efficient; we don't invoke it often.
+ *
+ * If 'debug_discard' is true, we are being called as part of
+ * debug_discard_caches.  In that case, the cache is not reset for
+ * correctness, but just to get more testing of cache invalidation.  We skip
+ * resetting in-progress build entries in that case, or we'd never make any
+ * progress.
  */
 static void
-ResetCatalogCache(CatCache *cache)
+ResetCatalogCache(CatCache *cache, bool debug_discard)
 {
        dlist_mutable_iter iter;
        int                     i;
@@ -743,6 +777,16 @@ ResetCatalogCache(CatCache *cache)
 #endif
                }
        }
+
+       /* Also invalidate any entries that are being built */
+       if (!debug_discard)
+       {
+               for (CatCInProgress *e = catcache_in_progress_stack; e != NULL; e = e->next)
+               {
+                       if (e->cache == cache)
+                               e->dead = true;
+               }
+       }
 }
 
 /*
@@ -752,6 +796,12 @@ ResetCatalogCache(CatCache *cache)
  */
 void
 ResetCatalogCaches(void)
+{
+       ResetCatalogCachesExt(false);
+}
+
+void
+ResetCatalogCachesExt(bool debug_discard)
 {
        slist_iter      iter;
 
@@ -761,7 +811,7 @@ ResetCatalogCaches(void)
        {
                CatCache   *cache = slist_container(CatCache, cc_next, iter.cur);
 
-               ResetCatalogCache(cache);
+               ResetCatalogCache(cache, debug_discard);
        }
 
        CACHE_elog(DEBUG2, "end of ResetCatalogCaches call");
@@ -795,7 +845,7 @@ CatalogCacheFlushCatalog(Oid catId)
                if (cache->cc_reloid == catId)
                {
                        /* Yes, so flush all its contents */
-                       ResetCatalogCache(cache);
+                       ResetCatalogCache(cache, false);
 
                        /* Tell inval.c to call syscache callbacks for this cache */
                        CallSyscacheCallbacks(cache->id, 0);
@@ -1492,7 +1542,7 @@ SearchCatCacheMiss(CatCache *cache,
 
                while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
                {
-                       ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+                       ct = CatalogCacheCreateEntry(cache, ntp, NULL,
                                                                                 hashValue, hashIndex);
                        /* upon failure, we must start the scan over */
                        if (ct == NULL)
@@ -1527,7 +1577,7 @@ SearchCatCacheMiss(CatCache *cache,
                if (IsBootstrapProcessingMode())
                        return NULL;
 
-               ct = CatalogCacheCreateEntry(cache, NULL, NULL, arguments,
+               ct = CatalogCacheCreateEntry(cache, NULL, arguments,
                                                                         hashValue, hashIndex);
 
                /* Creating a negative cache entry shouldn't fail */
@@ -1664,6 +1714,8 @@ SearchCatCacheList(CatCache *cache,
        HeapTuple       ntp;
        MemoryContext oldcxt;
        int                     i;
+       CatCInProgress *save_in_progress;
+       CatCInProgress in_progress_ent;
 
        /*
         * one-time startup overhead for each cache
@@ -1778,12 +1830,28 @@ SearchCatCacheList(CatCache *cache,
         */
        ctlist = NIL;
 
+       /*
+        * Cache invalidation can happen while we're building the list.
+        * CatalogCacheCreateEntry() handles concurrent invalidation of individual
+        * tuples, but it's also possible that a new entry is concurrently added
+        * that should be part of the list we're building.  Register an
+        * "in-progress" entry that will receive the invalidation, until we have
+        * built the final list entry.
+        */
+       save_in_progress = catcache_in_progress_stack;
+       in_progress_ent.next = catcache_in_progress_stack;
+       in_progress_ent.cache = cache;
+       in_progress_ent.hash_value = lHashValue;
+       in_progress_ent.list = true;
+       in_progress_ent.dead = false;
+       catcache_in_progress_stack = &in_progress_ent;
+
        PG_TRY();
        {
                ScanKeyData cur_skey[CATCACHE_MAXKEYS];
                Relation        relation;
                SysScanDesc scandesc;
-               bool            stale;
+               bool            first_iter = true;
 
                relation = table_open(cache->cc_reloid, AccessShareLock);
 
@@ -1797,8 +1865,32 @@ SearchCatCacheList(CatCache *cache,
                cur_skey[2].sk_argument = v3;
                cur_skey[3].sk_argument = v4;
 
+               /*
+                * Scan the table for matching entries.  If an invalidation arrives
+                * mid-build, we will loop back here to retry.
+                */
                do
                {
+                       /*
+                        * If we are retrying, release refcounts on any items created on
+                        * the previous iteration.  We dare not try to free them if
+                        * they're now unreferenced, since an error while doing that would
+                        * result in the PG_CATCH below doing extra refcount decrements.
+                        * Besides, we'll likely re-adopt those items in the next
+                        * iteration, so it's not worth complicating matters to try to get
+                        * rid of them.
+                        */
+                       foreach(ctlist_item, ctlist)
+                       {
+                               ct = (CatCTup *) lfirst(ctlist_item);
+                               Assert(ct->c_list == NULL);
+                               Assert(ct->refcount > 0);
+                               ct->refcount--;
+                       }
+                       /* Reset ctlist in preparation for new try */
+                       ctlist = NIL;
+                       in_progress_ent.dead = false;
+
                        scandesc = systable_beginscan(relation,
                                                                                  cache->cc_indexoid,
                                                                                  IndexScanOK(cache),
@@ -1809,9 +1901,15 @@ SearchCatCacheList(CatCache *cache,
                        /* The list will be ordered iff we are doing an index scan */
                        ordered = (scandesc->irel != NULL);
 
-                       stale = false;
+                       /* Injection point to help testing the recursive invalidation case */
+                       if (first_iter)
+                       {
+                               INJECTION_POINT("catcache-list-miss-systable-scan-started");
+                               first_iter = false;
+                       }
 
-                       while (HeapTupleIsValid(ntp = systable_getnext(scandesc)))
+                       while (HeapTupleIsValid(ntp = systable_getnext(scandesc)) &&
+                                  !in_progress_ent.dead)
                        {
                                uint32          hashValue;
                                Index           hashIndex;
@@ -1853,30 +1951,13 @@ SearchCatCacheList(CatCache *cache,
                                if (!found)
                                {
                                        /* We didn't find a usable entry, so make a new one */
-                                       ct = CatalogCacheCreateEntry(cache, ntp, scandesc, NULL,
+                                       ct = CatalogCacheCreateEntry(cache, ntp, NULL,
                                                                                                 hashValue, hashIndex);
+
                                        /* upon failure, we must start the scan over */
                                        if (ct == NULL)
                                        {
-                                               /*
-                                                * Release refcounts on any items we already had.  We
-                                                * dare not try to free them if they're now
-                                                * unreferenced, since an error while doing that would
-                                                * result in the PG_CATCH below doing extra refcount
-                                                * decrements.  Besides, we'll likely re-adopt those
-                                                * items in the next iteration, so it's not worth
-                                                * complicating matters to try to get rid of them.
-                                                */
-                                               foreach(ctlist_item, ctlist)
-                                               {
-                                                       ct = (CatCTup *) lfirst(ctlist_item);
-                                                       Assert(ct->c_list == NULL);
-                                                       Assert(ct->refcount > 0);
-                                                       ct->refcount--;
-                                               }
-                                               /* Reset ctlist in preparation for new try */
-                                               ctlist = NIL;
-                                               stale = true;
+                                               in_progress_ent.dead = true;
                                                break;
                                        }
                                }
@@ -1888,7 +1969,7 @@ SearchCatCacheList(CatCache *cache,
                        }
 
                        systable_endscan(scandesc);
-               } while (stale);
+               } while (in_progress_ent.dead);
 
                table_close(relation, AccessShareLock);
 
@@ -1916,6 +1997,9 @@ SearchCatCacheList(CatCache *cache,
        }
        PG_CATCH();
        {
+               Assert(catcache_in_progress_stack == &in_progress_ent);
+               catcache_in_progress_stack = save_in_progress;
+
                foreach(ctlist_item, ctlist)
                {
                        ct = (CatCTup *) lfirst(ctlist_item);
@@ -1934,6 +2018,8 @@ SearchCatCacheList(CatCache *cache,
                PG_RE_THROW();
        }
        PG_END_TRY();
+       Assert(catcache_in_progress_stack == &in_progress_ent);
+       catcache_in_progress_stack = save_in_progress;
 
        cl->cl_magic = CL_MAGIC;
        cl->my_cache = cache;
@@ -2006,23 +2092,6 @@ ReleaseCatCacheListWithOwner(CatCList *list, ResourceOwner resowner)
 }
 
 
-/*
- * equalTuple
- *             Are these tuples memcmp()-equal?
- */
-static bool
-equalTuple(HeapTuple a, HeapTuple b)
-{
-       uint32          alen;
-       uint32          blen;
-
-       alen = a->t_len;
-       blen = b->t_len;
-       return (alen == blen &&
-                       memcmp((char *) a->t_data,
-                                  (char *) b->t_data, blen) == 0);
-}
-
 /*
  * CatalogCacheCreateEntry
  *             Create a new CatCTup entry, copying the given HeapTuple and other
@@ -2030,34 +2099,33 @@ equalTuple(HeapTuple a, HeapTuple b)
  *
  * To create a normal cache entry, ntp must be the HeapTuple just fetched
  * from scandesc, and "arguments" is not used.  To create a negative cache
- * entry, pass NULL for ntp and scandesc; then "arguments" is the cache
- * keys to use.  In either case, hashValue/hashIndex are the hash values
- * computed from the cache keys.
+ * entry, pass NULL for ntp; then "arguments" is the cache keys to use.
+ * In either case, hashValue/hashIndex are the hash values computed from
+ * the cache keys.
  *
  * Returns NULL if we attempt to detoast the tuple and observe that it
  * became stale.  (This cannot happen for a negative entry.)  Caller must
  * retry the tuple lookup in that case.
  */
 static CatCTup *
-CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
-                                               Datum *arguments,
+CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, Datum *arguments,
                                                uint32 hashValue, Index hashIndex)
 {
        CatCTup    *ct;
-       HeapTuple       dtp;
        MemoryContext oldcxt;
 
        if (ntp)
        {
                int                     i;
+               HeapTuple       dtp = NULL;
 
                /*
-                * The visibility recheck below essentially never fails during our
-                * regression tests, and there's no easy way to force it to fail for
-                * testing purposes.  To ensure we have test coverage for the retry
-                * paths in our callers, make debug builds randomly fail about 0.1% of
-                * the times through this code path, even when there's no toasted
-                * fields.
+                * The invalidation of the in-progress entry essentially never happens
+                * during our regression tests, and there's no easy way to force it to
+                * fail for testing purposes.  To ensure we have test coverage for the
+                * retry paths in our callers, make debug builds randomly fail about
+                * 0.1% of the times through this code path, even when there's no
+                * toasted fields.
                 */
 #ifdef USE_ASSERT_CHECKING
                if (pg_prng_uint32(&pg_global_prng_state) <= (PG_UINT32_MAX / 1000))
@@ -2073,34 +2141,34 @@ CatalogCacheCreateEntry(CatCache *cache, HeapTuple ntp, SysScanDesc scandesc,
                 */
                if (HeapTupleHasExternal(ntp))
                {
-                       bool            need_cmp = IsInplaceUpdateOid(cache->cc_reloid);
-                       HeapTuple       before = NULL;
-                       bool            matches = true;
-
-                       if (need_cmp)
-                               before = heap_copytuple(ntp);
-                       dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
+                       CatCInProgress *save_in_progress;
+                       CatCInProgress in_progress_ent;
 
                        /*
                         * The tuple could become stale while we are doing toast table
-                        * access (since AcceptInvalidationMessages can run then).
-                        * equalTuple() detects staleness from inplace updates, while
-                        * systable_recheck_tuple() detects staleness from normal updates.
-                        *
-                        * While this equalTuple() follows the usual rule of reading with
-                        * a pin and no buffer lock, it warrants suspicion since an
-                        * inplace update could appear at any moment.  It's safe because
-                        * the inplace update sends an invalidation that can't reorder
-                        * before the inplace heap change.  If the heap change reaches
-                        * this process just after equalTuple() looks, we've not missed
-                        * its inval.
+                        * access (since AcceptInvalidationMessages can run then).  The
+                        * invalidation will mark our in-progress entry as dead.
                         */
-                       if (need_cmp)
+                       save_in_progress = catcache_in_progress_stack;
+                       in_progress_ent.next = catcache_in_progress_stack;
+                       in_progress_ent.cache = cache;
+                       in_progress_ent.hash_value = hashValue;
+                       in_progress_ent.list = false;
+                       in_progress_ent.dead = false;
+                       catcache_in_progress_stack = &in_progress_ent;
+
+                       PG_TRY();
                        {
-                               matches = equalTuple(before, ntp);
-                               heap_freetuple(before);
+                               dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
                        }
-                       if (!matches || !systable_recheck_tuple(scandesc, ntp))
+                       PG_FINALLY();
+                       {
+                               Assert(catcache_in_progress_stack == &in_progress_ent);
+                               catcache_in_progress_stack = save_in_progress;
+                       }
+                       PG_END_TRY();
+
+                       if (in_progress_ent.dead)
                        {
                                heap_freetuple(dtp);
                                return NULL;
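
Read as one piece, the new detoast protection in CatalogCacheCreateEntry()
amounts to the following (a condensed sketch of the hunk above, using the
patch's own names):

    /* push an in-progress entry for the tuple we are about to detoast */
    save_in_progress = catcache_in_progress_stack;
    in_progress_ent.next = catcache_in_progress_stack;
    in_progress_ent.cache = cache;
    in_progress_ent.hash_value = hashValue;
    in_progress_ent.list = false;
    in_progress_ent.dead = false;
    catcache_in_progress_stack = &in_progress_ent;

    PG_TRY();
    {
        /* AcceptInvalidationMessages() can run here and mark us dead */
        dtp = toast_flatten_tuple(ntp, cache->cc_tupdesc);
    }
    PG_FINALLY();
    {
        /* pop the stack whether or not detoasting threw an error */
        catcache_in_progress_stack = save_in_progress;
    }
    PG_END_TRY();

    if (in_progress_ent.dead)
    {
        heap_freetuple(dtp);
        return NULL;            /* caller must retry the scan */
    }

This replaces the previous equalTuple()/systable_recheck_tuple() staleness
checks, which is why the scandesc parameter could be dropped from
CatalogCacheCreateEntry().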
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 254947e943beb85afc5eda085e96990f3a2669c0..f41d314eae3f439a88ee51985c10bce03bee1bad 100644
@@ -733,7 +733,7 @@ InvalidateSystemCachesExtended(bool debug_discard)
        int                     i;
 
        InvalidateCatalogSnapshot();
-       ResetCatalogCaches();
+       ResetCatalogCachesExt(debug_discard);
        RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
 
        for (i = 0; i < syscache_callback_count; i++)
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 1f2ca533aa989570f953d22d07ff94b8e0a51a79..277ec33c00bac3a823c85e92b30c36306f37eb13 100644
@@ -220,6 +220,7 @@ extern CatCList *SearchCatCacheList(CatCache *cache, int nkeys,
 extern void ReleaseCatCacheList(CatCList *list);
 
 extern void ResetCatalogCaches(void);
+extern void ResetCatalogCachesExt(bool debug_discard);
 extern void CatalogCacheFlushCatalog(Oid catId);
 extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
 extern void PrepareToInvalidateCacheTuple(Relation relation,
diff --git a/src/test/modules/test_misc/meson.build b/src/test/modules/test_misc/meson.build
index 65a9518a00d78dc518e3f9a074933571524370dc..9c50de7efb0f701068d41a0701e4d00ca21ead0d 100644
@@ -15,6 +15,7 @@ tests += {
       't/004_io_direct.pl',
       't/005_timeouts.pl',
       't/006_signal_autovacuum.pl',
+      't/007_catcache_inval.pl',
     ],
   },
 }
diff --git a/src/test/modules/test_misc/t/007_catcache_inval.pl b/src/test/modules/test_misc/t/007_catcache_inval.pl
new file mode 100644
index 0000000..422301b
--- /dev/null
+++ b/src/test/modules/test_misc/t/007_catcache_inval.pl
@@ -0,0 +1,98 @@
+
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Test recursive catalog cache invalidation, i.e. invalidation while a
+# catalog cache entry is being built.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+       plan skip_all => 'Injection points not supported by this build';
+}
+
+# Node initialization
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init();
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+       plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points;');
+
+
+sub randStr
+{
+       my $len = shift;
+       my @chars = ("A" .. "Z", "a" .. "z", "0" .. "9");
+       return join '', map { @chars[ rand @chars ] } 1 .. $len;
+}
+
+# Create a function with a large body, so that it is toasted.
+my $longtext = randStr(10000);
+$node->safe_psql(
+       'postgres', qq[
+    CREATE FUNCTION foofunc(dummy integer) RETURNS integer AS \$\$ SELECT 1; /* $longtext */ \$\$ LANGUAGE SQL
+]);
+
+my $psql_session = $node->background_psql('postgres');
+my $psql_session2 = $node->background_psql('postgres');
+
+# Set injection point in the session, to pause while populating the
+# catcache list
+$psql_session->query_safe(
+       qq[
+    SELECT injection_points_set_local();
+    SELECT injection_points_attach('catcache-list-miss-systable-scan-started', 'wait');
+]);
+
+# This pauses on the injection point while populating catcache list
+# for functions with name "foofunc"
+$psql_session->query_until(
+       qr/starting_bg_psql/, q(
+   \echo starting_bg_psql
+   SELECT foofunc(1);
+));
+
+# While the first session is building the catcache list, create a new
+# function that overloads the same name. This sends a catcache
+# invalidation.
+$node->safe_psql(
+       'postgres', qq[
+    CREATE FUNCTION foofunc() RETURNS integer AS \$\$ SELECT 123 \$\$ LANGUAGE SQL
+]);
+
+# Continue the paused session. It will continue to construct the
+# catcache list, and will accept invalidations while doing that.
+#
+# (The fact that the first function has a large body is crucial,
+# because the cache invalidation is accepted during detoasting.  If
+# the body is not toasted, the invalidation is processed after
+# building the catcache list, which avoids the recursion that we are
+# trying to exercise here.)
+#
+# The "SELECT foofunc(1)" query will now finish.
+$psql_session2->query_safe(
+       qq[
+    SELECT injection_points_wakeup('catcache-list-miss-systable-scan-started');
+    SELECT injection_points_detach('catcache-list-miss-systable-scan-started');
+]);
+
+# Test that the new function is visible to the session.
+$psql_session->query_safe("SELECT foofunc();");
+
+ok($psql_session->quit);
+ok($psql_session2->quit);
+
+done_testing();
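
The test skips itself unless the server was built with injection-point
support and the injection_points extension is installed, matching the two
skip_all checks above.  With autoconf builds that means configuring with
--enable-injection-points; with Meson, -Dinjection_points=true.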
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index eb93debe1088aacfa2a843d1931fc38e61bea3d7..3a392f4ed9a39260e4018870ddbbf392b01bc8fd 100644
@@ -384,6 +384,7 @@ CaseTestExpr
 CaseWhen
 Cash
 CastInfo
+CatCInProgress
 CatCList
 CatCTup
 CatCache