Avoid race in RelationBuildDesc() affecting CREATE INDEX CONCURRENTLY.

author Noah Misch <[email protected]>

Sun, 24 Oct 2021 01:36:38 +0000 (18:36 -0700)

committer Noah Misch <[email protected]>

Sun, 24 Oct 2021 01:36:43 +0000 (18:36 -0700)
author Noah Misch <[email protected]>
Sun, 24 Oct 2021 01:36:38 +0000 (18:36 -0700)
committer Noah Misch <[email protected]>
Sun, 24 Oct 2021 01:36:43 +0000 (18:36 -0700)
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c

index 2a05f602613a2ec2db7136ef1646d1789f982b6f..0835c0720abc648a0e96ad2d553fb715a0f2b518 100644 (file)
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -631,12 +631,18 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
   */
  void
  InvalidateSystemCaches(void)
+{
+   InvalidateSystemCachesExtended(false);
+}
+
+void
+InvalidateSystemCachesExtended(bool debug_discard)
  {
     int         i;
  
     InvalidateCatalogSnapshot();
     ResetCatalogCaches();
-   RelationCacheInvalidate();  /* gets smgr and relmap too */
+   RelationCacheInvalidate(debug_discard); /* gets smgr and relmap too */
  
     for (i = 0; i < syscache_callback_count; i++)
     {
@@ -707,7 +713,7 @@ AcceptInvalidationMessages(void)
         if (recursion_depth < 3)
         {
             recursion_depth++;
-           InvalidateSystemCaches();
+           InvalidateSystemCachesExtended(true);
             recursion_depth--;
         }
     }
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c

index 378ef0cd4b5f821835f50c7b58ba50a25992725b..ff0d285e4da0124bdac3e3fbc2ce4e746f1c5c3e 100644 (file)
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -135,6 +135,24 @@ bool       criticalSharedRelcachesBuilt = false;
   */
  static long relcacheInvalsReceived = 0L;
  
+/*
+ * in_progress_list is a stack of ongoing RelationBuildDesc() calls.  CREATE
+ * INDEX CONCURRENTLY makes catalog changes under ShareUpdateExclusiveLock.
+ * It critically relies on each backend absorbing those changes no later than
+ * next transaction start.  Hence, RelationBuildDesc() loops until it finishes
+ * without accepting a relevant invalidation.  (Most invalidation consumers
+ * don't do this.)
+ */
+typedef struct inprogressent
+{
+   Oid         reloid;         /* OID of relation being built */
+   bool        invalidated;    /* whether an invalidation arrived for it */
+} InProgressEnt;
+
+static InProgressEnt *in_progress_list;
+static int in_progress_list_len;
+static int in_progress_list_maxlen;
+
  /*
   * eoxact_list[] stores the OIDs of relations that (might) need AtEOXact
   * cleanup work.  This list intentionally has limited size; if it overflows,
@@ -938,11 +956,27 @@ equalRSDesc(RowSecurityDesc *rsdesc1, RowSecurityDesc *rsdesc2)
  static Relation
  RelationBuildDesc(Oid targetRelId, bool insertIt)
  {
+   int         in_progress_offset;
     Relation    relation;
     Oid         relid;
     HeapTuple   pg_class_tuple;
     Form_pg_class relp;
  
+   /* Register to catch invalidation messages */
+   if (in_progress_list_len >= in_progress_list_maxlen)
+   {
+       int         allocsize;
+
+       allocsize = in_progress_list_maxlen * 2;
+       in_progress_list = repalloc(in_progress_list,
+                                   allocsize * sizeof(*in_progress_list));
+       in_progress_list_maxlen = allocsize;
+   }
+   in_progress_offset = in_progress_list_len++;
+   in_progress_list[in_progress_offset].reloid = targetRelId;
+retry:
+   in_progress_list[in_progress_offset].invalidated = false;
+
     /*
      * find the tuple in pg_class corresponding to the given relation id
      */
@@ -952,7 +986,11 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
      * if no such tuple exists, return NULL
      */
     if (!HeapTupleIsValid(pg_class_tuple))
+   {
+       Assert(in_progress_offset + 1 == in_progress_list_len);
+       in_progress_list_len--;
         return NULL;
+   }
  
     /*
      * get information from the pg_class_tuple
@@ -1078,6 +1116,21 @@ RelationBuildDesc(Oid targetRelId, bool insertIt)
      */
     heap_freetuple(pg_class_tuple);
  
+   /*
+    * If an invalidation arrived mid-build, start over.  Between here and the
+    * end of this function, don't add code that does or reasonably could read
+    * system catalogs.  That range must be free from invalidation processing
+    * for the !insertIt case.  For the insertIt case, RelationCacheInsert()
+    * will enroll this relation in ordinary relcache invalidation processing,
+    */
+   if (in_progress_list[in_progress_offset].invalidated)
+   {
+       RelationDestroyRelation(relation, false);
+       goto retry;
+   }
+   Assert(in_progress_offset + 1 == in_progress_list_len);
+   in_progress_list_len--;
+
     /*
      * Insert newly created relation into relcache hash table, if requested.
      *
@@ -2282,6 +2335,14 @@ RelationClearRelation(Relation relation, bool rebuild)
  
         /* Build temporary entry, but don't link it into hashtable */
         newrel = RelationBuildDesc(save_relid, false);
+
+       /*
+        * Between here and the end of the swap, don't add code that does or
+        * reasonably could read system catalogs.  That range must be free
+        * from invalidation processing.  See RelationBuildDesc() manipulation
+        * of in_progress_list.
+        */
+
         if (newrel == NULL)
         {
             /*
@@ -2457,6 +2518,14 @@ RelationCacheInvalidateEntry(Oid relationId)
         relcacheInvalsReceived++;
         RelationFlushRelation(relation);
     }
+   else
+   {
+       int         i;
+
+       for (i = 0; i < in_progress_list_len; i++)
+           if (in_progress_list[i].reloid == relationId)
+               in_progress_list[i].invalidated = true;
+   }
  }
  
  /*
@@ -2488,9 +2557,14 @@ RelationCacheInvalidateEntry(Oid relationId)
   *  second pass processes nailed-in-cache items before other nondeletable
   *  items.  This should ensure that system catalogs are up to date before
   *  we attempt to use them to reload information about other open relations.
+ *
+ *  After those two phases of work having immediate effects, we normally
+ *  signal any RelationBuildDesc() on the stack to start over.  However, we
+ *  don't do this if called as part of debug_discard_caches.  Otherwise,
+ *  RelationBuildDesc() would become an infinite loop.
   */
  void
-RelationCacheInvalidate(void)
+RelationCacheInvalidate(bool debug_discard)
  {
     HASH_SEQ_STATUS status;
     RelIdCacheEnt *idhentry;
@@ -2498,6 +2572,7 @@ RelationCacheInvalidate(void)
     List       *rebuildFirstList = NIL;
     List       *rebuildList = NIL;
     ListCell   *l;
+   int         i;
  
     /*
      * Reload relation mapping data before starting to reconstruct cache.
@@ -2584,6 +2659,11 @@ RelationCacheInvalidate(void)
         RelationClearRelation(relation, true);
     }
     list_free(rebuildList);
+
+   if (!debug_discard)
+       /* Any RelationBuildDesc() on the stack must start over. */
+       for (i = 0; i < in_progress_list_len; i++)
+           in_progress_list[i].invalidated = true;
  }
  
  /*
@@ -2656,6 +2736,13 @@ AtEOXact_RelationCache(bool isCommit)
     RelIdCacheEnt *idhentry;
     int         i;
  
+   /*
+    * Forget in_progress_list.  This is relevant when we're aborting due to
+    * an error during RelationBuildDesc().
+    */
+   Assert(in_progress_list_len == 0 || !isCommit);
+   in_progress_list_len = 0;
+
     /*
      * Unless the eoxact_list[] overflowed, we only need to examine the rels
      * listed in it.  Otherwise fall back on a hash_seq_search scan.
@@ -2804,6 +2891,14 @@ AtEOSubXact_RelationCache(bool isCommit, SubTransactionId mySubid,
     RelIdCacheEnt *idhentry;
     int         i;
  
+   /*
+    * Forget in_progress_list.  This is relevant when we're aborting due to
+    * an error during RelationBuildDesc().  We don't commit subtransactions
+    * during RelationBuildDesc().
+    */
+   Assert(in_progress_list_len == 0 || !isCommit);
+   in_progress_list_len = 0;
+
     /*
      * Unless the eoxact_list[] overflowed, we only need to examine the rels
      * listed in it.  Otherwise fall back on a hash_seq_search scan.  Same
@@ -3287,6 +3382,7 @@ void
  RelationCacheInitialize(void)
  {
     HASHCTL     ctl;
+   int         allocsize;
  
     /*
      * make sure cache memory context exists
@@ -3303,6 +3399,15 @@ RelationCacheInitialize(void)
     RelationIdCache = hash_create("Relcache by OID", INITRELCACHESIZE,
                                   &ctl, HASH_ELEM | HASH_BLOBS);
  
+   /*
+    * reserve enough in_progress_list slots for many cases
+    */
+   allocsize = 4;
+   in_progress_list =
+       MemoryContextAlloc(CacheMemoryContext,
+                          allocsize * sizeof(*in_progress_list));
+   in_progress_list_maxlen = allocsize;
+
     /*
      * relation mapper needs to be initialized too
      */
diff --git a/src/bin/pgbench/t/022_cic.pl b/src/bin/pgbench/t/022_cic.pl

new file mode 100644 (file)

index 0000000..0230132
--- /dev/null
+++ b/src/bin/pgbench/t/022_cic.pl
@@ -0,0 +1,93 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test CREATE INDEX CONCURRENTLY with concurrent modifications
+use strict;
+use warnings;
+
+use Config;
+use PostgresNode;
+use TestLib;
+
+use Test::More tests => 4;
+
+my ($node, $result);
+
+#
+# Test set-up
+#
+$node = get_new_node('CIC_test');
+$node->init;
+$node->append_conf('postgresql.conf', 'lock_timeout = 180000');
+$node->start;
+$node->safe_psql('postgres', q(CREATE TABLE tbl(i int)));
+$node->safe_psql('postgres', q(CREATE INDEX idx ON tbl(i)));
+$node->safe_psql(
+   'postgres', q(
+   CREATE FUNCTION heapallindexed() RETURNS void AS $$
+   DECLARE
+       count_seqscan int;
+       count_idxscan int;
+   BEGIN
+       count_seqscan := (SELECT count(*) FROM tbl);
+       SET enable_seqscan = off;
+       count_idxscan := (SELECT count(*) FROM tbl);
+       RESET enable_seqscan;
+       IF count_seqscan <> count_idxscan THEN
+           RAISE 'seqscan found % rows, but idxscan found % rows',
+               count_seqscan, count_idxscan;
+       END IF;
+   END
+   $$ LANGUAGE plpgsql;
+));
+
+#
+# Stress CIC with pgbench
+#
+
+# Run background pgbench with CIC. We cannot mix-in this script into single
+# pgbench: CIC will deadlock with itself occasionally.
+my $pgbench_out   = '';
+my $pgbench_timer = IPC::Run::timeout(180);
+my $pgbench_h     = $node->background_pgbench(
+   '--no-vacuum --client=1 --transactions=200',
+   {
+       '002_pgbench_concurrent_cic' => q(
+           DROP INDEX CONCURRENTLY idx;
+           CREATE INDEX CONCURRENTLY idx ON tbl(i);
+           BEGIN ISOLATION LEVEL REPEATABLE READ;
+           SELECT heapallindexed();
+           ROLLBACK;
+          )
+   },
+   \$pgbench_out,
+   $pgbench_timer);
+
+# Run pgbench.
+$node->pgbench(
+   '--no-vacuum --client=5 --transactions=200',
+   0,
+   [qr{actually processed}],
+   [qr{^$}],
+   'concurrent INSERTs',
+   {
+       '002_pgbench_concurrent_transaction' => q(
+           BEGIN;
+           INSERT INTO tbl VALUES(0);
+           COMMIT;
+         ),
+       '002_pgbench_concurrent_transaction_savepoints' => q(
+           BEGIN;
+           SAVEPOINT s1;
+           INSERT INTO tbl VALUES(0);
+           COMMIT;
+         )
+   });
+
+$pgbench_h->pump_nb;
+$pgbench_h->finish();
+unlike($pgbench_out, qr/aborted in/, "pgbench with CIC works");
+
+# done
+$node->stop;
+done_testing();
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h

index 122fef29c8374d431365f3a4f78de9d7fba60b05..e3880f7796db717b8f91564b6e964a53c4bc9273 100644 (file)
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -61,4 +61,5 @@ extern void CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
  extern void CallSyscacheCallbacks(int cacheid, uint32 hashvalue);
  
  extern void InvalidateSystemCaches(void);
+extern void InvalidateSystemCachesExtended(bool debug_discard);
  #endif   /* INVAL_H */
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h

index 8acc48b593a31dbed983ba9d8234ae240816fec8..f3305dd1a07ea2987c0b047e4f15aba88e2e5d2e 100644 (file)
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -107,7 +107,7 @@ extern void RelationForgetRelation(Oid rid);
  
  extern void RelationCacheInvalidateEntry(Oid relationId);
  
-extern void RelationCacheInvalidate(void);
+extern void RelationCacheInvalidate(bool debug_discard);
  
  extern void RelationCloseSmgrByOid(Oid relationId);
  
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm

index 5f5a7d273192957a927e0e4e30250d7a9fea7c7d..01ce4f0cbb5b783673603ec191be99dc86ca1485 100644 (file)
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1340,6 +1340,141 @@ sub psql
     }
  }
  
+# Common sub of pgbench-invoking interfaces.  Makes any requested script files
+# and returns pgbench command-line options causing use of those files.
+sub _pgbench_make_files
+{
+   my ($self, $files) = @_;
+   my @file_opts;
+
+   if (defined $files)
+   {
+
+       # note: files are ordered for determinism
+       for my $fn (sort keys %$files)
+       {
+           my $filename = $self->basedir . '/' . $fn;
+           push @file_opts, '-f', $filename;
+
+           # cleanup file weight
+           $filename =~ s/\@\d+$//;
+
+           #push @filenames, $filename;
+           # filenames are expected to be unique on a test
+           if (-e $filename)
+           {
+               ok(0, "$filename must not already exist");
+               unlink $filename or die "cannot unlink $filename: $!";
+           }
+           TestLib::append_to_file($filename, $$files{$fn});
+       }
+   }
+
+   return @file_opts;
+}
+
+=pod
+
+=item $node->pgbench($opts, $stat, $out, $err, $name, $files, @args)
+
+Invoke B<pgbench>, with parameters and files.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $stat
+
+Expected exit status.
+
+=item $out
+
+Reference to a regexp list that must match stdout.
+
+=item $err
+
+Reference to a regexp list that must match stderr.
+
+=item $name
+
+Name of test for error messages.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=item @args
+
+Further raw options or arguments.
+
+=back
+
+=cut
+
+sub pgbench
+{
+   local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+   my ($self, $opts, $stat, $out, $err, $name, $files, @args) = @_;
+   my @cmd = (
+       'pgbench',
+       split(/\s+/, $opts),
+       $self->_pgbench_make_files($files), @args);
+
+   $self->command_checks_all(\@cmd, $stat, $out, $err, $name);
+}
+
+=pod
+
+=item $node->background_pgbench($opts, $files, \$stdout, $timer) => harness
+
+Invoke B<pgbench> and return an IPC::Run harness object.  The process's stdin
+is empty, and its stdout and stderr go to the $stdout scalar reference.  This
+allows the caller to act on other parts of the system while B<pgbench> is
+running.  Errors from B<pgbench> are the caller's problem.
+
+The specified timer object is attached to the harness, as well.  It's caller's
+responsibility to select the timeout length, and to restart the timer after
+each command if the timeout is per-command.
+
+Be sure to "finish" the harness when done with it.
+
+=over
+
+=item $opts
+
+Options as a string to be split on spaces.
+
+=item $files
+
+Reference to filename/contents dictionary.
+
+=back
+
+=cut
+
+sub background_pgbench
+{
+   my ($self, $opts, $files, $stdout, $timer) = @_;
+
+   my @cmd =
+     ('pgbench', split(/\s+/, $opts), $self->_pgbench_make_files($files));
+
+   local $ENV{PGHOST} = $self->host;
+   local $ENV{PGPORT} = $self->port;
+
+   my $stdin = "";
+   # IPC::Run would otherwise append to existing contents:
+   $$stdout = "" if ref($stdout);
+
+   my $harness = IPC::Run::start \@cmd, '<', \$stdin, '>', $stdout, '2>&1',
+     $timer;
+
+   return $harness;
+}
+
  =pod
  
  =item $node->poll_query_until(dbname, query)
@@ -1448,6 +1583,26 @@ sub command_like
  
  =pod
  
+=item $node->command_checks_all(...)
+
+TestLib::command_checks_all with our connection parameters. See
+command_ok(...)
+
+=cut
+
+sub command_checks_all
+{
+   my $self = shift;
+
+   local $ENV{PGHOST} = $self->host;
+   local $ENV{PGPORT} = $self->port;
+
+   TestLib::command_checks_all(@_);
+   return;
+}
+
+=pod
+
  =item $node->issues_sql_like(cmd, expected_sql, test_name)
  
  Run a command on the node, then verify that $expected_sql appears in the
diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm

index f466def1d709dda7975f1b85aac6a23642bbe279..7c98d286a12504abca1bd01c775862d07114466c 100644 (file)
--- a/src/test/perl/TestLib.pm
+++ b/src/test/perl/TestLib.pm
@@ -360,4 +360,47 @@ sub command_like
     like($stdout, $expected_stdout, "$test_name: matches");
  }
  
+# Run a command and check its status and outputs.
+# The 5 arguments are:
+# - cmd: ref to list for command, options and arguments to run
+# - ret: expected exit status
+# - out: ref to list of re to be checked against stdout (all must match)
+# - err: ref to list of re to be checked against stderr (all must match)
+# - test_name: name of test
+sub command_checks_all
+{
+   my ($cmd, $expected_ret, $out, $err, $test_name) = @_;
+
+   # run command
+   my ($stdout, $stderr);
+   print("# Running: " . join(" ", @{$cmd}) . "\n");
+   IPC::Run::run($cmd, '>', \$stdout, '2>', \$stderr);
+
+   # See http://perldoc.perl.org/perlvar.html#%24CHILD_ERROR
+   my $ret = $?;
+   die "command exited with signal " . ($ret & 127)
+     if $ret & 127;
+   $ret = $ret >> 8;
+
+   foreach ($stderr, $stdout) { s/\r\n/\n/g if $Config{osname} eq 'msys'; }
+
+   # check status
+   ok($ret == $expected_ret,
+       "$test_name status (got $ret vs expected $expected_ret)");
+
+   # check stdout
+   for my $re (@$out)
+   {
+       like($stdout, $re, "$test_name stdout /$re/");
+   }
+
+   # check stderr
+   for my $re (@$err)
+   {
+       like($stderr, $re, "$test_name stderr /$re/");
+   }
+
+   return;
+}
+
  1;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list

index 506d95453a6fcbcb16d062e75eeac29c18291250..9e5fac096daa6e3c8e1e105e20b2eb09ea589f9c 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -918,6 +918,7 @@ ImportForeignSchemaStmt
  ImportForeignSchemaType
  ImportForeignSchema_function
  ImportQual
+InProgressEnt
  InclusionOpaque
  IncrementVarSublevelsUp_context
  Index
author	Noah Misch <[email protected]>
	Sun, 24 Oct 2021 01:36:38 +0000 (18:36 -0700)
committer	Noah Misch <[email protected]>
	Sun, 24 Oct 2021 01:36:43 +0000 (18:36 -0700)
src/backend/utils/cache/inval.c		patch \| blob \| blame \| history
src/backend/utils/cache/relcache.c		patch \| blob \| blame \| history
src/bin/pgbench/t/022_cic.pl	[new file with mode: 0644]	patch \| blob
src/include/utils/inval.h		patch \| blob \| blame \| history
src/include/utils/relcache.h		patch \| blob \| blame \| history
src/test/perl/PostgresNode.pm		patch \| blob \| blame \| history
src/test/perl/TestLib.pm		patch \| blob \| blame \| history
src/tools/pgindent/typedefs.list		patch \| blob \| blame \| history