Introduce a maintenance_io_concurrency setting.
author: Thomas Munro <[email protected]>
Sun, 15 Mar 2020 23:31:34 +0000 (12:31 +1300)
committer: Thomas Munro <[email protected]>
Mon, 16 Mar 2020 04:14:26 +0000 (17:14 +1300)
Introduce a GUC and a tablespace option to control I/O prefetching, much
like effective_io_concurrency, but for work that is done on behalf of
many client sessions.

Use the new setting in heapam.c instead of the hard-coded formula
effective_io_concurrency + 10 introduced by commit 558a9165e08.  Go with
a default value of 10 for now, because it's a round number pretty close
to the value used for that existing case.

Discussion: https://postgr.es/m/CA%2BhUKGJUw08dPs_3EUcdO6M90GnjofPYrWp4YSLaBkgYwS-AqA%40mail.gmail.com

12 files changed:
doc/src/sgml/config.sgml
doc/src/sgml/ref/alter_tablespace.sgml
doc/src/sgml/ref/create_tablespace.sgml
src/backend/access/common/reloptions.c
src/backend/access/heap/heapam.c
src/backend/storage/buffer/bufmgr.c
src/backend/utils/cache/spccache.c
src/backend/utils/misc/guc.c
src/bin/psql/tab-complete.c
src/include/commands/tablespace.h
src/include/storage/bufmgr.h
src/include/utils/spccache.h

index 3cac340f3237f2358c6ca503945a56ad8ecc792d..672bf6f1ee7e2086987d167cd3cb06071c5b1c0d 100644 (file)
@@ -2229,6 +2229,26 @@ include_dir 'conf.d'
        </listitem>
       </varlistentry>
 
+      <varlistentry id="guc-maintenance-io-concurrency" xreflabel="maintenance_io_concurrency">
+       <term><varname>maintenance_io_concurrency</varname> (<type>integer</type>)
+       <indexterm>
+        <primary><varname>maintenance_io_concurrency</varname> configuration parameter</primary>
+       </indexterm>
+       </term>
+       <listitem>
+        <para>
+         Similar to <varname>effective_io_concurrency</varname>, but used
+         for maintenance work that is done on behalf of many client sessions.
+        </para>
+        <para>
+         The default is 10 on supported systems, otherwise 0.  This value can
+         be overridden for tables in a particular tablespace by setting the
+         tablespace parameter of the same name (see
+         <xref linkend="sql-altertablespace"/>).
+        </para>
+       </listitem>
+      </varlistentry>
       <varlistentry id="guc-max-worker-processes" xreflabel="max_worker_processes">
        <term><varname>max_worker_processes</varname> (<type>integer</type>)
        <indexterm>
index acec33469f12ca626e858c2f0f7cda624f089698..356fb9f93f32df8f49a2d1c8985f158274571055 100644 (file)
@@ -84,13 +84,16 @@ ALTER TABLESPACE <replaceable>name</replaceable> RESET ( <replaceable class="par
      <para>
       A tablespace parameter to be set or reset.  Currently, the only
       available parameters are <varname>seq_page_cost</varname>,
-      <varname>random_page_cost</varname> and <varname>effective_io_concurrency</varname>.
-      Setting either value for a particular tablespace will override the
+      <varname>random_page_cost</varname>, <varname>effective_io_concurrency</varname>
+      and <varname>maintenance_io_concurrency</varname>.
+      Setting these values for a particular tablespace will override the
       planner's usual estimate of the cost of reading pages from tables in
-      that tablespace, as established by the configuration parameters of the
+      that tablespace, and the executor's prefetching behavior, as established
+      by the configuration parameters of the
       same name (see <xref linkend="guc-seq-page-cost"/>,
       <xref linkend="guc-random-page-cost"/>,
-      <xref linkend="guc-effective-io-concurrency"/>).  This may be useful if
+      <xref linkend="guc-effective-io-concurrency"/>,
+      <xref linkend="guc-maintenance-io-concurrency"/>).  This may be useful if
       one tablespace is located on a disk which is faster or slower than the
       remainder of the I/O subsystem.
      </para>
index c621ec2c6bf802e90bf7c6c4df8d8facf10abe5f..462b8831c27464e3f1e3b5d20e6d7b4b11a5f394 100644 (file)
@@ -106,13 +106,16 @@ CREATE TABLESPACE <replaceable class="parameter">tablespace_name</replaceable>
        <para>
         A tablespace parameter to be set or reset.  Currently, the only
         available parameters are <varname>seq_page_cost</varname>,
-        <varname>random_page_cost</varname> and <varname>effective_io_concurrency</varname>.
-        Setting either value for a particular tablespace will override the
+        <varname>random_page_cost</varname>, <varname>effective_io_concurrency</varname>
+        and <varname>maintenance_io_concurrency</varname>.
+        Setting these values for a particular tablespace will override the
         planner's usual estimate of the cost of reading pages from tables in
-        that tablespace, as established by the configuration parameters of the
+        that tablespace, and the executor's prefetching behavior, as established
+        by the configuration parameters of the
         same name (see <xref linkend="guc-seq-page-cost"/>,
         <xref linkend="guc-random-page-cost"/>,
-        <xref linkend="guc-effective-io-concurrency"/>).  This may be useful if
+        <xref linkend="guc-effective-io-concurrency"/>,
+        <xref linkend="guc-maintenance-io-concurrency"/>).  This may be useful if
         one tablespace is located on a disk which is faster or slower than the
         remainder of the I/O subsystem.
        </para>
index c3d45c7a248ee6f995132c381b6d9c40ce3b4436..ec207d3b26c0e48a8b581f792bc022ed7249efb6 100644 (file)
@@ -349,6 +349,19 @@ static relopt_int intRelOpts[] =
        -1, 0, MAX_IO_CONCURRENCY
 #else
        0, 0, 0
+#endif
+   },
+   {
+       {
+           "maintenance_io_concurrency",
+           "Number of simultaneous requests that can be handled efficiently by the disk subsystem for maintenance work.",
+           RELOPT_KIND_TABLESPACE,
+           ShareUpdateExclusiveLock
+       },
+#ifdef USE_PREFETCH
+       -1, 0, MAX_IO_CONCURRENCY
+#else
+       0, 0, 0
 #endif
    },
    {
@@ -1700,7 +1713,8 @@ tablespace_reloptions(Datum reloptions, bool validate)
    static const relopt_parse_elt tab[] = {
        {"random_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, random_page_cost)},
        {"seq_page_cost", RELOPT_TYPE_REAL, offsetof(TableSpaceOpts, seq_page_cost)},
-       {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)}
+       {"effective_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, effective_io_concurrency)},
+       {"maintenance_io_concurrency", RELOPT_TYPE_INT, offsetof(TableSpaceOpts, maintenance_io_concurrency)}
    };
 
    return (bytea *) build_reloptions(reloptions, validate,
index 5a32e62ed0e761aab4fc5710e2ff2fff80d75990..29694b8aa4a3b6b242ab0b197f02f9a7221499f3 100644 (file)
@@ -7003,7 +7003,6 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
    Page        hpage;
 #ifdef USE_PREFETCH
    XidHorizonPrefetchState prefetch_state;
-   int         io_concurrency;
    int         prefetch_distance;
 #endif
 
@@ -7026,24 +7025,15 @@ heap_compute_xid_horizon_for_tuples(Relation rel,
    /*
     * Compute the prefetch distance that we will attempt to maintain.
     *
-    * We don't use the regular formula to determine how much to prefetch
-    * here, but instead just add a constant to effective_io_concurrency.
-    * That's because it seems best to do some prefetching here even when
-    * effective_io_concurrency is set to 0, but if the DBA thinks it's OK to
-    * do more prefetching for other operations, then it's probably OK to do
-    * more prefetching in this case, too. It may be that this formula is too
-    * simplistic, but at the moment there is no evidence of that or any idea
-    * about what would work better.
-    *
     * Since the caller holds a buffer lock somewhere in rel, we'd better make
     * sure that isn't a catalog relation before we call code that does
     * syscache lookups, to avoid risk of deadlock.
     */
    if (IsCatalogRelation(rel))
-       io_concurrency = effective_io_concurrency;
+       prefetch_distance = maintenance_io_concurrency;
    else
-       io_concurrency = get_tablespace_io_concurrency(rel->rd_rel->reltablespace);
-   prefetch_distance = Min((io_concurrency) + 10, MAX_IO_CONCURRENCY);
+       prefetch_distance =
+           get_tablespace_maintenance_io_concurrency(rel->rd_rel->reltablespace);
 
    /* Start prefetching. */
    xid_horizon_prefetch_buffer(rel, &prefetch_state, prefetch_distance);
index 7a7748b6955f957ba62bd9c50a86b1e6c436589b..e05e2b34565d077552afa7079943fc5278906fa0 100644 (file)
@@ -119,6 +119,13 @@ bool       track_io_timing = false;
  */
 int            effective_io_concurrency = 0;
 
+/*
+ * Like effective_io_concurrency, but used by maintenance code paths that might
+ * benefit from a higher setting because they work on behalf of many sessions.
+ * Overridden by the tablespace setting of the same name.
+ */
+int            maintenance_io_concurrency = 0;
+
 /*
  * GUC variables about triggering kernel writeback for buffers written; OS
  * dependent defaults are set via the GUC mechanism.
index c4a0f719fb5b01f1982f182116dabb678f37220f..e0c3c1b1c1173eb33659827df816880dcc496d10 100644 (file)
@@ -221,3 +221,17 @@ get_tablespace_io_concurrency(Oid spcid)
    else
        return spc->opts->effective_io_concurrency;
 }
+
+/*
+ * get_tablespace_maintenance_io_concurrency
+ */
+int
+get_tablespace_maintenance_io_concurrency(Oid spcid)
+{
+   TableSpaceCacheEntry *spc = get_tablespace(spcid);
+
+   if (!spc->opts || spc->opts->maintenance_io_concurrency < 0)
+       return maintenance_io_concurrency;
+   else
+       return spc->opts->maintenance_io_concurrency;
+}
index 326e773b25fc6361e91d7cbcf8ad71be3dd574a9..68082315acda0cfcba168a45fed735a27b72b454 100644 (file)
@@ -196,6 +196,7 @@ static bool check_autovacuum_max_workers(int *newval, void **extra, GucSource so
 static bool check_max_wal_senders(int *newval, void **extra, GucSource source);
 static bool check_autovacuum_work_mem(int *newval, void **extra, GucSource source);
 static bool check_effective_io_concurrency(int *newval, void **extra, GucSource source);
+static bool check_maintenance_io_concurrency(int *newval, void **extra, GucSource source);
 static void assign_pgstat_temp_directory(const char *newval, void *extra);
 static bool check_application_name(char **newval, void **extra, GucSource source);
 static void assign_application_name(const char *newval, void *extra);
@@ -2884,6 +2885,24 @@ static struct config_int ConfigureNamesInt[] =
        check_effective_io_concurrency, NULL, NULL
    },
 
+   {
+       {"maintenance_io_concurrency",
+           PGC_USERSET,
+           RESOURCES_ASYNCHRONOUS,
+           gettext_noop("A variant of effective_io_concurrency that is used for maintenance work."),
+           NULL,
+           GUC_EXPLAIN
+       },
+       &maintenance_io_concurrency,
+#ifdef USE_PREFETCH
+       10,
+#else
+       0,
+#endif
+       0, MAX_IO_CONCURRENCY,
+       check_maintenance_io_concurrency, NULL, NULL
+   },
+
    {
        {"backend_flush_after", PGC_USERSET, RESOURCES_ASYNCHRONOUS,
            gettext_noop("Number of pages after which previously performed writes are flushed to disk."),
@@ -11466,6 +11485,19 @@ check_effective_io_concurrency(int *newval, void **extra, GucSource source)
    return true;
 }
 
+static bool
+check_maintenance_io_concurrency(int *newval, void **extra, GucSource source)
+{
+#ifndef USE_PREFETCH
+   if (*newval != 0)
+   {
+       GUC_check_errdetail("maintenance_io_concurrency must be set to 0 on platforms that lack posix_fadvise().");
+       return false;
+   }
+#endif                         /* USE_PREFETCH */
+   return true;
+}
+
 static void
 assign_pgstat_temp_directory(const char *newval, void *extra)
 {
index 174c3db62309fc50361489aeaddc0566131f77b1..ae35fa4aa98d9c54d06cdcf83d642b695131fcdb 100644 (file)
@@ -2140,7 +2140,7 @@ psql_completion(const char *text, int start, int end)
    /* ALTER TABLESPACE <foo> SET|RESET ( */
    else if (Matches("ALTER", "TABLESPACE", MatchAny, "SET|RESET", "("))
        COMPLETE_WITH("seq_page_cost", "random_page_cost",
-                     "effective_io_concurrency");
+                     "effective_io_concurrency", "maintenance_io_concurrency");
 
    /* ALTER TEXT SEARCH */
    else if (Matches("ALTER", "TEXT", "SEARCH"))
index 41c457052d97d98cc8c91716ce978abe9f63d210..fd1b28fca2268edad243e23b38142c3664aed4e5 100644 (file)
@@ -40,6 +40,7 @@ typedef struct TableSpaceOpts
    float8      random_page_cost;
    float8      seq_page_cost;
    int         effective_io_concurrency;
+   int         maintenance_io_concurrency;
 } TableSpaceOpts;
 
 extern Oid CreateTableSpace(CreateTableSpaceStmt *stmt);
index 2bf5afdade6229987e840bbed6f33834c08702c5..d2a5b52f6e20adf783919c33137979a50f75b6a8 100644 (file)
@@ -58,6 +58,7 @@ extern int    bgwriter_lru_maxpages;
 extern double bgwriter_lru_multiplier;
 extern bool track_io_timing;
 extern int effective_io_concurrency;
+extern int maintenance_io_concurrency;
 
 extern int checkpoint_flush_after;
 extern int backend_flush_after;
index 5112ba3c376cc4b3bf098f36847cf97442200476..7e4ec69aa2ef83bf4cdfa1d8dc71b77dc0197801 100644 (file)
@@ -16,5 +16,6 @@
 void       get_tablespace_page_costs(Oid spcid, float8 *spc_random_page_cost,
                                      float8 *spc_seq_page_cost);
 int            get_tablespace_io_concurrency(Oid spcid);
+int            get_tablespace_maintenance_io_concurrency(Oid spcid);
 
 #endif                         /* SPCCACHE_H */