Fix BRIN cost estimation
authorAlvaro Herrera <[email protected]>
Thu, 6 Apr 2017 20:49:26 +0000 (17:49 -0300)
committerAlvaro Herrera <[email protected]>
Thu, 6 Apr 2017 20:51:53 +0000 (17:51 -0300)
The original code was overly optimistic about the cost of scanning a
BRIN index, leading to BRIN indexes being selected when they'd be a
worse choice than some other index.  This complete rewrite should be
more accurate.

Author: David Rowley, based on an earlier patch by Emre Hasegeli
Reviewed-by: Emre Hasegeli
Discussion: https://postgr.es/m/CAKJS1f9n-Wapop5Xz1dtGdpdqmzeGqQK4sV2MK-zZugfC14Xtw@mail.gmail.com

src/backend/access/brin/brin.c
src/backend/utils/adt/selfuncs.c
src/include/access/brin.h
src/test/regress/expected/brin.out
src/test/regress/sql/brin.sql

index 649f3488c20d5211081108ca024b867bc9c9cfe6..cf3139cbabf04b803ca2eade33db3ba94f6fb2c1 100644 (file)
@@ -1049,6 +1049,27 @@ brin_free_desc(BrinDesc *bdesc)
        MemoryContextDelete(bdesc->bd_context);
 }
 
+/*
+ * Fetch index's statistical data into *stats
+ */
+void
+brinGetStats(Relation index, BrinStatsData *stats)
+{
+       Buffer          metabuffer;
+       Page            metapage;
+       BrinMetaPageData *metadata;
+
+       metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
+       LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
+       metapage = BufferGetPage(metabuffer);
+       metadata = (BrinMetaPageData *) PageGetContents(metapage);
+
+       stats->pagesPerRange = metadata->pagesPerRange;
+       stats->revmapNumPages = metadata->lastRevmapPage - 1;
+
+       UnlockReleaseBuffer(metabuffer);
+}
+
 /*
  * Initialize a BrinBuildState appropriate to create tuples on the given index.
  */
index f5cffcb2eac0549f809e8e0233db8ad374a274dd..b6893e22bfafa97ee3c1cb243f33ee33a67f964d 100644 (file)
 #include <float.h>
 #include <math.h>
 
+#include "access/brin.h"
 #include "access/gin.h"
 #include "access/htup_details.h"
 #include "access/sysattr.h"
@@ -7721,56 +7722,200 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 {
        IndexOptInfo *index = path->indexinfo;
        List       *indexQuals = path->indexquals;
-       List       *indexOrderBys = path->indexorderbys;
        double          numPages = index->pages;
-       double          numTuples = index->tuples;
+       RelOptInfo *baserel = index->rel;
+       RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
        List       *qinfos;
        Cost            spc_seq_page_cost;
        Cost            spc_random_page_cost;
-       double          qual_op_cost;
        double          qual_arg_cost;
+       double          qualSelectivity;
+       BrinStatsData statsData;
+       double          indexRanges;
+       double          minimalRanges;
+       double          estimatedRanges;
+       double          selec;
+       Relation        indexRel;
+       ListCell   *l;
+       VariableStatData vardata;
 
-       /* Do preliminary analysis of indexquals */
-       qinfos = deconstruct_indexquals(path);
+       Assert(rte->rtekind == RTE_RELATION);
 
-       /* fetch estimated page cost for tablespace containing index */
+       /* fetch estimated page cost for the tablespace containing the index */
        get_tablespace_page_costs(index->reltablespace,
                                                          &spc_random_page_cost,
                                                          &spc_seq_page_cost);
 
        /*
-        * BRIN indexes are always read in full; use that as startup cost.
+        * Obtain some data from the index itself.
+        */
+       indexRel = index_open(index->indexoid, AccessShareLock);
+       brinGetStats(indexRel, &statsData);
+       index_close(indexRel, AccessShareLock);
+
+       /*
+        * Compute index correlation
         *
-        * XXX maybe only include revmap pages here?
+        * Because we can use all index quals equally when scanning, we can use
+        * the largest correlation (in absolute value) among columns used by the
+        * query.  Start at zero, the worst possible case.  If we cannot find
+        * any correlation statistics, we will keep it as 0.
+        */
+       *indexCorrelation = 0;
+
+       qinfos = deconstruct_indexquals(path);
+       foreach(l, qinfos)
+       {
+               IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
+               AttrNumber      attnum = index->indexkeys[qinfo->indexcol];
+
+               /* attempt to lookup stats in relation for this index column */
+               if (attnum != 0)
+               {
+                       /* Simple variable -- look to stats for the underlying table */
+                       if (get_relation_stats_hook &&
+                               (*get_relation_stats_hook) (root, rte, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it
+                                * did supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
+                                       elog(ERROR,
+                                                "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple =
+                                       SearchSysCache3(STATRELATTINH,
+                                                                       ObjectIdGetDatum(rte->relid),
+                                                                       Int16GetDatum(attnum),
+                                                                       BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+               else
+               {
+                       /*
+                        * Looks like we've found an expression column in the index. Let's
+                        * see if there's any stats for it.
+                        */
+
+                       /* get the attnum from the 0-based index. */
+                       attnum = qinfo->indexcol + 1;
+
+                       if (get_index_stats_hook &&
+                               (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
+                       {
+                               /*
+                                * The hook took control of acquiring a stats tuple.  If it did
+                                * supply a tuple, it'd better have supplied a freefunc.
+                                */
+                               if (HeapTupleIsValid(vardata.statsTuple) &&
+                                       !vardata.freefunc)
+                                       elog(ERROR, "no function provided to release variable stats with");
+                       }
+                       else
+                       {
+                               vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+                                                                                                        ObjectIdGetDatum(index->indexoid),
+                                                                                                        Int16GetDatum(attnum),
+                                                                                                        BoolGetDatum(false));
+                               vardata.freefunc = ReleaseSysCache;
+                       }
+               }
+
+               if (HeapTupleIsValid(vardata.statsTuple))
+               {
+                       float4     *numbers;
+                       int                     nnumbers;
+
+                       if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
+                                                                STATISTIC_KIND_CORRELATION,
+                                                                InvalidOid,
+                                                                NULL,
+                                                                NULL, NULL,
+                                                                &numbers, &nnumbers))
+                       {
+                               double          varCorrelation = 0.0;
+
+                               if (nnumbers > 0)
+                                       varCorrelation = Abs(numbers[0]);
+
+                               if (varCorrelation > *indexCorrelation)
+                                       *indexCorrelation = varCorrelation;
+
+                               free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
+                       }
+               }
+
+               ReleaseVariableStats(vardata);
+       }
+
+       qualSelectivity = clauselist_selectivity(root, indexQuals,
+                                                                                        baserel->relid,
+                                                                                        JOIN_INNER, NULL,
+                                                                                        baserel);
+
+       /* work out the actual number of ranges in the index */
+       indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
+                                         1.0);
+
+       /*
+        * Now calculate the minimum possible ranges we could match with if all of
+        * the rows were in the perfect order in the table's heap.
         */
-       *indexStartupCost = spc_seq_page_cost * numPages * loop_count;
+       minimalRanges = ceil(indexRanges * qualSelectivity);
 
        /*
-        * To read a BRIN index there might be a bit of back and forth over
-        * regular pages, as revmap might point to them out of sequential order;
-        * calculate this as reading the whole index in random order.
+        * Now estimate the number of ranges that we'll touch by using the
+        * indexCorrelation from the stats. Careful not to divide by zero
+        * (note we're using the absolute value of the correlation).
         */
-       *indexTotalCost = spc_random_page_cost * numPages * loop_count;
+       if (*indexCorrelation < 1.0e-10)
+               estimatedRanges = indexRanges;
+       else
+               estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
 
-       *indexSelectivity =
-               clauselist_selectivity(root, indexQuals,
-                                                          path->indexinfo->rel->relid,
-                                                          JOIN_INNER, NULL,
-                                                          path->indexinfo->rel);
-       *indexCorrelation = 1;
+       /* we expect to visit this portion of the table */
+       selec = estimatedRanges / indexRanges;
+
+       CLAMP_PROBABILITY(selec);
+
+       *indexSelectivity = selec;
 
        /*
-        * Add on index qual eval costs, much as in genericcostestimate.
+        * Compute the index qual costs, much as in genericcostestimate, to add
+        * to the index costs.
         */
        qual_arg_cost = other_operands_eval_cost(root, qinfos) +
                orderby_operands_eval_cost(root, path);
-       qual_op_cost = cpu_operator_cost *
-               (list_length(indexQuals) + list_length(indexOrderBys));
 
+       /*
+        * Compute the startup cost as the cost to read the whole revmap
+        * sequentially, including the cost to execute the index quals.
+        */
+       *indexStartupCost =
+               spc_seq_page_cost * statsData.revmapNumPages * loop_count;
        *indexStartupCost += qual_arg_cost;
-       *indexTotalCost += qual_arg_cost;
-       *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
-       *indexPages = index->pages;
 
-       /* XXX what about pages_per_range? */
+       /*
+        * To read a BRIN index there might be a bit of back and forth over
+        * regular pages, as revmap might point to them out of sequential order;
+        * calculate the total cost as reading the whole index in random order.
+        */
+       *indexTotalCost = *indexStartupCost +
+               spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
+
+       /*
+        * Charge a small amount per range tuple which we expect to match to. This
+        * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
+        * will set a bit for each page in the range when we find a matching
+        * range, so we must multiply the charge by the number of pages in the
+        * range.
+        */
+       *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
+               statsData.pagesPerRange;
+
+       *indexPages = index->pages;
 }
index 3f4c29bdcb5e7ce0f06e33dd7fe435757cb8b55e..e03aa08f608c4ce4483b705f179552ba1890ded8 100644 (file)
@@ -25,6 +25,17 @@ typedef struct BrinOptions
        bool            autosummarize;
 } BrinOptions;
 
+
+/*
+ * BrinStatsData represents stats data for planner use
+ */
+typedef struct BrinStatsData
+{
+       BlockNumber     pagesPerRange;
+       BlockNumber     revmapNumPages;
+} BrinStatsData;
+
+
 #define BRIN_DEFAULT_PAGES_PER_RANGE   128
 #define BrinGetPagesPerRange(relation) \
        ((relation)->rd_options ? \
@@ -35,4 +46,7 @@ typedef struct BrinOptions
         ((BrinOptions *) (relation)->rd_options)->autosummarize : \
          false)
 
+
+extern void brinGetStats(Relation index, BrinStatsData *stats);
+
 #endif   /* BRIN_H */
index a40f87aea051824c9f80dc6935ccd16a997083b0..ca80f00dc90f208a2d5cace0b2f8ae17b290999d 100644 (file)
@@ -363,6 +363,8 @@ BEGIN
        END LOOP;
 END;
 $x$;
+RESET enable_seqscan;
+RESET enable_bitmapscan;
 INSERT INTO brintest SELECT
        repeat(stringu1, 42)::bytea,
        substr(stringu1, 1, 1)::"char",
@@ -481,3 +483,27 @@ SELECT brin_summarize_range('brin_summarize_idx', -1);
 ERROR:  block number out of range: -1
 SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
 ERROR:  block number out of range: 4294967296
+-- test brin cost estimates behave sanely based on correlation of values
+CREATE TABLE brin_test (a INT, b INT);
+INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
+CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
+CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
+VACUUM ANALYZE brin_test;
+-- Ensure brin index is used when columns are perfectly correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
+                 QUERY PLAN                 
+--------------------------------------------
+ Bitmap Heap Scan on brin_test
+   Recheck Cond: (a = 1)
+   ->  Bitmap Index Scan on brin_test_a_idx
+         Index Cond: (a = 1)
+(4 rows)
+
+-- Ensure brin index is not used when values are not correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
+      QUERY PLAN       
+-----------------------
+ Seq Scan on brin_test
+   Filter: (b = 1)
+(2 rows)
+
index 521b22fe566d3a5802d5a56b26288c6e21029075..11f8fe9bb3261fc01567807e9f624bad4751cae5 100644 (file)
@@ -370,6 +370,9 @@ BEGIN
 END;
 $x$;
 
+RESET enable_seqscan;
+RESET enable_bitmapscan;
+
 INSERT INTO brintest SELECT
        repeat(stringu1, 42)::bytea,
        substr(stringu1, 1, 1)::"char",
@@ -444,3 +447,16 @@ SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
 -- invalid block number values
 SELECT brin_summarize_range('brin_summarize_idx', -1);
 SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
+
+
+-- test brin cost estimates behave sanely based on correlation of values
+CREATE TABLE brin_test (a INT, b INT);
+INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
+CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
+CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
+VACUUM ANALYZE brin_test;
+
+-- Ensure brin index is used when columns are perfectly correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
+-- Ensure brin index is not used when values are not correlated
+EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;