tableam: Sample Scan Support.
author Andres Freund <[email protected]>
Sun, 20 Jan 2019 08:08:58 +0000 (00:08 -0800)
committer Andres Freund <[email protected]>
Wed, 6 Mar 2019 06:59:31 +0000 (22:59 -0800)
Author:
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:

contrib/tsm_system_rows/tsm_system_rows.c
contrib/tsm_system_time/tsm_system_time.c
src/backend/access/heap/heapam_handler.c
src/backend/access/tablesample/system.c
src/backend/executor/nodeSamplescan.c
src/include/access/tableam.h
src/include/access/tsmapi.h
src/include/nodes/execnodes.h
src/include/nodes/tidbitmap.h

index 1d35ea3c53aabefda85fe353dda461ed266d25a3..3611d0583315eb983b2f9afac8e6c17f37f164a2 100644 (file)
@@ -28,7 +28,6 @@
 
 #include "postgres.h"
 
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -46,7 +45,6 @@ typedef struct
 {
    uint32      seed;           /* random seed */
    int64       ntuples;        /* number of tuples to return */
-   int64       donetuples;     /* number of tuples already returned */
    OffsetNumber lt;            /* last tuple returned from current block */
    BlockNumber doneblocks;     /* number of already-scanned blocks */
    BlockNumber lb;             /* last block visited */
@@ -67,11 +65,10 @@ static void system_rows_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_rows_nextsampleblock(SampleScanState *node);
+static BlockNumber system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_rows_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
-static bool SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan);
 static uint32 random_relative_prime(uint32 n, SamplerRandomState randstate);
 
 
@@ -187,7 +184,6 @@ system_rows_beginsamplescan(SampleScanState *node,
 
    sampler->seed = seed;
    sampler->ntuples = ntuples;
-   sampler->donetuples = 0;
    sampler->lt = InvalidOffsetNumber;
    sampler->doneblocks = 0;
    /* lb will be initialized during first NextSampleBlock call */
@@ -206,11 +202,9 @@ system_rows_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_rows_nextsampleblock(SampleScanState *node)
+system_rows_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   TableScanDesc scan = node->ss.ss_currentScanDesc;
-   HeapScanDesc hscan = (HeapScanDesc) scan;
 
    /* First call within scan? */
    if (sampler->doneblocks == 0)
@@ -222,14 +216,14 @@ system_rows_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (hscan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = hscan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -246,7 +240,7 @@ system_rows_nextsampleblock(SampleScanState *node)
 
    /* If we've read all blocks or returned all needed tuples, we're done */
    if (++sampler->doneblocks > sampler->nblocks ||
-       sampler->donetuples >= sampler->ntuples)
+       node->donetuples >= sampler->ntuples)
        return InvalidBlockNumber;
 
    /*
@@ -259,7 +253,7 @@ system_rows_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= hscan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
@@ -279,77 +273,27 @@ system_rows_nextsampletuple(SampleScanState *node,
                            OffsetNumber maxoffset)
 {
    SystemRowsSamplerData *sampler = (SystemRowsSamplerData *) node->tsm_state;
-   TableScanDesc scan = node->ss.ss_currentScanDesc;
-   HeapScanDesc hscan = (HeapScanDesc) scan;
    OffsetNumber tupoffset = sampler->lt;
 
    /* Quit if we've returned all needed tuples */
-   if (sampler->donetuples >= sampler->ntuples)
+   if (node->donetuples >= sampler->ntuples)
        return InvalidOffsetNumber;
 
-   /*
-    * Because we should only count visible tuples as being returned, we need
-    * to search for a visible tuple rather than just let the core code do it.
-    */
-
-   /* We rely on the data accumulated in pagemode access */
-   Assert(scan->rs_pageatatime);
-   for (;;)
-   {
-       /* Advance to next possible offset on page */
-       if (tupoffset == InvalidOffsetNumber)
-           tupoffset = FirstOffsetNumber;
-       else
-           tupoffset++;
-
-       /* Done? */
-       if (tupoffset > maxoffset)
-       {
-           tupoffset = InvalidOffsetNumber;
-           break;
-       }
+   /* Advance to next possible offset on page */
+   if (tupoffset == InvalidOffsetNumber)
+       tupoffset = FirstOffsetNumber;
+   else
+       tupoffset++;
 
-       /* Found a candidate? */
-       if (SampleOffsetVisible(tupoffset, hscan))
-       {
-           sampler->donetuples++;
-           break;
-       }
-   }
+   /* Done? */
+   if (tupoffset > maxoffset)
+       tupoffset = InvalidOffsetNumber;
 
    sampler->lt = tupoffset;
 
    return tupoffset;
 }
 
-/*
- * Check if tuple offset is visible
- *
- * In pageatatime mode, heapgetpage() already did visibility checks,
- * so just look at the info it left in rs_vistuples[].
- */
-static bool
-SampleOffsetVisible(OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   int         start = 0,
-               end = scan->rs_ntuples - 1;
-
-   while (start <= end)
-   {
-       int         mid = (start + end) / 2;
-       OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-       if (tupoffset == curoffset)
-           return true;
-       else if (tupoffset < curoffset)
-           end = mid - 1;
-       else
-           start = mid + 1;
-   }
-
-   return false;
-}
-
 /*
  * Compute greatest common divisor of two uint32's.
  */
index 1cc7264e084da477822bd8b2e76f8efd36f95451..ab9f685f0afb4ae19c70fdfb22732f03667568f9 100644 (file)
@@ -26,7 +26,6 @@
 
 #include <math.h>
 
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -66,7 +65,7 @@ static void system_time_beginsamplescan(SampleScanState *node,
                            Datum *params,
                            int nparams,
                            uint32 seed);
-static BlockNumber system_time_nextsampleblock(SampleScanState *node);
+static BlockNumber system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_time_nextsampletuple(SampleScanState *node,
                            BlockNumber blockno,
                            OffsetNumber maxoffset);
@@ -213,11 +212,9 @@ system_time_beginsamplescan(SampleScanState *node,
  * Uses linear probing algorithm for picking next block.
  */
 static BlockNumber
-system_time_nextsampleblock(SampleScanState *node)
+system_time_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemTimeSamplerData *sampler = (SystemTimeSamplerData *) node->tsm_state;
-   TableScanDesc scan = node->ss.ss_currentScanDesc;
-   HeapScanDesc hscan = (HeapScanDesc) scan;
    instr_time  cur_time;
 
    /* First call within scan? */
@@ -230,14 +227,14 @@ system_time_nextsampleblock(SampleScanState *node)
            SamplerRandomState randstate;
 
            /* If relation is empty, there's nothing to scan */
-           if (hscan->rs_nblocks == 0)
+           if (nblocks == 0)
                return InvalidBlockNumber;
 
            /* We only need an RNG during this setup step */
            sampler_random_init_state(sampler->seed, randstate);
 
            /* Compute nblocks/firstblock/step only once per query */
-           sampler->nblocks = hscan->rs_nblocks;
+           sampler->nblocks = nblocks;
 
            /* Choose random starting block within the relation */
            /* (Actually this is the predecessor of the first block visited) */
@@ -273,7 +270,7 @@ system_time_nextsampleblock(SampleScanState *node)
    {
        /* Advance lb, using uint64 arithmetic to forestall overflow */
        sampler->lb = ((uint64) sampler->lb + sampler->step) % sampler->nblocks;
-   } while (sampler->lb >= hscan->rs_nblocks);
+   } while (sampler->lb >= nblocks);
 
    return sampler->lb;
 }
index b3d453b957c489fea38a719f74bce223a466474d..2118532db2d6a1a32e2f8366e4a05e17a9333731 100644 (file)
@@ -27,6 +27,7 @@
 #include "access/heapam.h"
 #include "access/multixact.h"
 #include "access/tableam.h"
+#include "access/tsmapi.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/index.h"
@@ -35,6 +36,7 @@
 #include "catalog/storage_xlog.h"
 #include "executor/executor.h"
 #include "optimizer/plancat.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/bufpage.h"
 #include "storage/bufmgr.h"
@@ -1794,6 +1796,218 @@ heapam_index_validate_scan(Relation heapRelation,
    indexInfo->ii_PredicateState = NULL;
 }
 
+/*
+ * Check visibility of the tuple at offset 'tupoffset'.
+ *
+ * Which arguments matter depends on the scan mode:
+ *
+ * - In pageatatime mode only 'tupoffset' is consulted: it is looked up in
+ *   scan->rs_vistuples[], and 'tuple' and 'buffer' are not used.
+ * - Otherwise 'tuple' is tested against the scan's snapshot, with 'buffer'
+ *   passed along to HeapTupleSatisfiesVisibility(); 'tupoffset' is not
+ *   used.
+ *
+ * Returns true if the tuple is considered visible, false otherwise.
+ */
+static bool
+SampleHeapTupleVisible(HeapScanDesc scan, Buffer buffer,
+                      HeapTuple tuple,
+                      OffsetNumber tupoffset)
+{
+   if (scan->rs_scan.rs_pageatatime)
+   {
+       /*
+        * In pageatatime mode, heapgetpage() already did visibility checks,
+        * so just look at the info it left in rs_vistuples[].
+        *
+        * We use a binary search over the known-sorted array.  Note: we could
+        * save some effort if we insisted that NextSampleTuple select tuples
+        * in increasing order, but it's not clear that there would be enough
+        * gain to justify the restriction.
+        *
+        * Only the first rs_ntuples entries of the array are searched; an
+        * offset that is not found among them is reported as not visible.
+        */
+       int         start = 0,
+                   end = scan->rs_ntuples - 1;
+
+       while (start <= end)
+       {
+           int         mid = (start + end) / 2;
+           OffsetNumber curoffset = scan->rs_vistuples[mid];
+
+           if (tupoffset == curoffset)
+               return true;
+           else if (tupoffset < curoffset)
+               end = mid - 1;
+           else
+               start = mid + 1;
+       }
+
+       return false;
+   }
+   else
+   {
+       /* Otherwise, we have to check the tuple individually. */
+       return HeapTupleSatisfiesVisibility(tuple, scan->rs_scan.rs_snapshot,
+                                           buffer);
+   }
+}
+
+static bool
+heapam_scan_sample_next_block(TableScanDesc sscan, struct SampleScanState *scanstate)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno;
+
+   /*
+    * Overview: choose the next block of a sample scan and pass it to
+    * heapgetpage(), returning true.  If the tablesample method provides a
+    * NextSampleBlock callback, it picks the block (and is told rs_nblocks);
+    * otherwise blocks are visited sequentially.  When no valid block is
+    * left, the current buffer (if any) is released, rs_cbuf, rs_cblock and
+    * rs_inited are reset, and false is returned.
+    *
+    * Return false immediately if relation is empty; in that case the
+    * NextSampleBlock callback is never invoked.
+    */
+   if (scan->rs_nblocks == 0)
+       return false;
+
+   if (tsm->NextSampleBlock)
+   {
+       blockno = tsm->NextSampleBlock(scanstate, scan->rs_nblocks);
+       scan->rs_cblock = blockno;
+   }
+   else
+   {
+       /*
+        * Scanning table sequentially: the first call (rs_cblock still
+        * invalid) starts at rs_startblock; later calls advance by one
+        * block, wrapping around at rs_nblocks, and the scan ends once we
+        * arrive back at rs_startblock.
+        */
+
+       if (scan->rs_cblock == InvalidBlockNumber)
+       {
+           Assert(!scan->rs_inited);
+           blockno = scan->rs_startblock;
+       }
+       else
+       {
+           Assert(scan->rs_inited);
+
+           blockno = scan->rs_cblock + 1;
+
+           if (blockno >= scan->rs_nblocks)
+           {
+               /* wrap to beginning of rel, might not have started at 0 */
+               blockno = 0;
+           }
+
+           /*
+            * Report our new scan position for synchronization purposes.
+            *
+            * Note: we do this before checking for end of scan so that the
+            * final state of the position hint is back at the start of the
+            * rel.  That's not strictly necessary, but otherwise when you run
+            * the same query multiple times the starting position would shift
+            * a little bit backwards on every invocation, which is confusing.
+            * We don't guarantee any specific ordering in general, though.
+            */
+           if (scan->rs_scan.rs_syncscan)
+               ss_report_location(scan->rs_scan.rs_rd, blockno);
+
+           if (blockno == scan->rs_startblock)
+           {
+               blockno = InvalidBlockNumber;
+           }
+       }
+   }
+
+   if (!BlockNumberIsValid(blockno))
+   {
+       if (BufferIsValid(scan->rs_cbuf))
+           ReleaseBuffer(scan->rs_cbuf);
+       scan->rs_cbuf = InvalidBuffer;
+       scan->rs_cblock = InvalidBlockNumber;
+       scan->rs_inited = false;
+
+       return false;
+   }
+
+   heapgetpage(sscan, blockno);
+   scan->rs_inited = true;
+
+   return true;
+}
+
+static bool
+heapam_scan_sample_next_tuple(TableScanDesc sscan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+   HeapScanDesc scan = (HeapScanDesc) sscan;
+   TsmRoutine *tsm = scanstate->tsmroutine;
+   BlockNumber blockno = scan->rs_cblock;
+   bool        pagemode = scan->rs_scan.rs_pageatatime;
+
+   Page        page;
+   bool        all_visible;
+   OffsetNumber maxoffset;
+
+   ExecClearTuple(slot);
+
+   /*
+    * Overview: repeatedly ask the tablesample method's NextSampleTuple
+    * callback for an offset on the current block (scan->rs_cblock) until it
+    * names a visible tuple or reports that the page is exhausted.  Returns
+    * true with the tuple stored in 'slot', or false with nothing stored
+    * after the ExecClearTuple() above.
+    *
+    * When not using pagemode, we must lock the buffer during tuple
+    * visibility checks.  The lock is dropped again on both exit paths
+    * below.
+    */
+   if (!pagemode)
+       LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
+
+   page = (Page) BufferGetPage(scan->rs_cbuf);
+   all_visible = PageIsAllVisible(page) && !scan->rs_scan.rs_snapshot->takenDuringRecovery;
+   maxoffset = PageGetMaxOffsetNumber(page);
+
+   for (;;)
+   {
+       OffsetNumber tupoffset;
+
+       CHECK_FOR_INTERRUPTS();
+
+       /* Ask the tablesample method which tuples to check on this page. */
+       tupoffset = tsm->NextSampleTuple(scanstate,
+                                        blockno,
+                                        maxoffset);
+
+       if (OffsetNumberIsValid(tupoffset))
+       {
+           ItemId      itemid;
+           bool        visible;
+           HeapTuple   tuple = &(scan->rs_ctup);
+
+           /*
+            * Skip invalid tuple pointers, i.e. line pointers for which
+            * ItemIdIsNormal() is false, and ask the method for another
+            * offset.
+            */
+           itemid = PageGetItemId(page, tupoffset);
+           if (!ItemIdIsNormal(itemid))
+               continue;
+
+           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+           tuple->t_len = ItemIdGetLength(itemid);
+           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
+
+
+           if (all_visible)
+               visible = true;
+           else
+               visible = SampleHeapTupleVisible(scan, scan->rs_cbuf, tuple, tupoffset);
+
+           /* in pagemode, heapgetpage did this for us */
+           if (!pagemode)
+               CheckForSerializableConflictOut(visible, scan->rs_scan.rs_rd, tuple,
+                                               scan->rs_cbuf, scan->rs_scan.rs_snapshot);
+
+           /* Not visible: try next tuple from same page. */
+           if (!visible)
+               continue;
+
+           /* Found visible tuple, return it (unlocking first if needed). */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           ExecStoreBufferHeapTuple(tuple, slot, scan->rs_cbuf);
+
+           /* Count successfully-fetched tuples as heap fetches */
+           pgstat_count_heap_getnext(scan->rs_scan.rs_rd);
+
+           return true;
+       }
+       else
+       {
+           /*
+            * If we get here, it means we've exhausted the items on this page
+            * and it's time to move to the next.  Choosing the next block is
+            * not done here; we only report false.
+            */
+           if (!pagemode)
+               LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+
+           break;
+       }
+   }
+
+   return false;
+}
+
 /*
  * Reconstruct and rewrite the given tuple
  *
@@ -1983,6 +2197,9 @@ static const TableAmRoutine heapam_methods = {
    .index_validate_scan = heapam_index_validate_scan,
 
    .relation_estimate_size = heapam_estimate_rel_size,
+
+   .scan_sample_next_block = heapam_scan_sample_next_block,
+   .scan_sample_next_tuple = heapam_scan_sample_next_tuple
 };
 
 
index fe62a73341e53b138e5628574de58b9a384a6c9e..476411caf1a39d1869f352d0e8bfaf88cf06f96e 100644 (file)
@@ -27,7 +27,6 @@
 #include <math.h>
 
 #include "access/hash.h"
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tsmapi.h"
 #include "catalog/pg_type.h"
@@ -56,7 +55,7 @@ static void system_beginsamplescan(SampleScanState *node,
                       Datum *params,
                       int nparams,
                       uint32 seed);
-static BlockNumber system_nextsampleblock(SampleScanState *node);
+static BlockNumber system_nextsampleblock(SampleScanState *node, BlockNumber nblocks);
 static OffsetNumber system_nextsampletuple(SampleScanState *node,
                       BlockNumber blockno,
                       OffsetNumber maxoffset);
@@ -177,11 +176,9 @@ system_beginsamplescan(SampleScanState *node,
  * Select next block to sample.
  */
 static BlockNumber
-system_nextsampleblock(SampleScanState *node)
+system_nextsampleblock(SampleScanState *node, BlockNumber nblocks)
 {
    SystemSamplerData *sampler = (SystemSamplerData *) node->tsm_state;
-   TableScanDesc scan = node->ss.ss_currentScanDesc;
-   HeapScanDesc hscan = (HeapScanDesc) scan;
    BlockNumber nextblock = sampler->nextblock;
    uint32      hashinput[2];
 
@@ -200,7 +197,7 @@ system_nextsampleblock(SampleScanState *node)
     * Loop over block numbers until finding suitable block or reaching end of
     * relation.
     */
-   for (; nextblock < hscan->rs_nblocks; nextblock++)
+   for (; nextblock < nblocks; nextblock++)
    {
        uint32      hash;
 
@@ -212,7 +209,7 @@ system_nextsampleblock(SampleScanState *node)
            break;
    }
 
-   if (nextblock < hscan->rs_nblocks)
+   if (nextblock < nblocks)
    {
        /* Found a suitable block; remember where we should start next time */
        sampler->nextblock = nextblock + 1;
index b311eb72ff31e4234456300a05cdd8fdc98b931b..ce0d3bfa572646f1fd5f8231a7aec7cb56c85239 100644 (file)
@@ -15,7 +15,6 @@
 #include "postgres.h"
 
 #include "access/hash.h"
-#include "access/heapam.h"
 #include "access/relscan.h"
 #include "access/tableam.h"
 #include "access/tsmapi.h"
@@ -29,9 +28,7 @@
 
 static TupleTableSlot *SampleNext(SampleScanState *node);
 static void tablesample_init(SampleScanState *scanstate);
-static HeapTuple tablesample_getnext(SampleScanState *scanstate);
-static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
-                  HeapScanDesc scan);
+static TupleTableSlot *tablesample_getnext(SampleScanState *scanstate);
 
 /* ----------------------------------------------------------------
  *                     Scan Support
@@ -47,10 +44,6 @@ static bool SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset,
 static TupleTableSlot *
 SampleNext(SampleScanState *node)
 {
-   HeapTuple   tuple;
-   TupleTableSlot *slot;
-   HeapScanDesc hscan;
-
    /*
     * if this is first call within a scan, initialize
     */
@@ -60,19 +53,7 @@ SampleNext(SampleScanState *node)
    /*
     * get the next tuple, and store it in our result slot
     */
-   tuple = tablesample_getnext(node);
-
-   slot = node->ss.ss_ScanTupleSlot;
-   hscan = (HeapScanDesc) node->ss.ss_currentScanDesc;
-
-   if (tuple)
-       ExecStoreBufferHeapTuple(tuple, /* tuple to store */
-                                slot,  /* slot to store in */
-                                hscan->rs_cbuf); /* tuple's buffer */
-   else
-       ExecClearTuple(slot);
-
-   return slot;
+   return tablesample_getnext(node);
 }
 
 /*
@@ -237,6 +218,9 @@ ExecReScanSampleScan(SampleScanState *node)
 {
    /* Remember we need to do BeginSampleScan again (if we did it at all) */
    node->begun = false;
+   node->done = false;
+   node->haveblock = false;
+   node->donetuples = 0;
 
    ExecScanReScan(&node->ss);
 }
@@ -258,6 +242,7 @@ tablesample_init(SampleScanState *scanstate)
    int         i;
    ListCell   *arg;
 
+   scanstate->donetuples = 0;
    params = (Datum *) palloc(list_length(scanstate->args) * sizeof(Datum));
 
    i = 0;
@@ -345,225 +330,49 @@ tablesample_init(SampleScanState *scanstate)
 
 /*
  * Get next tuple from TABLESAMPLE method.
- *
- * Note: an awful lot of this is copied-and-pasted from heapam.c.  It would
- * perhaps be better to refactor to share more code.
  */
-static HeapTuple
+static TupleTableSlot *
 tablesample_getnext(SampleScanState *scanstate)
 {
-   TsmRoutine *tsm = scanstate->tsmroutine;
    TableScanDesc scan = scanstate->ss.ss_currentScanDesc;
-   HeapScanDesc hscan = (HeapScanDesc) scan;
-   HeapTuple   tuple = &(hscan->rs_ctup);
-   Snapshot    snapshot = scan->rs_snapshot;
-   bool        pagemode = scan->rs_pageatatime;
-   BlockNumber blockno;
-   Page        page;
-   bool        all_visible;
-   OffsetNumber maxoffset;
-
-   if (!hscan->rs_inited)
-   {
-       /*
-        * return null immediately if relation is empty
-        */
-       if (hscan->rs_nblocks == 0)
-       {
-           Assert(!BufferIsValid(hscan->rs_cbuf));
-           tuple->t_data = NULL;
-           return NULL;
-       }
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           if (!BlockNumberIsValid(blockno))
-           {
-               tuple->t_data = NULL;
-               return NULL;
-           }
-       }
-       else
-           blockno = hscan->rs_startblock;
-       Assert(blockno < hscan->rs_nblocks);
-       heapgetpage(scan, blockno);
-       hscan->rs_inited = true;
-   }
-   else
-   {
-       /* continue from previously returned page/tuple */
-       blockno = hscan->rs_cblock; /* current page */
-   }
+   TupleTableSlot *slot = scanstate->ss.ss_ScanTupleSlot;
 
-   /*
-    * When not using pagemode, we must lock the buffer during tuple
-    * visibility checks.
-    */
-   if (!pagemode)
-       LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
+   ExecClearTuple(slot);
 
-   page = (Page) BufferGetPage(hscan->rs_cbuf);
-   all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-   maxoffset = PageGetMaxOffsetNumber(page);
+   if (scanstate->done)
+       return NULL;
 
    for (;;)
    {
-       OffsetNumber tupoffset;
-       bool        finished;
-
-       CHECK_FOR_INTERRUPTS();
-
-       /* Ask the tablesample method which tuples to check on this page. */
-       tupoffset = tsm->NextSampleTuple(scanstate,
-                                        blockno,
-                                        maxoffset);
-
-       if (OffsetNumberIsValid(tupoffset))
+       if (!scanstate->haveblock)
        {
-           ItemId      itemid;
-           bool        visible;
-
-           /* Skip invalid tuple pointers. */
-           itemid = PageGetItemId(page, tupoffset);
-           if (!ItemIdIsNormal(itemid))
-               continue;
-
-           tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
-           tuple->t_len = ItemIdGetLength(itemid);
-           ItemPointerSet(&(tuple->t_self), blockno, tupoffset);
-
-           if (all_visible)
-               visible = true;
-           else
-               visible = SampleTupleVisible(tuple, tupoffset, hscan);
-
-           /* in pagemode, heapgetpage did this for us */
-           if (!pagemode)
-               CheckForSerializableConflictOut(visible, scan->rs_rd, tuple,
-                                               hscan->rs_cbuf, snapshot);
-
-           if (visible)
-           {
-               /* Found visible tuple, return it. */
-               if (!pagemode)
-                   LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
-               break;
-           }
-           else
+           if (!table_scan_sample_next_block(scan, scanstate))
            {
-               /* Try next tuple from same page. */
-               continue;
-           }
-       }
+               scanstate->haveblock = false;
+               scanstate->done = true;
 
-       /*
-        * if we get here, it means we've exhausted the items on this page and
-        * it's time to move to the next.
-        */
-       if (!pagemode)
-           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK);
+               /* exhausted relation */
+               return NULL;
+           }
 
-       if (tsm->NextSampleBlock)
-       {
-           blockno = tsm->NextSampleBlock(scanstate);
-           Assert(!scan->rs_syncscan);
-           finished = !BlockNumberIsValid(blockno);
+           scanstate->haveblock = true;
        }
-       else
-       {
-           /* Without NextSampleBlock, just do a plain forward seqscan. */
-           blockno++;
-           if (blockno >= hscan->rs_nblocks)
-               blockno = 0;
 
+       if (!table_scan_sample_next_tuple(scan, scanstate, slot))
+       {
            /*
-            * Report our new scan position for synchronization purposes.
-            *
-            * Note: we do this before checking for end of scan so that the
-            * final state of the position hint is back at the start of the
-            * rel.  That's not strictly necessary, but otherwise when you run
-            * the same query multiple times the starting position would shift
-            * a little bit backwards on every invocation, which is confusing.
-            * We don't guarantee any specific ordering in general, though.
+            * If we get here, it means we've exhausted the items on this page
+            * and it's time to move to the next.
             */
-           if (scan->rs_syncscan)
-               ss_report_location(scan->rs_rd, blockno);
-
-           finished = (blockno == hscan->rs_startblock);
+           scanstate->haveblock = false;
+           continue;
        }
 
-       /*
-        * Reached end of scan?
-        */
-       if (finished)
-       {
-           if (BufferIsValid(hscan->rs_cbuf))
-               ReleaseBuffer(hscan->rs_cbuf);
-           hscan->rs_cbuf = InvalidBuffer;
-           hscan->rs_cblock = InvalidBlockNumber;
-           tuple->t_data = NULL;
-           hscan->rs_inited = false;
-           return NULL;
-       }
-
-       Assert(blockno < hscan->rs_nblocks);
-       heapgetpage(scan, blockno);
-
-       /* Re-establish state for new page */
-       if (!pagemode)
-           LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE);
-
-       page = (Page) BufferGetPage(hscan->rs_cbuf);
-       all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
-       maxoffset = PageGetMaxOffsetNumber(page);
+       /* Found visible tuple, return it. */
+       break;
    }
 
-   /* Count successfully-fetched tuples as heap fetches */
-   pgstat_count_heap_getnext(scan->rs_rd);
-
-   return &(hscan->rs_ctup);
-}
+   scanstate->donetuples++;
 
-/*
- * Check visibility of the tuple.
- */
-static bool
-SampleTupleVisible(HeapTuple tuple, OffsetNumber tupoffset, HeapScanDesc scan)
-{
-   if (scan->rs_scan.rs_pageatatime)
-   {
-       /*
-        * In pageatatime mode, heapgetpage() already did visibility checks,
-        * so just look at the info it left in rs_vistuples[].
-        *
-        * We use a binary search over the known-sorted array.  Note: we could
-        * save some effort if we insisted that NextSampleTuple select tuples
-        * in increasing order, but it's not clear that there would be enough
-        * gain to justify the restriction.
-        */
-       int         start = 0,
-                   end = scan->rs_ntuples - 1;
-
-       while (start <= end)
-       {
-           int         mid = (start + end) / 2;
-           OffsetNumber curoffset = scan->rs_vistuples[mid];
-
-           if (tupoffset == curoffset)
-               return true;
-           else if (tupoffset < curoffset)
-               end = mid - 1;
-           else
-               start = mid + 1;
-       }
-
-       return false;
-   }
-   else
-   {
-       /* Otherwise, we have to check the tuple individually. */
-       return HeapTupleSatisfiesVisibility(tuple,
-                                           scan->rs_scan.rs_snapshot,
-                                           scan->rs_cbuf);
-   }
+   return slot;
 }
index 4a1d83a87c811a0db9f506a2f8a45162393a1789..e8ab0839df9ed826ed0c10c5eaa31d9e2acbd1c6 100644 (file)
@@ -29,6 +29,7 @@ extern bool synchronize_seqscans;
 
 struct VacuumParams;
 struct ValidateIndexState;
+struct SampleScanState;
 struct BulkInsertStateData;
 
 /*
@@ -239,6 +240,18 @@ typedef struct TableAmRoutine
 
    void        (*relation_estimate_size) (Relation rel, int32 *attr_widths,
                                           BlockNumber *pages, double *tuples, double *allvisfrac);
+
+
+   /* ------------------------------------------------------------------------
+    * Executor related functions.
+    * ------------------------------------------------------------------------
+    */
+
+   bool        (*scan_sample_next_block) (TableScanDesc scan,
+                                          struct SampleScanState *scanstate);
+   bool        (*scan_sample_next_tuple) (TableScanDesc scan,
+                                          struct SampleScanState *scanstate,
+                                          TupleTableSlot *slot);
 } TableAmRoutine;
 
 
@@ -710,6 +723,24 @@ table_estimate_size(Relation rel, int32 *attr_widths,
 }
 
 
+/* ----------------------------------------------------------------------------
+ * Executor related functionality
+ *
+ * table_scan_sample_next_block() and table_scan_sample_next_tuple() forward
+ * their arguments to the scan_sample_next_block / scan_sample_next_tuple
+ * callbacks of the table AM of the relation being scanned
+ * (scan->rs_rd->rd_tableam) and return the callback's result unchanged.
+ * Neither wrapper checks that the callback pointer is set.
+ * ----------------------------------------------------------------------------
+ */
+
+static inline bool
+table_scan_sample_next_block(TableScanDesc scan, struct SampleScanState *scanstate)
+{
+   return scan->rs_rd->rd_tableam->scan_sample_next_block(scan, scanstate);
+}
+
+static inline bool
+table_scan_sample_next_tuple(TableScanDesc scan, struct SampleScanState *scanstate, TupleTableSlot *slot)
+{
+   return scan->rs_rd->rd_tableam->scan_sample_next_tuple(scan, scanstate, slot);
+}
+
+
 /* ----------------------------------------------------------------------------
  * Functions to make modifications a bit simpler.
  * ----------------------------------------------------------------------------
index a5c0b4cafecfac336864536b6dc0749d512f08bd..ccef65aedb432943ca0f8865665d38f3fe104a59 100644 (file)
@@ -34,7 +34,7 @@ typedef void (*BeginSampleScan_function) (SampleScanState *node,
                                          int nparams,
                                          uint32 seed);
 
-typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node);
+typedef BlockNumber (*NextSampleBlock_function) (SampleScanState *node, BlockNumber nblocks);
 
 typedef OffsetNumber (*NextSampleTuple_function) (SampleScanState *node,
                                                  BlockNumber blockno,
index fb61a339acb0b5f3d8e4f323ece320f66aa95e8a..5ad1bdfd9b33f829a7e152c8676fb483fd5b1297 100644 (file)
@@ -1298,6 +1298,9 @@ typedef struct SampleScanState
    bool        use_pagemode;   /* use page-at-a-time visibility checking? */
    bool        begun;          /* false means need to call BeginSampleScan */
    uint32      seed;           /* random seed */
+   int64       donetuples;     /* number of tuples already returned */
+   bool        haveblock;      /* has a block for sampling been determined */
+   bool        done;           /* exhausted all tuples? */
 } SampleScanState;
 
 /*
@@ -1526,6 +1529,7 @@ typedef struct BitmapHeapScanState
    Buffer      pvmbuffer;
    long        exact_pages;
    long        lossy_pages;
+   int         return_empty_tuples;
    TBMIterator *prefetch_iterator;
    int         prefetch_pages;
    int         prefetch_target;
index 2645085b3448c490f9055d25b49618f6be9412ad..6a7f3054a4168c898199ed5506ec09553ddade54 100644 (file)
@@ -37,7 +37,7 @@ typedef struct TBMIterator TBMIterator;
 typedef struct TBMSharedIterator TBMSharedIterator;
 
 /* Result structure for tbm_iterate */
-typedef struct
+typedef struct TBMIterateResult
 {
    BlockNumber blockno;        /* page number containing tuples */
    int         ntuples;        /* -1 indicates lossy result */