Reduce branches in heapgetpage()'s per-tuple loop
author Andres Freund <[email protected]>
Sun, 7 Apr 2024 03:51:07 +0000 (20:51 -0700)
committer Andres Freund <[email protected]>
Sun, 7 Apr 2024 06:52:26 +0000 (23:52 -0700)
Until now, heapgetpage()'s loop over all tuples performed some conditional
checks for each tuple, even though the conditions did not change across the
loop.

This commit fixes that by moving the loop into an inline function. Calling
it with different constant arguments lets the compiler generate an optimized
loop for each combination of conditions, at the price of two per-page checks.

For cases of all-visible tables and an isolation level other than
serializable, speedups of up to 25% have been measured.

Reviewed-by: John Naylor <[email protected]>
Reviewed-by: Zhang Mingli <[email protected]>
Tested-by: Quan Zongliang <[email protected]>
Discussion: https://postgr.es/m/20230716015656[email protected]
Discussion: https://postgr.es/m/2ef7ff1b-3d18-2283-61b1-bbd25fc6c7ce@yeah.net
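
The pattern the patch relies on can be sketched independently of PostgreSQL.
In the illustrative C below, every name (Item, collect, extra_check,
process_page) is hypothetical; only the technique is taken from the patch:
an always-inline helper whose bool parameters are compile-time constants at
each call site, so the compiler emits four specialized copies of the loop
with the per-tuple branches folded away.

#include <stdbool.h>

typedef struct Item
{
    bool    normal;     /* item points at a normal tuple */
    bool    visible;    /* tuple passes the visibility check */
} Item;

/* stand-in for per-tuple serializability work (hypothetical) */
static void
extra_check(bool valid, const Item *item)
{
    (void) valid;
    (void) item;
}

/*
 * Per-item loop, forced inline so each call site below becomes its own
 * specialized copy with the bool parameters folded to constants.
 */
__attribute__((always_inline)) static inline int
collect(const Item *items, int nitems, int *out,
        bool all_visible, bool check_extra)
{
    int     ntup = 0;

    for (int i = 0; i < nitems; i++)
    {
        bool    valid;

        if (!items[i].normal)
            continue;

        /* with a constant all_visible, this branch disappears */
        valid = all_visible ? true : items[i].visible;

        if (check_extra)
            extra_check(valid, &items[i]);

        if (valid)
            out[ntup++] = i;
    }

    return ntup;
}

/*
 * Dispatch once per page: two predictable branches here replace
 * 2 * nitems branches inside the loop.
 */
int
process_page(const Item *items, int nitems, int *out,
             bool all_visible, bool check_extra)
{
    if (all_visible)
        return check_extra
            ? collect(items, nitems, out, true, true)
            : collect(items, nitems, out, true, false);
    else
        return check_extra
            ? collect(items, nitems, out, false, true)
            : collect(items, nitems, out, false, false);
}

The trade-off mirrors the patch: the 2x2 dispatch costs two well-predicted
branches per page, while the hot loop runs branch-free with respect to the
two flags.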

src/backend/access/heap/heapam.c

index 01bb2f4cc164efc1cb03d130b79ddd8486849df0..a32acc90473f1a6430faaa5d57ee65b260897698 100644
@@ -364,6 +364,56 @@ heap_setscanlimits(TableScanDesc sscan, BlockNumber startBlk, BlockNumber numBlk
    scan->rs_numblocks = numBlks;
 }
 
+/*
+ * Per-tuple loop for heapgetpage() in pagemode. Pulled out so it can be
+ * called multiple times, with constant arguments for all_visible,
+ * check_serializable.
+ */
+pg_attribute_always_inline
+static int
+heapgetpage_collect(HeapScanDesc scan, Snapshot snapshot,
+                   Page page, Buffer buffer,
+                   BlockNumber block, int lines,
+                   bool all_visible, bool check_serializable)
+{
+   int         ntup = 0;
+   OffsetNumber lineoff;
+
+   for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
+   {
+       ItemId      lpp = PageGetItemId(page, lineoff);
+       HeapTupleData loctup;
+       bool        valid;
+
+       if (!ItemIdIsNormal(lpp))
+           continue;
+
+       loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
+       loctup.t_len = ItemIdGetLength(lpp);
+       loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
+       ItemPointerSet(&(loctup.t_self), block, lineoff);
+
+       if (all_visible)
+           valid = true;
+       else
+           valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
+
+       if (check_serializable)
+           HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
+                                               &loctup, buffer, snapshot);
+
+       if (valid)
+       {
+           scan->rs_vistuples[ntup] = lineoff;
+           ntup++;
+       }
+   }
+
+   Assert(ntup <= MaxHeapTuplesPerPage);
+
+   return ntup;
+}
+
 /*
  * heap_prepare_pagescan - Prepare current scan page to be scanned in pagemode
  *
@@ -379,9 +429,8 @@ heap_prepare_pagescan(TableScanDesc sscan)
    Snapshot    snapshot;
    Page        page;
    int         lines;
-   int         ntup;
-   OffsetNumber lineoff;
    bool        all_visible;
+   bool        check_serializable;
 
    Assert(BufferGetBlockNumber(buffer) == block);
 
@@ -403,7 +452,6 @@ heap_prepare_pagescan(TableScanDesc sscan)
 
    page = BufferGetPage(buffer);
    lines = PageGetMaxOffsetNumber(page);
-   ntup = 0;
 
    /*
     * If the all-visible flag indicates that all tuples on the page are
@@ -426,37 +474,35 @@ heap_prepare_pagescan(TableScanDesc sscan)
     * tuple for visibility the hard way.
     */
    all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery;
+   check_serializable =
+       CheckForSerializableConflictOutNeeded(scan->rs_base.rs_rd, snapshot);
 
-   for (lineoff = FirstOffsetNumber; lineoff <= lines; lineoff++)
+   /*
+    * We call heapgetpage_collect() with constant arguments, to get the
+    * compiler to constant fold the constant arguments. Separate calls with
+    * constant arguments, rather than variables, are needed on several
+    * compilers to actually perform constant folding.
+    */
+   if (likely(all_visible))
    {
-       ItemId      lpp = PageGetItemId(page, lineoff);
-       HeapTupleData loctup;
-       bool        valid;
-
-       if (!ItemIdIsNormal(lpp))
-           continue;
-
-       loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd);
-       loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp);
-       loctup.t_len = ItemIdGetLength(lpp);
-       ItemPointerSet(&(loctup.t_self), block, lineoff);
-
-       if (all_visible)
-           valid = true;
+       if (likely(!check_serializable))
+           scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+                                                  block, lines, true, false);
        else
-           valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer);
-
-       HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd,
-                                           &loctup, buffer, snapshot);
-
-       if (valid)
-           scan->rs_vistuples[ntup++] = lineoff;
+           scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+                                                  block, lines, true, true);
+   }
+   else
+   {
+       if (likely(!check_serializable))
+           scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+                                                  block, lines, false, false);
+       else
+           scan->rs_ntuples = heapgetpage_collect(scan, snapshot, page, buffer,
+                                                  block, lines, false, true);
    }
 
    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
-
-   Assert(ntup <= MaxHeapTuplesPerPage);
-   scan->rs_ntuples = ntup;
 }
 
 /*
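
For reference, the two macros the patch leans on, pg_attribute_always_inline
and likely(), come from PostgreSQL's src/include/c.h. Simplified
approximations of their definitions follow (the real header carries more
compiler conditionals):

/* force inlining where the compiler supports it (simplified from c.h) */
#if defined(__GNUC__)
#define pg_attribute_always_inline __attribute__((always_inline)) inline
#elif defined(_MSC_VER)
#define pg_attribute_always_inline __forceinline
#else
#define pg_attribute_always_inline inline
#endif

/* branch-probability hints; both return the boolean value of x */
#ifdef __GNUC__
#define likely(x)   __builtin_expect((x) != 0, 1)
#define unlikely(x) __builtin_expect((x) != 0, 0)
#else
#define likely(x)   ((x) != 0)
#define unlikely(x) ((x) != 0)
#endif

The always_inline marking is what makes the specialization dependable: a
plain static inline helper might not be inlined into all four call sites at
lower optimization levels, which would leave the boolean tests inside the
loop.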