Speed up hash index builds by skipping needless binary searches

author David Rowley <[email protected]>

Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)

committer David Rowley <[email protected]>

Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)
author David Rowley <[email protected]>
Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)
committer David Rowley <[email protected]>
Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c

index c361509d68df52948d9fc30a1ec07f8c92c53ea3..77fd147f68a0c9ff61a2c75d215ea72c733d42a8 100644 (file)
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -231,7 +231,7 @@ hashbuildCallback(Relation index,
         itup = index_form_tuple(RelationGetDescr(index),
                                 index_values, index_isnull);
         itup->t_tid = *tid;
-       _hash_doinsert(index, itup, buildstate->heapRel);
+       _hash_doinsert(index, itup, buildstate->heapRel, false);
         pfree(itup);
     }
  
@@ -265,7 +265,7 @@ hashinsert(Relation rel, Datum *values, bool *isnull,
     itup = index_form_tuple(RelationGetDescr(rel), index_values, index_isnull);
     itup->t_tid = *ht_ctid;
  
-   _hash_doinsert(rel, itup, heapRel);
+   _hash_doinsert(rel, itup, heapRel, false);
  
     pfree(itup);
  
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c

index 23907d2e5b11f64d6f363bba239a0089c54ac3e2..9db522051ef9fd2c94ed9432b8f5fdd450fe1167 100644 (file)
--- a/src/backend/access/hash/hashinsert.c
+++ b/src/backend/access/hash/hashinsert.c
@@ -32,9 +32,12 @@ static void _hash_vacuum_one_page(Relation rel, Relation hrel,
   *
   *     This routine is called by the public interface routines, hashbuild
   *     and hashinsert.  By here, itup is completely filled in.
+ *
+ * 'sorted' must only be passed as 'true' when inserts are done in hashkey
+ * order.
   */
  void
-_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel)
+_hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel, bool sorted)
  {
     Buffer      buf = InvalidBuffer;
     Buffer      bucket_buf;
@@ -198,7 +201,7 @@ restart_insert:
     START_CRIT_SECTION();
  
     /* found page with enough space, so add the item here */
-   itup_off = _hash_pgaddtup(rel, buf, itemsz, itup);
+   itup_off = _hash_pgaddtup(rel, buf, itemsz, itup, sorted);
     MarkBufferDirty(buf);
  
     /* metapage operations */
@@ -263,21 +266,43 @@ restart_insert:
   *
   * Returns the offset number at which the tuple was inserted.  This function
   * is responsible for preserving the condition that tuples in a hash index
- * page are sorted by hashkey value.
+ * page are sorted by hashkey value.  However, if the caller is certain that
+ * the hashkey for the tuple being added is >= the hashkeys of all existing
+ * tuples on the page, then the 'appendtup' flag may be passed as true.  This
+ * avoids having to binary search for the correct location to insert the
+ * tuple.
   */
  OffsetNumber
-_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup)
+_hash_pgaddtup(Relation rel, Buffer buf, Size itemsize, IndexTuple itup,
+              bool appendtup)
  {
     OffsetNumber itup_off;
     Page        page;
-   uint32      hashkey;
  
     _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
     page = BufferGetPage(buf);
  
-   /* Find where to insert the tuple (preserving page's hashkey ordering) */
-   hashkey = _hash_get_indextuple_hashkey(itup);
-   itup_off = _hash_binsearch(page, hashkey);
+   /*
+    * Find where to insert the tuple (preserving page's hashkey ordering). If
+    * 'appendtup' is true then we just insert it at the end.
+    */
+   if (appendtup)
+   {
+       itup_off = PageGetMaxOffsetNumber(page) + 1;
+
+       /* ensure this tuple's hashkey is >= that of the final existing tuple */
+       Assert(PageGetMaxOffsetNumber(page) == 0 ||
+              _hash_get_indextuple_hashkey((IndexTuple)
+                                           PageGetItem(page, PageGetItemId(page,
+                                                                           PageGetMaxOffsetNumber(page)))) <=
+              _hash_get_indextuple_hashkey(itup));
+   }
+   else
+   {
+       uint32      hashkey = _hash_get_indextuple_hashkey(itup);
+
+       itup_off = _hash_binsearch(page, hashkey);
+   }
  
     if (PageAddItem(page, (Item) itup, itemsize, itup_off, false, false)
         == InvalidOffsetNumber)
diff --git a/src/backend/access/hash/hashsort.c b/src/backend/access/hash/hashsort.c

index 19563148d052bddd353b4cf6e6fc472f1df10bec..a4ab8384e13cf8b50182e890ee111ec00f4f6b51 100644 (file)
--- a/src/backend/access/hash/hashsort.c
+++ b/src/backend/access/hash/hashsort.c
@@ -145,7 +145,8 @@ _h_indexbuild(HSpool *hspool, Relation heapRel)
         Assert(hashkey >= lasthashkey);
  #endif
  
-       _hash_doinsert(hspool->index, itup, heapRel);
+       /* the tuples are sorted by hashkey, so pass 'sorted' as true */
+       _hash_doinsert(hspool->index, itup, heapRel, true);
  
         pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE,
                                      ++tups_done);
diff --git a/src/include/access/hash.h b/src/include/access/hash.h

index da372841c4b4184e368454c15e3d1bb126b7f8a2..02f35e63a707284f7acdb87066ae9724f5a65f90 100644 (file)
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -390,9 +390,11 @@ extern void hashadjustmembers(Oid opfamilyoid,
  /* private routines */
  
  /* hashinsert.c */
-extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel);
+extern void _hash_doinsert(Relation rel, IndexTuple itup, Relation heapRel,
+                          bool sorted);
  extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
-                                  Size itemsize, IndexTuple itup);
+                                  Size itemsize, IndexTuple itup,
+                                  bool appendtup);
  extern void _hash_pgaddmultitup(Relation rel, Buffer buf, IndexTuple *itups,
                                 OffsetNumber *itup_offsets, uint16 nitups);
author	David Rowley <[email protected]>
	Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)
committer	David Rowley <[email protected]>
	Thu, 24 Nov 2022 04:21:44 +0000 (17:21 +1300)
src/backend/access/hash/hash.c		patch \| blob \| blame \| history
src/backend/access/hash/hashinsert.c		patch \| blob \| blame \| history
src/backend/access/hash/hashsort.c		patch \| blob \| blame \| history
src/include/access/hash.h		patch \| blob \| blame \| history