Revert "TupleHashTable: store additional data along with tuple."
authorJeff Davis <[email protected]>
Mon, 13 Jan 2025 22:14:07 +0000 (14:14 -0800)
committerJeff Davis <[email protected]>
Mon, 13 Jan 2025 22:14:33 +0000 (14:14 -0800)
This reverts commit e0ece2a981ee9068f50c4423e303836c2585eb02 due to
performance regressions.

Reported-by: David Rowley
src/backend/executor/execGrouping.c
src/backend/executor/nodeAgg.c
src/backend/executor/nodeSetOp.c
src/backend/executor/nodeSubplan.c
src/include/executor/executor.h
src/include/nodes/execnodes.h

index 403104956753ec328b81641da6b766baa9823093..33b124fbb0a5014c451a83ef27325775d18c0b9d 100644 (file)
 #include "miscadmin.h"
 #include "utils/lsyscache.h"
 
-struct TupleHashEntryData
-{
-       MinimalTuple firstTuple;        /* copy of first tuple in this group */
-       uint32          status;                 /* hash status */
-       uint32          hash;                   /* hash value (cached) */
-};
-
 static int     TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
                                                                                                 const MinimalTuple tuple);
@@ -203,7 +196,6 @@ BuildTupleHashTable(PlanState *parent,
        hashtable->tab_collations = collations;
        hashtable->tablecxt = tablecxt;
        hashtable->tempcxt = tempcxt;
-       hashtable->additionalsize = additionalsize;
        hashtable->tableslot = NULL;    /* will be made on first lookup */
        hashtable->inputslot = NULL;
        hashtable->in_hash_expr = NULL;
@@ -281,15 +273,6 @@ ResetTupleHashTable(TupleHashTable hashtable)
        tuplehash_reset(hashtable->hashtab);
 }
 
-/*
- * Return size of the hash bucket. Useful for estimating memory usage.
- */
-size_t
-TupleHashEntrySize(void)
-{
-       return sizeof(TupleHashEntryData);
-}
-
 /*
  * Find or create a hashtable entry for the tuple group containing the
  * given tuple.  The tuple must be the same type as the hashtable entries.
@@ -356,24 +339,6 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
        return hash;
 }
 
-MinimalTuple
-TupleHashEntryGetTuple(TupleHashEntry entry)
-{
-       return entry->firstTuple;
-}
-
-/*
- * Get a pointer into the additional space allocated for this entry. The
- * amount of space available is the additionalsize specified to
- * BuildTupleHashTable(). If additionalsize was specified as zero, no
- * additional space is available and this function should not be called.
- */
-void *
-TupleHashEntryGetAdditional(TupleHashEntry entry)
-{
-       return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
-}
-
 /*
  * A variant of LookupTupleHashEntry for callers that have already computed
  * the hash value.
@@ -512,31 +477,13 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
                }
                else
                {
-                       MinimalTuple firstTuple;
-                       size_t          totalsize;      /* including alignment and additionalsize */
-
                        /* created new entry */
                        *isnew = true;
                        /* zero caller data */
+                       entry->additional = NULL;
                        MemoryContextSwitchTo(hashtable->tablecxt);
-
                        /* Copy the first tuple into the table context */
-                       firstTuple = ExecCopySlotMinimalTuple(slot);
-
-                       /*
-                        * Allocate additional space right after the MinimalTuple of size
-                        * additionalsize. The caller can get a pointer to this data with
-                        * TupleHashEntryGetAdditional(), and store arbitrary data there.
-                        *
-                        * This avoids the need to store an extra pointer or allocate an
-                        * additional chunk, which would waste memory.
-                        */
-                       totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
-                       firstTuple = repalloc(firstTuple, totalsize);
-                       memset((char *) firstTuple + firstTuple->t_len, 0,
-                                  totalsize - firstTuple->t_len);
-
-                       entry->firstTuple = firstTuple;
+                       entry->firstTuple = ExecCopySlotMinimalTuple(slot);
                }
        }
        else
index 19640752967667410a98959105c2ce2e51e9eb62..3005b5c0e3b89ee38a4ba2288d61c60676232df6 100644 (file)
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
                transitionChunkSize = 0;
 
        return
-               TupleHashEntrySize() +
+               sizeof(TupleHashEntryData) +
                tupleChunkSize +
                pergroupChunkSize +
                transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
        if (aggstate->hash_ngroups_current > 0)
        {
                aggstate->hashentrysize =
-                       TupleHashEntrySize() +
+                       sizeof(TupleHashEntryData) +
                        (hashkey_mem / (double) aggstate->hash_ngroups_current);
        }
 }
@@ -2055,7 +2055,11 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
        if (aggstate->numtrans == 0)
                return;
 
-       pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
+       pergroup = (AggStatePerGroup)
+               MemoryContextAlloc(hashtable->tablecxt,
+                                                  sizeof(AggStatePerGroupData) * aggstate->numtrans);
+
+       entry->additional = pergroup;
 
        /*
         * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2119,7 +2123,7 @@ lookup_hash_entries(AggState *aggstate)
                {
                        if (isnew)
                                initialize_hash_entry(aggstate, hashtable, entry);
-                       pergroup[setno] = TupleHashEntryGetAdditional(entry);
+                       pergroup[setno] = entry->additional;
                }
                else
                {
@@ -2677,7 +2681,7 @@ agg_refill_hash_table(AggState *aggstate)
                {
                        if (isnew)
                                initialize_hash_entry(aggstate, perhash->hashtable, entry);
-                       aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
+                       aggstate->hash_pergroup[batch->setno] = entry->additional;
                        advance_aggregates(aggstate);
                }
                else
@@ -2769,7 +2773,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
        ExprContext *econtext;
        AggStatePerAgg peragg;
        AggStatePerGroup pergroup;
-       TupleHashEntry entry;
+       TupleHashEntryData *entry;
        TupleTableSlot *firstSlot;
        TupleTableSlot *result;
        AggStatePerHash perhash;
@@ -2841,7 +2845,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
                 * Transform representative tuple back into one with the right
                 * columns.
                 */
-               ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
+               ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
                slot_getallattrs(hashslot);
 
                ExecClearTuple(firstSlot);
@@ -2857,7 +2861,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
                }
                ExecStoreVirtualTuple(firstSlot);
 
-               pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
+               pergroup = (AggStatePerGroup) entry->additional;
 
                /*
                 * Use the representative input tuple for any references to
index 2a50d56fc8798c4ff048e64398d120792b24ce68..5b7ff9c374850db6ab451252bbada724a39d39bc 100644 (file)
@@ -425,7 +425,6 @@ setop_fill_hash_table(SetOpState *setopstate)
        {
                TupleTableSlot *outerslot;
                TupleHashEntryData *entry;
-               SetOpStatePerGroup pergroup;
                bool            isnew;
 
                outerslot = ExecProcNode(outerPlan);
@@ -438,16 +437,16 @@ setop_fill_hash_table(SetOpState *setopstate)
                                                                         outerslot,
                                                                         &isnew, NULL);
 
-               pergroup = TupleHashEntryGetAdditional(entry);
                /* If new tuple group, initialize counts to zero */
                if (isnew)
                {
-                       pergroup->numLeft = 0;
-                       pergroup->numRight = 0;
+                       entry->additional = (SetOpStatePerGroup)
+                               MemoryContextAllocZero(setopstate->hashtable->tablecxt,
+                                                                          sizeof(SetOpStatePerGroupData));
                }
 
                /* Advance the counts */
-               pergroup->numLeft++;
+               ((SetOpStatePerGroup) entry->additional)->numLeft++;
 
                /* Must reset expression context after each hashtable lookup */
                ResetExprContext(econtext);
@@ -479,7 +478,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 
                        /* Advance the counts if entry is already present */
                        if (entry)
-                               ((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
+                               ((SetOpStatePerGroup) entry->additional)->numRight++;
 
                        /* Must reset expression context after each hashtable lookup */
                        ResetExprContext(econtext);
@@ -497,7 +496,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 static TupleTableSlot *
 setop_retrieve_hash_table(SetOpState *setopstate)
 {
-       TupleHashEntry entry;
+       TupleHashEntryData *entry;
        TupleTableSlot *resultTupleSlot;
 
        /*
@@ -527,12 +526,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
                 * See if we should emit any copies of this tuple, and if so return
                 * the first copy.
                 */
-               set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
+               set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
 
                if (setopstate->numOutput > 0)
                {
                        setopstate->numOutput--;
-                       return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
+                       return ExecStoreMinimalTuple(entry->firstTuple,
                                                                                 resultTupleSlot,
                                                                                 false);
                }
index f7f6fc2da0b95b48f7159fb976505dd00d6d76d0..49767ed6a52af813cb4efb72f65ad33a4f842d29 100644 (file)
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
        {
                CHECK_FOR_INTERRUPTS();
 
-               ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
+               ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
                if (!execTuplesUnequal(slot, hashtable->tableslot,
                                                           numCols, keyColIdx,
                                                           eqfunctions,
index a49c32072e3ab37fd2a9252cb571b263bfcbdc44..f8a8d03e5337599eedc0c815f1c202004411b427 100644 (file)
@@ -148,12 +148,9 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
                                                                                   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
                                                                 TupleTableSlot *slot);
-extern size_t TupleHashEntrySize(void);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
                                                                                           TupleTableSlot *slot,
                                                                                           bool *isnew, uint32 hash);
-extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
-extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
                                                                                 TupleTableSlot *slot,
                                                                                 ExprState *eqcomp,
index 28d45e0e2aadd693917ee5be557c39783f3b6e64..b3f7aa299f57ab6f238bbebb52b1d8b416685a65 100644 (file)
@@ -806,10 +806,17 @@ typedef struct ExecAuxRowMark
  * point to tab_hash_expr and tab_eq_func respectively.
  * ----------------------------------------------------------------
  */
-typedef struct TupleHashEntryData TupleHashEntryData;
 typedef struct TupleHashEntryData *TupleHashEntry;
 typedef struct TupleHashTableData *TupleHashTable;
 
+typedef struct TupleHashEntryData
+{
+       MinimalTuple firstTuple;        /* copy of first tuple in this group */
+       void       *additional;         /* user data */
+       uint32          status;                 /* hash status */
+       uint32          hash;                   /* hash value (cached) */
+} TupleHashEntryData;
+
 /* define parameters necessary to generate the tuple hash table interface */
 #define SH_PREFIX tuplehash
 #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -828,7 +835,6 @@ typedef struct TupleHashTableData
        Oid                *tab_collations; /* collations for hash and comparison */
        MemoryContext tablecxt;         /* memory context containing table */
        MemoryContext tempcxt;          /* context for function evaluations */
-       Size            additionalsize; /* size of additional data */
        TupleTableSlot *tableslot;      /* slot for referencing table entries */
        /* The following fields are set transiently for each table search: */
        TupleTableSlot *inputslot;      /* current input tuple's slot */