From: Jeff Davis Date: Mon, 13 Jan 2025 22:14:07 +0000 (-0800) Subject: Revert "TupleHashTable: store additional data along with tuple." X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=b4a07f532b40a64fb4714a3f7ab6063435411edb;p=users%2Fc2main%2Fpostgres.git Revert "TupleHashTable: store additional data along with tuple." This reverts commit e0ece2a981ee9068f50c4423e303836c2585eb02 due to performance regressions. Reported-by: David Rowley --- diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 4031049567..33b124fbb0 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -20,13 +20,6 @@ #include "miscadmin.h" #include "utils/lsyscache.h" -struct TupleHashEntryData -{ - MinimalTuple firstTuple; /* copy of first tuple in this group */ - uint32 status; /* hash status */ - uint32 hash; /* hash value (cached) */ -}; - static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2); static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple); @@ -203,7 +196,6 @@ BuildTupleHashTable(PlanState *parent, hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; - hashtable->additionalsize = additionalsize; hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->inputslot = NULL; hashtable->in_hash_expr = NULL; @@ -281,15 +273,6 @@ ResetTupleHashTable(TupleHashTable hashtable) tuplehash_reset(hashtable->hashtab); } -/* - * Return size of the hash bucket. Useful for estimating memory usage. - */ -size_t -TupleHashEntrySize(void) -{ - return sizeof(TupleHashEntryData); -} - /* * Find or create a hashtable entry for the tuple group containing the * given tuple. The tuple must be the same type as the hashtable entries. @@ -356,24 +339,6 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot) return hash; } -MinimalTuple -TupleHashEntryGetTuple(TupleHashEntry entry) -{ - return entry->firstTuple; -} - -/* - * Get a pointer into the additional space allocated for this entry. The - * amount of space available is the additionalsize specified to - * BuildTupleHashTable(). If additionalsize was specified as zero, no - * additional space is available and this function should not be called. - */ -void * -TupleHashEntryGetAdditional(TupleHashEntry entry) -{ - return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len); -} - /* * A variant of LookupTupleHashEntry for callers that have already computed * the hash value. @@ -512,31 +477,13 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, } else { - MinimalTuple firstTuple; - size_t totalsize; /* including alignment and additionalsize */ - /* created new entry */ *isnew = true; /* zero caller data */ + entry->additional = NULL; MemoryContextSwitchTo(hashtable->tablecxt); - /* Copy the first tuple into the table context */ - firstTuple = ExecCopySlotMinimalTuple(slot); - - /* - * Allocate additional space right after the MinimalTuple of size - * additionalsize. The caller can get a pointer to this data with - * TupleHashEntryGetAdditional(), and store arbitrary data there. - * - * This avoids the need to store an extra pointer or allocate an - * additional chunk, which would waste memory. - */ - totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize; - firstTuple = repalloc(firstTuple, totalsize); - memset((char *) firstTuple + firstTuple->t_len, 0, - totalsize - firstTuple->t_len); - - entry->firstTuple = firstTuple; + entry->firstTuple = ExecCopySlotMinimalTuple(slot); } } else diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index 1964075296..3005b5c0e3 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace) transitionChunkSize = 0; return - TupleHashEntrySize() + + sizeof(TupleHashEntryData) + tupleChunkSize + pergroupChunkSize + transitionChunkSize; @@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions) if (aggstate->hash_ngroups_current > 0) { aggstate->hashentrysize = - TupleHashEntrySize() + + sizeof(TupleHashEntryData) + (hashkey_mem / (double) aggstate->hash_ngroups_current); } } @@ -2055,7 +2055,11 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable, if (aggstate->numtrans == 0) return; - pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry); + pergroup = (AggStatePerGroup) + MemoryContextAlloc(hashtable->tablecxt, + sizeof(AggStatePerGroupData) * aggstate->numtrans); + + entry->additional = pergroup; /* * Initialize aggregates for new tuple group, lookup_hash_entries() @@ -2119,7 +2123,7 @@ lookup_hash_entries(AggState *aggstate) { if (isnew) initialize_hash_entry(aggstate, hashtable, entry); - pergroup[setno] = TupleHashEntryGetAdditional(entry); + pergroup[setno] = entry->additional; } else { @@ -2677,7 +2681,7 @@ agg_refill_hash_table(AggState *aggstate) { if (isnew) initialize_hash_entry(aggstate, perhash->hashtable, entry); - aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry); + aggstate->hash_pergroup[batch->setno] = entry->additional; advance_aggregates(aggstate); } else @@ -2769,7 +2773,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate) ExprContext *econtext; AggStatePerAgg peragg; AggStatePerGroup pergroup; - TupleHashEntry entry; + TupleHashEntryData *entry; TupleTableSlot *firstSlot; TupleTableSlot *result; AggStatePerHash perhash; @@ -2841,7 +2845,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate) * Transform representative tuple back into one with the right * columns. */ - ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false); + ExecStoreMinimalTuple(entry->firstTuple, hashslot, false); slot_getallattrs(hashslot); ExecClearTuple(firstSlot); @@ -2857,7 +2861,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate) } ExecStoreVirtualTuple(firstSlot); - pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry); + pergroup = (AggStatePerGroup) entry->additional; /* * Use the representative input tuple for any references to diff --git a/src/backend/executor/nodeSetOp.c b/src/backend/executor/nodeSetOp.c index 2a50d56fc8..5b7ff9c374 100644 --- a/src/backend/executor/nodeSetOp.c +++ b/src/backend/executor/nodeSetOp.c @@ -425,7 +425,6 @@ setop_fill_hash_table(SetOpState *setopstate) { TupleTableSlot *outerslot; TupleHashEntryData *entry; - SetOpStatePerGroup pergroup; bool isnew; outerslot = ExecProcNode(outerPlan); @@ -438,16 +437,16 @@ setop_fill_hash_table(SetOpState *setopstate) outerslot, &isnew, NULL); - pergroup = TupleHashEntryGetAdditional(entry); /* If new tuple group, initialize counts to zero */ if (isnew) { - pergroup->numLeft = 0; - pergroup->numRight = 0; + entry->additional = (SetOpStatePerGroup) + MemoryContextAllocZero(setopstate->hashtable->tablecxt, + sizeof(SetOpStatePerGroupData)); } /* Advance the counts */ - pergroup->numLeft++; + ((SetOpStatePerGroup) entry->additional)->numLeft++; /* Must reset expression context after each hashtable lookup */ ResetExprContext(econtext); @@ -479,7 +478,7 @@ setop_fill_hash_table(SetOpState *setopstate) /* Advance the counts if entry is already present */ if (entry) - ((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++; + ((SetOpStatePerGroup) entry->additional)->numRight++; /* Must reset expression context after each hashtable lookup */ ResetExprContext(econtext); @@ -497,7 +496,7 @@ setop_fill_hash_table(SetOpState *setopstate) static TupleTableSlot * setop_retrieve_hash_table(SetOpState *setopstate) { - TupleHashEntry entry; + TupleHashEntryData *entry; TupleTableSlot *resultTupleSlot; /* @@ -527,12 +526,12 @@ setop_retrieve_hash_table(SetOpState *setopstate) * See if we should emit any copies of this tuple, and if so return * the first copy. */ - set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry)); + set_output_count(setopstate, (SetOpStatePerGroup) entry->additional); if (setopstate->numOutput > 0) { setopstate->numOutput--; - return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), + return ExecStoreMinimalTuple(entry->firstTuple, resultTupleSlot, false); } diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c index f7f6fc2da0..49767ed6a5 100644 --- a/src/backend/executor/nodeSubplan.c +++ b/src/backend/executor/nodeSubplan.c @@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot, { CHECK_FOR_INTERRUPTS(); - ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false); + ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false); if (!execTuplesUnequal(slot, hashtable->tableslot, numCols, keyColIdx, eqfunctions, diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index a49c32072e..f8a8d03e53 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -148,12 +148,9 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, bool *isnew, uint32 *hash); extern uint32 TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot); -extern size_t TupleHashEntrySize(void); extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew, uint32 hash); -extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry); -extern void *TupleHashEntryGetAdditional(TupleHashEntry entry); extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 28d45e0e2a..b3f7aa299f 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -806,10 +806,17 @@ typedef struct ExecAuxRowMark * point to tab_hash_expr and tab_eq_func respectively. * ---------------------------------------------------------------- */ -typedef struct TupleHashEntryData TupleHashEntryData; typedef struct TupleHashEntryData *TupleHashEntry; typedef struct TupleHashTableData *TupleHashTable; +typedef struct TupleHashEntryData +{ + MinimalTuple firstTuple; /* copy of first tuple in this group */ + void *additional; /* user data */ + uint32 status; /* hash status */ + uint32 hash; /* hash value (cached) */ +} TupleHashEntryData; + /* define parameters necessary to generate the tuple hash table interface */ #define SH_PREFIX tuplehash #define SH_ELEMENT_TYPE TupleHashEntryData @@ -828,7 +835,6 @@ typedef struct TupleHashTableData Oid *tab_collations; /* collations for hash and comparison */ MemoryContext tablecxt; /* memory context containing table */ MemoryContext tempcxt; /* context for function evaluations */ - Size additionalsize; /* size of additional data */ TupleTableSlot *tableslot; /* slot for referencing table entries */ /* The following fields are set transiently for each table search: */ TupleTableSlot *inputslot; /* current input tuple's slot */