sharedtuplestore.o \
sortsupport.o \
tuplesort.o \
+ tuplesortvariants.o \
tuplestore.o
include $(top_srcdir)/src/backend/common.mk
* tuplesort.c
* Generalized tuple sorting routines.
*
- * This module handles sorting of heap tuples, index tuples, or single
- * Datums (and could easily support other kinds of sortable objects,
- * if necessary). It works efficiently for both small and large amounts
+ * This module provides a generalized facility for tuple sorting, which can be
+ * applied to different kinds of sortable objects. Implementation of
+ * the particular sorting variants is given in tuplesortvariants.c.
+ * This module works efficiently for both small and large amounts
* of data. Small amounts are sorted in-memory using qsort(). Large
* amounts are sorted using temporary files and a standard external sort
* algorithm.
#include <limits.h>
-#include "access/hash.h"
-#include "access/htup_details.h"
-#include "access/nbtree.h"
-#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "commands/tablespace.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "pg_trace.h"
-#include "utils/datum.h"
-#include "utils/logtape.h"
-#include "utils/lsyscache.h"
+#include "storage/shmem.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
#include "utils/rel.h"
-#include "utils/sortsupport.h"
#include "utils/tuplesort.h"
-
-/* sort-type codes for sort__start probes */
-#define HEAP_SORT 0
-#define INDEX_SORT 1
-#define DATUM_SORT 2
-#define CLUSTER_SORT 3
-
-/* Sort parallel code from state for sort__start probes */
-#define PARALLEL_SORT(coordinate) (coordinate == NULL || \
- (coordinate)->sharedsort == NULL ? 0 : \
- (coordinate)->isWorker ? 1 : 2)
-
/*
* Initial size of memtuples array. We're trying to select this size so that
* array doesn't exceed ALLOCSET_SEPARATE_THRESHOLD and so that the overhead of
#endif
-/*
- * The objects we actually sort are SortTuple structs. These contain
- * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple),
- * which is a separate palloc chunk --- we assume it is just one chunk and
- * can be freed by a simple pfree() (except during merge, when we use a
- * simple slab allocator). SortTuples also contain the tuple's first key
- * column in Datum/nullflag format, and a source/input tape number that
- * tracks which tape each heap element/slot belongs to during merging.
- *
- * Storing the first key column lets us save heap_getattr or index_getattr
- * calls during tuple comparisons. We could extract and save all the key
- * columns not just the first, but this would increase code complexity and
- * overhead, and wouldn't actually save any comparison cycles in the common
- * case where the first key determines the comparison result. Note that
- * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
- *
- * There is one special case: when the sort support infrastructure provides an
- * "abbreviated key" representation, where the key is (typically) a pass by
- * value proxy for a pass by reference type. In this case, the abbreviated key
- * is stored in datum1 in place of the actual first key column.
- *
- * When sorting single Datums, the data value is represented directly by
- * datum1/isnull1 for pass by value types (or null values). If the datatype is
- * pass-by-reference and isnull1 is false, then "tuple" points to a separately
- * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then
- * either the same pointer as "tuple", or is an abbreviated key value as
- * described above. Accordingly, "tuple" is always used in preference to
- * datum1 as the authoritative value for pass-by-reference cases.
- */
-typedef struct
-{
- void *tuple; /* the tuple itself */
- Datum datum1; /* value of first key column */
- bool isnull1; /* is first key column NULL? */
- int srctape; /* source tape number */
-} SortTuple;
-
/*
* During merge, we use a pre-allocated set of fixed-size slots to hold
* tuples. To avoid palloc/pfree overhead.
#define TAPE_BUFFER_OVERHEAD BLCKSZ
#define MERGE_BUFFER_SIZE (BLCKSZ * 32)
-typedef struct TuplesortPublic TuplesortPublic;
-
-typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-
-/*
- * The public part of a Tuple sort operation state. This data structure
- * containsthe definition of sort-variant-specific interface methods and
- * the part of Tuple sort operation state required by their implementations.
- */
-struct TuplesortPublic
-{
- /*
- * These function pointers decouple the routines that must know what kind
- * of tuple we are sorting from the routines that don't need to know it.
- * They are set up by the tuplesort_begin_xxx routines.
- *
- * Function to compare two tuples; result is per qsort() convention, ie:
- * <0, 0, >0 according as a<b, a=b, a>b. The API must match
- * qsort_arg_comparator.
- */
- SortTupleComparator comparetup;
-
- /*
- * Alter datum1 representation in the SortTuple's array back from the
- * abbreviated key to the first column value.
- */
- void (*removeabbrev) (Tuplesortstate *state, SortTuple *stups,
- int count);
-
- /*
- * Function to write a stored tuple onto tape. The representation of the
- * tuple on tape need not be the same as it is in memory.
- */
- void (*writetup) (Tuplesortstate *state, LogicalTape *tape,
- SortTuple *stup);
-
- /*
- * Function to read a stored tuple from tape back into memory. 'len' is
- * the already-read length of the stored tuple. The tuple is allocated
- * from the slab memory arena, or is palloc'd, see readtup_alloc().
- */
- void (*readtup) (Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len);
-
- /*
- * Function to do some specific release of resources for the sort variant.
- * In particular, this function should free everything stored in the "arg"
- * field, which wouldn't be cleared on reset of the Tuple sort memory
- * contextes. This can be NULL if nothing specific needs to be done.
- */
- void (*freestate) (Tuplesortstate *state);
-
- /*
- * The subsequent fields are used in the implementations of the functions
- * above.
- */
- MemoryContext maincontext; /* memory context for tuple sort metadata that
- * persists across multiple batches */
- MemoryContext sortcontext; /* memory context holding most sort data */
- MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */
-
- /*
- * Whether SortTuple's datum1 and isnull1 members are maintained by the
- * above routines. If not, some sort specializations are disabled.
- */
- bool haveDatum1;
-
- /*
- * The sortKeys variable is used by every case other than the hash index
- * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the
- * MinimalTuple and CLUSTER routines, though.
- */
- int nKeys; /* number of columns in sort key */
- SortSupport sortKeys; /* array of length nKeys */
-
- /*
- * This variable is shared by the single-key MinimalTuple case and the
- * Datum case (which both use qsort_ssup()). Otherwise, it's NULL. The
- * presence of a value in this field is also checked by various sort
- * specialization functions as an optimization when comparing the leading
- * key in a tiebreak situation to determine if there are any subsequent
- * keys to sort on.
- */
- SortSupport onlyKey;
-
- int sortopt; /* Bitmask of flags used to setup sort */
-
- bool tuples; /* Can SortTuple.tuple ever be set? */
-
- void *arg; /* Specific information for the sort variant */
-};
-
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the CLUSTER case. Set by
- * the tuplesort_begin_cluster.
- */
-typedef struct
-{
- TupleDesc tupDesc;
-
- IndexInfo *indexInfo; /* info about index being used for reference */
- EState *estate; /* for evaluating index expressions */
-} TuplesortClusterArg;
-
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the IndexTuple case.
- * Set by tuplesort_begin_index_xxx and used only by the IndexTuple routines.
- */
-typedef struct
-{
- Relation heapRel; /* table the index is being built on */
- Relation indexRel; /* index being built */
-} TuplesortIndexArg;
-
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the index_btree subcase.
- */
-typedef struct
-{
- TuplesortIndexArg index;
-
- bool enforceUnique; /* complain if we find duplicate tuples */
- bool uniqueNullsNotDistinct; /* unique constraint null treatment */
-} TuplesortIndexBTreeArg;
-
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the index_hash subcase.
- */
-typedef struct
-{
- TuplesortIndexArg index;
-
- uint32 high_mask; /* masks for sortable part of hash code */
- uint32 low_mask;
- uint32 max_buckets;
-} TuplesortIndexHashArg;
-
-/*
- * Data struture pointed by "TuplesortPublic.arg" for the Datum case.
- * Set by tuplesort_begin_datum and used only by the DatumTuple routines.
- */
-typedef struct
-{
- /* the datatype oid of Datum's to be sorted */
- Oid datumType;
- /* we need typelen in order to know how to copy the Datums. */
- int datumTypeLen;
-} TuplesortDatumArg;
/*
* Private state of a Tuplesort operation.
pfree(buf); \
} while(0)
-#define TuplesortstateGetPublic(state) ((TuplesortPublic *) state)
-
#define REMOVEABBREV(state,stup,count) ((*(state)->base.removeabbrev) (state, stup, count))
#define COMPARETUP(state,a,b) ((*(state)->base.comparetup) (a, b, state))
#define WRITETUP(state,tape,stup) (writetuple(state, tape, stup))
* begins).
*/
-/* When using this macro, beware of double evaluation of len */
-#define LogicalTapeReadExact(tape, ptr, len) \
- do { \
- if (LogicalTapeRead(tape, ptr, len) != (size_t) (len)) \
- elog(ERROR, "unexpected end of data"); \
- } while(0)
-
-static Tuplesortstate *tuplesort_begin_common(int workMem,
- SortCoordinate coordinate,
- int sortopt);
static void tuplesort_begin_batch(Tuplesortstate *state);
-static void puttuple_common(Tuplesortstate *state, SortTuple *tuple,
- bool useAbbrev);
static void writetuple(Tuplesortstate *state, LogicalTape *tape,
SortTuple *stup);
static bool consider_abort_common(Tuplesortstate *state);
static void reversedirection(Tuplesortstate *state);
static unsigned int getlen(LogicalTape *tape, bool eofOK);
static void markrunend(LogicalTape *tape);
-static void *readtup_alloc(Tuplesortstate *state, Size tuplen);
-static void removeabbrev_heap(Tuplesortstate *state, SortTuple *stups,
- int count);
-static void removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups,
- int count);
-static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
- int count);
-static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
- int count);
-static int comparetup_heap(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-static void writetup_heap(Tuplesortstate *state, LogicalTape *tape,
- SortTuple *stup);
-static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len);
-static int comparetup_cluster(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape,
- SortTuple *stup);
-static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len);
-static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
- SortTuple *stup);
-static void readtup_index(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len);
-static int comparetup_datum(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state);
-static void writetup_datum(Tuplesortstate *state, LogicalTape *tape,
- SortTuple *stup);
-static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len);
-static void freestate_cluster(Tuplesortstate *state);
static int worker_get_identifier(Tuplesortstate *state);
static void worker_freeze_result_tape(Tuplesortstate *state);
static void worker_nomergeruns(Tuplesortstate *state);
* sort options. See TUPLESORT_* definitions in tuplesort.h
*/
-static Tuplesortstate *
+Tuplesortstate *
tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt)
{
Tuplesortstate *state;
MemoryContextSwitchTo(oldcontext);
}
-Tuplesortstate *
-tuplesort_begin_heap(TupleDesc tupDesc,
- int nkeys, AttrNumber *attNums,
- Oid *sortOperators, Oid *sortCollations,
- bool *nullsFirstFlags,
- int workMem, SortCoordinate coordinate, int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext;
- int i;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
-
- AssertArg(nkeys > 0);
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
- nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = nkeys;
-
- TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
- false, /* no unique check */
- nkeys,
- workMem,
- sortopt & TUPLESORT_RANDOMACCESS,
- PARALLEL_SORT(coordinate));
-
- base->removeabbrev = removeabbrev_heap;
- base->comparetup = comparetup_heap;
- base->writetup = writetup_heap;
- base->readtup = readtup_heap;
- base->haveDatum1 = true;
- base->arg = tupDesc; /* assume we need not copy tupDesc */
-
- /* Prepare SortSupport data for each column */
- base->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
-
- for (i = 0; i < nkeys; i++)
- {
- SortSupport sortKey = base->sortKeys + i;
-
- AssertArg(attNums[i] != 0);
- AssertArg(sortOperators[i] != 0);
-
- sortKey->ssup_cxt = CurrentMemoryContext;
- sortKey->ssup_collation = sortCollations[i];
- sortKey->ssup_nulls_first = nullsFirstFlags[i];
- sortKey->ssup_attno = attNums[i];
- /* Convey if abbreviation optimization is applicable in principle */
- sortKey->abbreviate = (i == 0 && base->haveDatum1);
-
- PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
- }
-
- /*
- * The "onlyKey" optimization cannot be used with abbreviated keys, since
- * tie-breaker comparisons may be required. Typically, the optimization
- * is only of value to pass-by-value types anyway, whereas abbreviated
- * keys are typically only of value to pass-by-reference types.
- */
- if (nkeys == 1 && !base->sortKeys->abbrev_converter)
- base->onlyKey = base->sortKeys;
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
-Tuplesortstate *
-tuplesort_begin_cluster(TupleDesc tupDesc,
- Relation indexRel,
- int workMem,
- SortCoordinate coordinate, int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- BTScanInsert indexScanKey;
- MemoryContext oldcontext;
- TuplesortClusterArg *arg;
- int i;
-
- Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortClusterArg *) palloc0(sizeof(TuplesortClusterArg));
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
- RelationGetNumberOfAttributes(indexRel),
- workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
-
- TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
- false, /* no unique check */
- base->nKeys,
- workMem,
- sortopt & TUPLESORT_RANDOMACCESS,
- PARALLEL_SORT(coordinate));
-
- base->removeabbrev = removeabbrev_cluster;
- base->comparetup = comparetup_cluster;
- base->writetup = writetup_cluster;
- base->readtup = readtup_cluster;
- base->freestate = freestate_cluster;
- base->arg = arg;
-
- arg->indexInfo = BuildIndexInfo(indexRel);
-
- /*
- * If we don't have a simple leading attribute, we don't currently
- * initialize datum1, so disable optimizations that require it.
- */
- if (arg->indexInfo->ii_IndexAttrNumbers[0] == 0)
- base->haveDatum1 = false;
- else
- base->haveDatum1 = true;
-
- arg->tupDesc = tupDesc; /* assume we need not copy tupDesc */
-
- indexScanKey = _bt_mkscankey(indexRel, NULL);
-
- if (arg->indexInfo->ii_Expressions != NULL)
- {
- TupleTableSlot *slot;
- ExprContext *econtext;
-
- /*
- * We will need to use FormIndexDatum to evaluate the index
- * expressions. To do that, we need an EState, as well as a
- * TupleTableSlot to put the table tuples into. The econtext's
- * scantuple has to point to that slot, too.
- */
- arg->estate = CreateExecutorState();
- slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
- econtext = GetPerTupleExprContext(arg->estate);
- econtext->ecxt_scantuple = slot;
- }
-
- /* Prepare SortSupport data for each column */
- base->sortKeys = (SortSupport) palloc0(base->nKeys *
- sizeof(SortSupportData));
-
- for (i = 0; i < base->nKeys; i++)
- {
- SortSupport sortKey = base->sortKeys + i;
- ScanKey scanKey = indexScanKey->scankeys + i;
- int16 strategy;
-
- sortKey->ssup_cxt = CurrentMemoryContext;
- sortKey->ssup_collation = scanKey->sk_collation;
- sortKey->ssup_nulls_first =
- (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
- sortKey->ssup_attno = scanKey->sk_attno;
- /* Convey if abbreviation optimization is applicable in principle */
- sortKey->abbreviate = (i == 0 && base->haveDatum1);
-
- AssertState(sortKey->ssup_attno != 0);
-
- strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
- BTGreaterStrategyNumber : BTLessStrategyNumber;
-
- PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
- }
-
- pfree(indexScanKey);
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
-Tuplesortstate *
-tuplesort_begin_index_btree(Relation heapRel,
- Relation indexRel,
- bool enforceUnique,
- bool uniqueNullsNotDistinct,
- int workMem,
- SortCoordinate coordinate,
- int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- BTScanInsert indexScanKey;
- TuplesortIndexBTreeArg *arg;
- MemoryContext oldcontext;
- int i;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin index sort: unique = %c, workMem = %d, randomAccess = %c",
- enforceUnique ? 't' : 'f',
- workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
-
- TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
- enforceUnique,
- base->nKeys,
- workMem,
- sortopt & TUPLESORT_RANDOMACCESS,
- PARALLEL_SORT(coordinate));
-
- base->removeabbrev = removeabbrev_index;
- base->comparetup = comparetup_index_btree;
- base->writetup = writetup_index;
- base->readtup = readtup_index;
- base->haveDatum1 = true;
- base->arg = arg;
-
- arg->index.heapRel = heapRel;
- arg->index.indexRel = indexRel;
- arg->enforceUnique = enforceUnique;
- arg->uniqueNullsNotDistinct = uniqueNullsNotDistinct;
-
- indexScanKey = _bt_mkscankey(indexRel, NULL);
-
- /* Prepare SortSupport data for each column */
- base->sortKeys = (SortSupport) palloc0(base->nKeys *
- sizeof(SortSupportData));
-
- for (i = 0; i < base->nKeys; i++)
- {
- SortSupport sortKey = base->sortKeys + i;
- ScanKey scanKey = indexScanKey->scankeys + i;
- int16 strategy;
-
- sortKey->ssup_cxt = CurrentMemoryContext;
- sortKey->ssup_collation = scanKey->sk_collation;
- sortKey->ssup_nulls_first =
- (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
- sortKey->ssup_attno = scanKey->sk_attno;
- /* Convey if abbreviation optimization is applicable in principle */
- sortKey->abbreviate = (i == 0 && base->haveDatum1);
-
- AssertState(sortKey->ssup_attno != 0);
-
- strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
- BTGreaterStrategyNumber : BTLessStrategyNumber;
-
- PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
- }
-
- pfree(indexScanKey);
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
-Tuplesortstate *
-tuplesort_begin_index_hash(Relation heapRel,
- Relation indexRel,
- uint32 high_mask,
- uint32 low_mask,
- uint32 max_buckets,
- int workMem,
- SortCoordinate coordinate,
- int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext;
- TuplesortIndexHashArg *arg;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortIndexHashArg *) palloc(sizeof(TuplesortIndexHashArg));
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
- "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
- high_mask,
- low_mask,
- max_buckets,
- workMem,
- sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = 1; /* Only one sort column, the hash code */
-
- base->removeabbrev = removeabbrev_index;
- base->comparetup = comparetup_index_hash;
- base->writetup = writetup_index;
- base->readtup = readtup_index;
- base->haveDatum1 = true;
- base->arg = arg;
-
- arg->index.heapRel = heapRel;
- arg->index.indexRel = indexRel;
-
- arg->high_mask = high_mask;
- arg->low_mask = low_mask;
- arg->max_buckets = max_buckets;
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
-Tuplesortstate *
-tuplesort_begin_index_gist(Relation heapRel,
- Relation indexRel,
- int workMem,
- SortCoordinate coordinate,
- int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext;
- TuplesortIndexBTreeArg *arg;
- int i;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin index sort: workMem = %d, randomAccess = %c",
- workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
-
- base->removeabbrev = removeabbrev_index;
- base->comparetup = comparetup_index_btree;
- base->writetup = writetup_index;
- base->readtup = readtup_index;
- base->haveDatum1 = true;
- base->arg = arg;
-
- arg->index.heapRel = heapRel;
- arg->index.indexRel = indexRel;
- arg->enforceUnique = false;
- arg->uniqueNullsNotDistinct = false;
-
- /* Prepare SortSupport data for each column */
- base->sortKeys = (SortSupport) palloc0(base->nKeys *
- sizeof(SortSupportData));
-
- for (i = 0; i < base->nKeys; i++)
- {
- SortSupport sortKey = base->sortKeys + i;
-
- sortKey->ssup_cxt = CurrentMemoryContext;
- sortKey->ssup_collation = indexRel->rd_indcollation[i];
- sortKey->ssup_nulls_first = false;
- sortKey->ssup_attno = i + 1;
- /* Convey if abbreviation optimization is applicable in principle */
- sortKey->abbreviate = (i == 0 && base->haveDatum1);
-
- AssertState(sortKey->ssup_attno != 0);
-
- /* Look for a sort support function */
- PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
- }
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
-Tuplesortstate *
-tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
- bool nullsFirstFlag, int workMem,
- SortCoordinate coordinate, int sortopt)
-{
- Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
- sortopt);
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortDatumArg *arg;
- MemoryContext oldcontext;
- int16 typlen;
- bool typbyval;
-
- oldcontext = MemoryContextSwitchTo(base->maincontext);
- arg = (TuplesortDatumArg *) palloc(sizeof(TuplesortDatumArg));
-
-#ifdef TRACE_SORT
- if (trace_sort)
- elog(LOG,
- "begin datum sort: workMem = %d, randomAccess = %c",
- workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
-#endif
-
- base->nKeys = 1; /* always a one-column sort */
-
- TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
- false, /* no unique check */
- 1,
- workMem,
- sortopt & TUPLESORT_RANDOMACCESS,
- PARALLEL_SORT(coordinate));
-
- base->removeabbrev = removeabbrev_datum;
- base->comparetup = comparetup_datum;
- base->writetup = writetup_datum;
- base->readtup = readtup_datum;
- state->abbrevNext = 10;
- base->haveDatum1 = true;
- base->arg = arg;
-
- arg->datumType = datumType;
-
- /* lookup necessary attributes of the datum type */
- get_typlenbyval(datumType, &typlen, &typbyval);
- arg->datumTypeLen = typlen;
- base->tuples = !typbyval;
-
- /* Prepare SortSupport data */
- base->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
-
- base->sortKeys->ssup_cxt = CurrentMemoryContext;
- base->sortKeys->ssup_collation = sortCollation;
- base->sortKeys->ssup_nulls_first = nullsFirstFlag;
-
- /*
- * Abbreviation is possible here only for by-reference types. In theory,
- * a pass-by-value datatype could have an abbreviated form that is cheaper
- * to compare. In a tuple sort, we could support that, because we can
- * always extract the original datum from the tuple as needed. Here, we
- * can't, because a datum sort only stores a single copy of the datum; the
- * "tuple" field of each SortTuple is NULL.
- */
- base->sortKeys->abbreviate = !typbyval;
-
- PrepareSortSupportFromOrderingOp(sortOperator, base->sortKeys);
-
- /*
- * The "onlyKey" optimization cannot be used with abbreviated keys, since
- * tie-breaker comparisons may be required. Typically, the optimization
- * is only of value to pass-by-value types anyway, whereas abbreviated
- * keys are typically only of value to pass-by-reference types.
- */
- if (!base->sortKeys->abbrev_converter)
- base->onlyKey = base->sortKeys;
-
- MemoryContextSwitchTo(oldcontext);
-
- return state;
-}
-
/*
* tuplesort_set_bound
*
return false;
}
-/*
- * Accept one tuple while collecting input data for sort.
- *
- * Note that the input data is always copied; the caller need not save it.
- */
-void
-tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
- TupleDesc tupDesc = (TupleDesc) base->arg;
- SortTuple stup;
- MinimalTuple tuple;
- HeapTupleData htup;
-
- /* copy the tuple into sort storage */
- tuple = ExecCopySlotMinimalTuple(slot);
- stup.tuple = (void *) tuple;
- /* set up first-column key value */
- htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
- htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
- stup.datum1 = heap_getattr(&htup,
- base->sortKeys[0].ssup_attno,
- tupDesc,
- &stup.isnull1);
-
- puttuple_common(state, &stup,
- base->sortKeys->abbrev_converter && !stup.isnull1);
-
- MemoryContextSwitchTo(oldcontext);
-}
-
-/*
- * Accept one tuple while collecting input data for sort.
- *
- * Note that the input data is always copied; the caller need not save it.
- */
-void
-tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
-{
- SortTuple stup;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
- TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
-
- /* copy the tuple into sort storage */
- tup = heap_copytuple(tup);
- stup.tuple = (void *) tup;
-
- /*
- * set up first-column key value, and potentially abbreviate, if it's a
- * simple column
- */
- if (base->haveDatum1)
- {
- stup.datum1 = heap_getattr(tup,
- arg->indexInfo->ii_IndexAttrNumbers[0],
- arg->tupDesc,
- &stup.isnull1);
- }
-
- puttuple_common(state, &stup,
- base->haveDatum1 && base->sortKeys->abbrev_converter && !stup.isnull1);
-
- MemoryContextSwitchTo(oldcontext);
-}
-
-/*
- * Collect one index tuple while collecting input data for sort, building
- * it from caller-supplied values.
- */
-void
-tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
- ItemPointer self, Datum *values,
- bool *isnull)
-{
- SortTuple stup;
- IndexTuple tuple;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
-
- stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
- isnull, base->tuplecontext);
- tuple = ((IndexTuple) stup.tuple);
- tuple->t_tid = *self;
- /* set up first-column key value */
- stup.datum1 = index_getattr(tuple,
- 1,
- RelationGetDescr(arg->indexRel),
- &stup.isnull1);
-
- puttuple_common(state, &stup,
- base->sortKeys && base->sortKeys->abbrev_converter && !stup.isnull1);
-}
-
-/*
- * Accept one Datum while collecting input data for sort.
- *
- * If the Datum is pass-by-ref type, the value will be copied.
- */
-void
-tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
- TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
- SortTuple stup;
-
- /*
- * Pass-by-value types or null values are just stored directly in
- * stup.datum1 (and stup.tuple is not used and set to NULL).
- *
- * Non-null pass-by-reference values need to be copied into memory we
- * control, and possibly abbreviated. The copied value is pointed to by
- * stup.tuple and is treated as the canonical copy (e.g. to return via
- * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
- * abbreviated value if abbreviation is happening, otherwise it's
- * identical to stup.tuple.
- */
-
- if (isNull || !base->tuples)
- {
- /*
- * Set datum1 to zeroed representation for NULLs (to be consistent,
- * and to support cheap inequality tests for NULL abbreviated keys).
- */
- stup.datum1 = !isNull ? val : (Datum) 0;
- stup.isnull1 = isNull;
- stup.tuple = NULL; /* no separate storage */
- }
- else
- {
- stup.isnull1 = false;
- stup.datum1 = datumCopy(val, false, arg->datumTypeLen);
- stup.tuple = DatumGetPointer(stup.datum1);
- }
-
- puttuple_common(state, &stup,
- base->tuples && !isNull && base->sortKeys->abbrev_converter);
-
- MemoryContextSwitchTo(oldcontext);
-}
-
/*
* Shared code for tuple and datum cases.
*/
-static void
-puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
+void
+tuplesort_puttuple_common(Tuplesortstate *state, SortTuple *tuple, bool useAbbrev)
{
MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext);
* by caller. Note that fetched tuple is stored in memory that may be
* recycled by any future fetch.
*/
-static bool
+bool
tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
SortTuple *stup)
{
}
newtup.srctape = srcTapeIndex;
tuplesort_heap_replace_top(state, &newtup);
- return true;
- }
- return false;
-
- default:
- elog(ERROR, "invalid tuplesort state");
- return false; /* keep compiler quiet */
- }
-}
-
-/*
- * Fetch the next tuple in either forward or back direction.
- * If successful, put tuple in slot and return true; else, clear the slot
- * and return false.
- *
- * Caller may optionally be passed back abbreviated value (on true return
- * value) when abbreviation was used, which can be used to cheaply avoid
- * equality checks that might otherwise be required. Caller can safely make a
- * determination of "non-equal tuple" based on simple binary inequality. A
- * NULL value in leading attribute will set abbreviated value to zeroed
- * representation, which caller may rely on in abbreviated inequality check.
- *
- * If copy is true, the slot receives a tuple that's been copied into the
- * caller's memory context, so that it will stay valid regardless of future
- * manipulations of the tuplesort's state (up to and including deleting the
- * tuplesort). If copy is false, the slot will just receive a pointer to a
- * tuple held within the tuplesort, which is more efficient, but only safe for
- * callers that are prepared to have any subsequent manipulation of the
- * tuplesort's state invalidate slot contents.
- */
-bool
-tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
- TupleTableSlot *slot, Datum *abbrev)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
- SortTuple stup;
-
- if (!tuplesort_gettuple_common(state, forward, &stup))
- stup.tuple = NULL;
-
- MemoryContextSwitchTo(oldcontext);
-
- if (stup.tuple)
- {
- /* Record abbreviated key for caller */
- if (base->sortKeys->abbrev_converter && abbrev)
- *abbrev = stup.datum1;
-
- if (copy)
- stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple);
-
- ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy);
- return true;
- }
- else
- {
- ExecClearTuple(slot);
- return false;
- }
-}
-
-/*
- * Fetch the next tuple in either forward or back direction.
- * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
- * context, and must not be freed by caller. Caller may not rely on tuple
- * remaining valid after any further manipulation of tuplesort.
- */
-HeapTuple
-tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
- SortTuple stup;
-
- if (!tuplesort_gettuple_common(state, forward, &stup))
- stup.tuple = NULL;
-
- MemoryContextSwitchTo(oldcontext);
-
- return stup.tuple;
-}
-
-/*
- * Fetch the next index tuple in either forward or back direction.
- * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
- * context, and must not be freed by caller. Caller may not rely on tuple
- * remaining valid after any further manipulation of tuplesort.
- */
-IndexTuple
-tuplesort_getindextuple(Tuplesortstate *state, bool forward)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
- SortTuple stup;
-
- if (!tuplesort_gettuple_common(state, forward, &stup))
- stup.tuple = NULL;
-
- MemoryContextSwitchTo(oldcontext);
-
- return (IndexTuple) stup.tuple;
-}
-
-/*
- * Fetch the next Datum in either forward or back direction.
- * Returns false if no more datums.
- *
- * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
- * in caller's context, and is now owned by the caller (this differs from
- * similar routines for other types of tuplesorts).
- *
- * Caller may optionally be passed back abbreviated value (on true return
- * value) when abbreviation was used, which can be used to cheaply avoid
- * equality checks that might otherwise be required. Caller can safely make a
- * determination of "non-equal tuple" based on simple binary inequality. A
- * NULL value will have a zeroed abbreviated value representation, which caller
- * may rely on in abbreviated inequality check.
- */
-bool
-tuplesort_getdatum(Tuplesortstate *state, bool forward,
- Datum *val, bool *isNull, Datum *abbrev)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
- TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
- SortTuple stup;
-
- if (!tuplesort_gettuple_common(state, forward, &stup))
- {
- MemoryContextSwitchTo(oldcontext);
- return false;
- }
-
- /* Ensure we copy into caller's memory context */
- MemoryContextSwitchTo(oldcontext);
-
- /* Record abbreviated key for caller */
- if (base->sortKeys->abbrev_converter && abbrev)
- *abbrev = stup.datum1;
+ return true;
+ }
+ return false;
- if (stup.isnull1 || !base->tuples)
- {
- *val = stup.datum1;
- *isNull = stup.isnull1;
- }
- else
- {
- /* use stup.tuple because stup.datum1 may be an abbreviation */
- *val = datumCopy(PointerGetDatum(stup.tuple), false, arg->datumTypeLen);
- *isNull = false;
+ default:
+ elog(ERROR, "invalid tuplesort state");
+ return false; /* keep compiler quiet */
}
-
- return true;
}
+
/*
* Advance over N tuples in either forward or back direction,
* without returning any data. N==0 is a no-op.
* We use next free slot from the slab allocator, or palloc() if the tuple
* is too large for that.
*/
-static void *
-readtup_alloc(Tuplesortstate *state, Size tuplen)
+void *
+tuplesort_readtup_alloc(Tuplesortstate *state, Size tuplen)
{
SlabSlot *buf;
}
-/*
- * Routines specialized for HeapTuple (actually MinimalTuple) case
- */
-
-static void
-removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, int count)
-{
- int i;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
-
- for (i = 0; i < count; i++)
- {
- HeapTupleData htup;
-
- htup.t_len = ((MinimalTuple) stups[i].tuple)->t_len +
- MINIMAL_TUPLE_OFFSET;
- htup.t_data = (HeapTupleHeader) ((char *) stups[i].tuple -
- MINIMAL_TUPLE_OFFSET);
- stups[i].datum1 = heap_getattr(&htup,
- base->sortKeys[0].ssup_attno,
- (TupleDesc) base->arg,
- &stups[i].isnull1);
- }
-}
-
-static int
-comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- SortSupport sortKey = base->sortKeys;
- HeapTupleData ltup;
- HeapTupleData rtup;
- TupleDesc tupDesc;
- int nkey;
- int32 compare;
- AttrNumber attno;
- Datum datum1,
- datum2;
- bool isnull1,
- isnull2;
-
-
- /* Compare the leading sort key */
- compare = ApplySortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- sortKey);
- if (compare != 0)
- return compare;
-
- /* Compare additional sort keys */
- ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
- ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
- rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
- rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
- tupDesc = (TupleDesc) base->arg;
-
- if (sortKey->abbrev_converter)
- {
- attno = sortKey->ssup_attno;
-
- datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
- datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
-
- compare = ApplySortAbbrevFullComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- if (compare != 0)
- return compare;
- }
-
- sortKey++;
- for (nkey = 1; nkey < base->nKeys; nkey++, sortKey++)
- {
- attno = sortKey->ssup_attno;
-
- datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
- datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
-
- compare = ApplySortComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- if (compare != 0)
- return compare;
- }
-
- return 0;
-}
-
-static void
-writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- MinimalTuple tuple = (MinimalTuple) stup->tuple;
-
- /* the part of the MinimalTuple we'll write: */
- char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
- unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
-
- /* total on-disk footprint: */
- unsigned int tuplen = tupbodylen + sizeof(int);
-
- LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
- LogicalTapeWrite(tape, (void *) tupbody, tupbodylen);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
-}
-
-static void
-readtup_heap(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len)
-{
- unsigned int tupbodylen = len - sizeof(int);
- unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
- MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen);
- char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- HeapTupleData htup;
-
- /* read in the tuple proper */
- tuple->t_len = tuplen;
- LogicalTapeReadExact(tape, tupbody, tupbodylen);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
- stup->tuple = (void *) tuple;
- /* set up first-column key value */
- htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
- htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
- stup->datum1 = heap_getattr(&htup,
- base->sortKeys[0].ssup_attno,
- (TupleDesc) base->arg,
- &stup->isnull1);
-}
-
-/*
- * Routines specialized for the CLUSTER case (HeapTuple data, with
- * comparisons per a btree index definition)
- */
-
-static void
-removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, int count)
-{
- int i;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
-
- for (i = 0; i < count; i++)
- {
- HeapTuple tup;
-
- tup = (HeapTuple) stups[i].tuple;
- stups[i].datum1 = heap_getattr(tup,
- arg->indexInfo->ii_IndexAttrNumbers[0],
- arg->tupDesc,
- &stups[i].isnull1);
- }
-}
-
-static int
-comparetup_cluster(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
- SortSupport sortKey = base->sortKeys;
- HeapTuple ltup;
- HeapTuple rtup;
- TupleDesc tupDesc;
- int nkey;
- int32 compare;
- Datum datum1,
- datum2;
- bool isnull1,
- isnull2;
-
- /* Be prepared to compare additional sort keys */
- ltup = (HeapTuple) a->tuple;
- rtup = (HeapTuple) b->tuple;
- tupDesc = arg->tupDesc;
-
- /* Compare the leading sort key, if it's simple */
- if (base->haveDatum1)
- {
- compare = ApplySortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- sortKey);
- if (compare != 0)
- return compare;
-
- if (sortKey->abbrev_converter)
- {
- AttrNumber leading = arg->indexInfo->ii_IndexAttrNumbers[0];
-
- datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
- datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
-
- compare = ApplySortAbbrevFullComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- }
- if (compare != 0 || base->nKeys == 1)
- return compare;
- /* Compare additional columns the hard way */
- sortKey++;
- nkey = 1;
- }
- else
- {
- /* Must compare all keys the hard way */
- nkey = 0;
- }
-
- if (arg->indexInfo->ii_Expressions == NULL)
- {
- /* If not expression index, just compare the proper heap attrs */
-
- for (; nkey < base->nKeys; nkey++, sortKey++)
- {
- AttrNumber attno = arg->indexInfo->ii_IndexAttrNumbers[nkey];
-
- datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
- datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
-
- compare = ApplySortComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- if (compare != 0)
- return compare;
- }
- }
- else
- {
- /*
- * In the expression index case, compute the whole index tuple and
- * then compare values. It would perhaps be faster to compute only as
- * many columns as we need to compare, but that would require
- * duplicating all the logic in FormIndexDatum.
- */
- Datum l_index_values[INDEX_MAX_KEYS];
- bool l_index_isnull[INDEX_MAX_KEYS];
- Datum r_index_values[INDEX_MAX_KEYS];
- bool r_index_isnull[INDEX_MAX_KEYS];
- TupleTableSlot *ecxt_scantuple;
-
- /* Reset context each time to prevent memory leakage */
- ResetPerTupleExprContext(arg->estate);
-
- ecxt_scantuple = GetPerTupleExprContext(arg->estate)->ecxt_scantuple;
-
- ExecStoreHeapTuple(ltup, ecxt_scantuple, false);
- FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
- l_index_values, l_index_isnull);
-
- ExecStoreHeapTuple(rtup, ecxt_scantuple, false);
- FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
- r_index_values, r_index_isnull);
-
- for (; nkey < base->nKeys; nkey++, sortKey++)
- {
- compare = ApplySortComparator(l_index_values[nkey],
- l_index_isnull[nkey],
- r_index_values[nkey],
- r_index_isnull[nkey],
- sortKey);
- if (compare != 0)
- return compare;
- }
- }
-
- return 0;
-}
-
-static void
-writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- HeapTuple tuple = (HeapTuple) stup->tuple;
- unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
-
- /* We need to store t_self, but not other fields of HeapTupleData */
- LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
- LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
- LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
-}
-
-static void
-readtup_cluster(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int tuplen)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
- unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
- HeapTuple tuple = (HeapTuple) readtup_alloc(state,
- t_len + HEAPTUPLESIZE);
-
- /* Reconstruct the HeapTupleData header */
- tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
- tuple->t_len = t_len;
- LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
- /* We don't currently bother to reconstruct t_tableOid */
- tuple->t_tableOid = InvalidOid;
- /* Read in the tuple body */
- LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
- stup->tuple = (void *) tuple;
- /* set up first-column key value, if it's a simple column */
- if (base->haveDatum1)
- stup->datum1 = heap_getattr(tuple,
- arg->indexInfo->ii_IndexAttrNumbers[0],
- arg->tupDesc,
- &stup->isnull1);
-}
-
-static void
-freestate_cluster(Tuplesortstate *state)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
-
- /* Free any execution state created for CLUSTER case */
- if (arg->estate != NULL)
- {
- ExprContext *econtext = GetPerTupleExprContext(arg->estate);
-
- ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
- FreeExecutorState(arg->estate);
- }
-}
-
-/*
- * Routines specialized for IndexTuple case
- *
- * The btree and hash cases require separate comparison functions, but the
- * IndexTuple representation is the same so the copy/write/read support
- * functions can be shared.
- */
-
-static void
-removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
- int i;
-
- for (i = 0; i < count; i++)
- {
- IndexTuple tuple;
-
- tuple = stups[i].tuple;
- stups[i].datum1 = index_getattr(tuple,
- 1,
- RelationGetDescr(arg->indexRel),
- &stups[i].isnull1);
- }
-}
-
-static int
-comparetup_index_btree(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state)
-{
- /*
- * This is similar to comparetup_heap(), but expects index tuples. There
- * is also special handling for enforcing uniqueness, and special
- * treatment for equal keys at the end.
- */
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortIndexBTreeArg *arg = (TuplesortIndexBTreeArg *) base->arg;
- SortSupport sortKey = base->sortKeys;
- IndexTuple tuple1;
- IndexTuple tuple2;
- int keysz;
- TupleDesc tupDes;
- bool equal_hasnull = false;
- int nkey;
- int32 compare;
- Datum datum1,
- datum2;
- bool isnull1,
- isnull2;
-
-
- /* Compare the leading sort key */
- compare = ApplySortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- sortKey);
- if (compare != 0)
- return compare;
-
- /* Compare additional sort keys */
- tuple1 = (IndexTuple) a->tuple;
- tuple2 = (IndexTuple) b->tuple;
- keysz = base->nKeys;
- tupDes = RelationGetDescr(arg->index.indexRel);
-
- if (sortKey->abbrev_converter)
- {
- datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
- datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
-
- compare = ApplySortAbbrevFullComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- if (compare != 0)
- return compare;
- }
-
- /* they are equal, so we only need to examine one null flag */
- if (a->isnull1)
- equal_hasnull = true;
-
- sortKey++;
- for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
- {
- datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
- datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
-
- compare = ApplySortComparator(datum1, isnull1,
- datum2, isnull2,
- sortKey);
- if (compare != 0)
- return compare; /* done when we find unequal attributes */
-
- /* they are equal, so we only need to examine one null flag */
- if (isnull1)
- equal_hasnull = true;
- }
-
- /*
- * If btree has asked us to enforce uniqueness, complain if two equal
- * tuples are detected (unless there was at least one NULL field and NULLS
- * NOT DISTINCT was not set).
- *
- * It is sufficient to make the test here, because if two tuples are equal
- * they *must* get compared at some stage of the sort --- otherwise the
- * sort algorithm wouldn't have checked whether one must appear before the
- * other.
- */
- if (arg->enforceUnique && !(!arg->uniqueNullsNotDistinct && equal_hasnull))
- {
- Datum values[INDEX_MAX_KEYS];
- bool isnull[INDEX_MAX_KEYS];
- char *key_desc;
-
- /*
- * Some rather brain-dead implementations of qsort (such as the one in
- * QNX 4) will sometimes call the comparison routine to compare a
- * value to itself, but we always use our own implementation, which
- * does not.
- */
- Assert(tuple1 != tuple2);
-
- index_deform_tuple(tuple1, tupDes, values, isnull);
-
- key_desc = BuildIndexValueDescription(arg->index.indexRel, values, isnull);
-
- ereport(ERROR,
- (errcode(ERRCODE_UNIQUE_VIOLATION),
- errmsg("could not create unique index \"%s\"",
- RelationGetRelationName(arg->index.indexRel)),
- key_desc ? errdetail("Key %s is duplicated.", key_desc) :
- errdetail("Duplicate keys exist."),
- errtableconstraint(arg->index.heapRel,
- RelationGetRelationName(arg->index.indexRel))));
- }
-
- /*
- * If key values are equal, we sort on ItemPointer. This is required for
- * btree indexes, since heap TID is treated as an implicit last key
- * attribute in order to ensure that all keys in the index are physically
- * unique.
- */
- {
- BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
- BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
-
- if (blk1 != blk2)
- return (blk1 < blk2) ? -1 : 1;
- }
- {
- OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
- OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
-
- if (pos1 != pos2)
- return (pos1 < pos2) ? -1 : 1;
- }
-
- /* ItemPointer values should never be equal */
- Assert(false);
-
- return 0;
-}
-
-static int
-comparetup_index_hash(const SortTuple *a, const SortTuple *b,
- Tuplesortstate *state)
-{
- Bucket bucket1;
- Bucket bucket2;
- IndexTuple tuple1;
- IndexTuple tuple2;
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortIndexHashArg *arg = (TuplesortIndexHashArg *) base->arg;
-
- /*
- * Fetch hash keys and mask off bits we don't want to sort by. We know
- * that the first column of the index tuple is the hash key.
- */
- Assert(!a->isnull1);
- bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
- arg->max_buckets, arg->high_mask,
- arg->low_mask);
- Assert(!b->isnull1);
- bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
- arg->max_buckets, arg->high_mask,
- arg->low_mask);
- if (bucket1 > bucket2)
- return 1;
- else if (bucket1 < bucket2)
- return -1;
-
- /*
- * If hash values are equal, we sort on ItemPointer. This does not affect
- * validity of the finished index, but it may be useful to have index
- * scans in physical order.
- */
- tuple1 = (IndexTuple) a->tuple;
- tuple2 = (IndexTuple) b->tuple;
-
- {
- BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
- BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
-
- if (blk1 != blk2)
- return (blk1 < blk2) ? -1 : 1;
- }
- {
- OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
- OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
-
- if (pos1 != pos2)
- return (pos1 < pos2) ? -1 : 1;
- }
-
- /* ItemPointer values should never be equal */
- Assert(false);
-
- return 0;
-}
-
-static void
-writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- IndexTuple tuple = (IndexTuple) stup->tuple;
- unsigned int tuplen;
-
- tuplen = IndexTupleSize(tuple) + sizeof(tuplen);
- LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
- LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple));
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
-}
-
-static void
-readtup_index(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
- unsigned int tuplen = len - sizeof(unsigned int);
- IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen);
-
- LogicalTapeReadExact(tape, tuple, tuplen);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
- stup->tuple = (void *) tuple;
- /* set up first-column key value */
- stup->datum1 = index_getattr(tuple,
- 1,
- RelationGetDescr(arg->indexRel),
- &stup->isnull1);
-}
-
-/*
- * Routines specialized for DatumTuple case
- */
-
-static void
-removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- stups[i].datum1 = PointerGetDatum(stups[i].tuple);
-}
-
-static int
-comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- int compare;
-
- compare = ApplySortComparator(a->datum1, a->isnull1,
- b->datum1, b->isnull1,
- base->sortKeys);
- if (compare != 0)
- return compare;
-
- /* if we have abbreviations, then "tuple" has the original value */
-
- if (base->sortKeys->abbrev_converter)
- compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
- PointerGetDatum(b->tuple), b->isnull1,
- base->sortKeys);
-
- return compare;
-}
-
-static void
-writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
- void *waddr;
- unsigned int tuplen;
- unsigned int writtenlen;
-
- if (stup->isnull1)
- {
- waddr = NULL;
- tuplen = 0;
- }
- else if (!base->tuples)
- {
- waddr = &stup->datum1;
- tuplen = sizeof(Datum);
- }
- else
- {
- waddr = stup->tuple;
- tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, arg->datumTypeLen);
- Assert(tuplen != 0);
- }
-
- writtenlen = tuplen + sizeof(unsigned int);
-
- LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
- LogicalTapeWrite(tape, waddr, tuplen);
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
-}
-
-static void
-readtup_datum(Tuplesortstate *state, SortTuple *stup,
- LogicalTape *tape, unsigned int len)
-{
- TuplesortPublic *base = TuplesortstateGetPublic(state);
- unsigned int tuplen = len - sizeof(unsigned int);
-
- if (tuplen == 0)
- {
- /* it's NULL */
- stup->datum1 = (Datum) 0;
- stup->isnull1 = true;
- stup->tuple = NULL;
- }
- else if (!base->tuples)
- {
- Assert(tuplen == sizeof(Datum));
- LogicalTapeReadExact(tape, &stup->datum1, tuplen);
- stup->isnull1 = false;
- stup->tuple = NULL;
- }
- else
- {
- void *raddr = readtup_alloc(state, tuplen);
-
- LogicalTapeReadExact(tape, raddr, tuplen);
- stup->datum1 = PointerGetDatum(raddr);
- stup->isnull1 = false;
- stup->tuple = raddr;
- }
-
- if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
- LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
-}
-
/*
* Parallel sort routines
*/
--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * tuplesortvariants.c
+ * Implementation of tuple sorting variants.
+ *
+ * This module handles the sorting of heap tuples, index tuples, or single
+ * Datums. The implementation is based on the generalized tuple sorting
+ * facility given in tuplesort.c. Support other kinds of sortable objects
+ * could be easily added here, another module, or even an extension.
+ *
+ *
+ * Copyright (c) 2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/sort/tuplesortvariants.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "catalog/index.h"
+#include "executor/executor.h"
+#include "pg_trace.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/guc.h"
+#include "utils/tuplesort.h"
+
+
+/* sort-type codes for sort__start probes */
+#define HEAP_SORT 0
+#define INDEX_SORT 1
+#define DATUM_SORT 2
+#define CLUSTER_SORT 3
+
+static void removeabbrev_heap(Tuplesortstate *state, SortTuple *stups,
+ int count);
+static void removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups,
+ int count);
+static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups,
+ int count);
+static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups,
+ int count);
+static int comparetup_heap(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static void writetup_heap(Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
+static int comparetup_cluster(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
+static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+static void readtup_index(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
+static int comparetup_datum(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+static void writetup_datum(Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
+static void freestate_cluster(Tuplesortstate *state);
+
+/*
+ * Data struture pointed by "TuplesortPublic.arg" for the CLUSTER case. Set by
+ * the tuplesort_begin_cluster.
+ */
+typedef struct
+{
+ TupleDesc tupDesc;
+
+ IndexInfo *indexInfo; /* info about index being used for reference */
+ EState *estate; /* for evaluating index expressions */
+} TuplesortClusterArg;
+
+/*
+ * Data struture pointed by "TuplesortPublic.arg" for the IndexTuple case.
+ * Set by tuplesort_begin_index_xxx and used only by the IndexTuple routines.
+ */
+typedef struct
+{
+ Relation heapRel; /* table the index is being built on */
+ Relation indexRel; /* index being built */
+} TuplesortIndexArg;
+
+/*
+ * Data struture pointed by "TuplesortPublic.arg" for the index_btree subcase.
+ */
+typedef struct
+{
+ TuplesortIndexArg index;
+
+ bool enforceUnique; /* complain if we find duplicate tuples */
+ bool uniqueNullsNotDistinct; /* unique constraint null treatment */
+} TuplesortIndexBTreeArg;
+
+/*
+ * Data struture pointed by "TuplesortPublic.arg" for the index_hash subcase.
+ */
+typedef struct
+{
+ TuplesortIndexArg index;
+
+ uint32 high_mask; /* masks for sortable part of hash code */
+ uint32 low_mask;
+ uint32 max_buckets;
+} TuplesortIndexHashArg;
+
+/*
+ * Data struture pointed by "TuplesortPublic.arg" for the Datum case.
+ * Set by tuplesort_begin_datum and used only by the DatumTuple routines.
+ */
+typedef struct
+{
+ /* the datatype oid of Datum's to be sorted */
+ Oid datumType;
+ /* we need typelen in order to know how to copy the Datums. */
+ int datumTypeLen;
+} TuplesortDatumArg;
+
+Tuplesortstate *
+tuplesort_begin_heap(TupleDesc tupDesc,
+ int nkeys, AttrNumber *attNums,
+ Oid *sortOperators, Oid *sortCollations,
+ bool *nullsFirstFlags,
+ int workMem, SortCoordinate coordinate, int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext;
+ int i;
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+
+ AssertArg(nkeys > 0);
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+ nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = nkeys;
+
+ TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
+ false, /* no unique check */
+ nkeys,
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS,
+ PARALLEL_SORT(coordinate));
+
+ base->removeabbrev = removeabbrev_heap;
+ base->comparetup = comparetup_heap;
+ base->writetup = writetup_heap;
+ base->readtup = readtup_heap;
+ base->haveDatum1 = true;
+ base->arg = tupDesc; /* assume we need not copy tupDesc */
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
+
+ for (i = 0; i < nkeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+
+ AssertArg(attNums[i] != 0);
+ AssertArg(sortOperators[i] != 0);
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = sortCollations[i];
+ sortKey->ssup_nulls_first = nullsFirstFlags[i];
+ sortKey->ssup_attno = attNums[i];
+ /* Convey if abbreviation optimization is applicable in principle */
+ sortKey->abbreviate = (i == 0 && base->haveDatum1);
+
+ PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
+ }
+
+ /*
+ * The "onlyKey" optimization cannot be used with abbreviated keys, since
+ * tie-breaker comparisons may be required. Typically, the optimization
+ * is only of value to pass-by-value types anyway, whereas abbreviated
+ * keys are typically only of value to pass-by-reference types.
+ */
+ if (nkeys == 1 && !base->sortKeys->abbrev_converter)
+ base->onlyKey = base->sortKeys;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_cluster(TupleDesc tupDesc,
+ Relation indexRel,
+ int workMem,
+ SortCoordinate coordinate, int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ BTScanInsert indexScanKey;
+ MemoryContext oldcontext;
+ TuplesortClusterArg *arg;
+ int i;
+
+ Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+ arg = (TuplesortClusterArg *) palloc0(sizeof(TuplesortClusterArg));
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+ RelationGetNumberOfAttributes(indexRel),
+ workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+ TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
+ false, /* no unique check */
+ base->nKeys,
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS,
+ PARALLEL_SORT(coordinate));
+
+ base->removeabbrev = removeabbrev_cluster;
+ base->comparetup = comparetup_cluster;
+ base->writetup = writetup_cluster;
+ base->readtup = readtup_cluster;
+ base->freestate = freestate_cluster;
+ base->arg = arg;
+
+ arg->indexInfo = BuildIndexInfo(indexRel);
+
+ /*
+ * If we don't have a simple leading attribute, we don't currently
+ * initialize datum1, so disable optimizations that require it.
+ */
+ if (arg->indexInfo->ii_IndexAttrNumbers[0] == 0)
+ base->haveDatum1 = false;
+ else
+ base->haveDatum1 = true;
+
+ arg->tupDesc = tupDesc; /* assume we need not copy tupDesc */
+
+ indexScanKey = _bt_mkscankey(indexRel, NULL);
+
+ if (arg->indexInfo->ii_Expressions != NULL)
+ {
+ TupleTableSlot *slot;
+ ExprContext *econtext;
+
+ /*
+ * We will need to use FormIndexDatum to evaluate the index
+ * expressions. To do that, we need an EState, as well as a
+ * TupleTableSlot to put the table tuples into. The econtext's
+ * scantuple has to point to that slot, too.
+ */
+ arg->estate = CreateExecutorState();
+ slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
+ econtext = GetPerTupleExprContext(arg->estate);
+ econtext->ecxt_scantuple = slot;
+ }
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(base->nKeys *
+ sizeof(SortSupportData));
+
+ for (i = 0; i < base->nKeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+ ScanKey scanKey = indexScanKey->scankeys + i;
+ int16 strategy;
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = scanKey->sk_collation;
+ sortKey->ssup_nulls_first =
+ (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
+ sortKey->ssup_attno = scanKey->sk_attno;
+ /* Convey if abbreviation optimization is applicable in principle */
+ sortKey->abbreviate = (i == 0 && base->haveDatum1);
+
+ AssertState(sortKey->ssup_attno != 0);
+
+ strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
+ BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+ PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
+ }
+
+ pfree(indexScanKey);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_index_btree(Relation heapRel,
+ Relation indexRel,
+ bool enforceUnique,
+ bool uniqueNullsNotDistinct,
+ int workMem,
+ SortCoordinate coordinate,
+ int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ BTScanInsert indexScanKey;
+ TuplesortIndexBTreeArg *arg;
+ MemoryContext oldcontext;
+ int i;
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+ arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: unique = %c, workMem = %d, randomAccess = %c",
+ enforceUnique ? 't' : 'f',
+ workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+ TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
+ enforceUnique,
+ base->nKeys,
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS,
+ PARALLEL_SORT(coordinate));
+
+ base->removeabbrev = removeabbrev_index;
+ base->comparetup = comparetup_index_btree;
+ base->writetup = writetup_index;
+ base->readtup = readtup_index;
+ base->haveDatum1 = true;
+ base->arg = arg;
+
+ arg->index.heapRel = heapRel;
+ arg->index.indexRel = indexRel;
+ arg->enforceUnique = enforceUnique;
+ arg->uniqueNullsNotDistinct = uniqueNullsNotDistinct;
+
+ indexScanKey = _bt_mkscankey(indexRel, NULL);
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(base->nKeys *
+ sizeof(SortSupportData));
+
+ for (i = 0; i < base->nKeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+ ScanKey scanKey = indexScanKey->scankeys + i;
+ int16 strategy;
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = scanKey->sk_collation;
+ sortKey->ssup_nulls_first =
+ (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
+ sortKey->ssup_attno = scanKey->sk_attno;
+ /* Convey if abbreviation optimization is applicable in principle */
+ sortKey->abbreviate = (i == 0 && base->haveDatum1);
+
+ AssertState(sortKey->ssup_attno != 0);
+
+ strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
+ BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+ PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
+ }
+
+ pfree(indexScanKey);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_index_hash(Relation heapRel,
+ Relation indexRel,
+ uint32 high_mask,
+ uint32 low_mask,
+ uint32 max_buckets,
+ int workMem,
+ SortCoordinate coordinate,
+ int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext;
+ TuplesortIndexHashArg *arg;
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+ arg = (TuplesortIndexHashArg *) palloc(sizeof(TuplesortIndexHashArg));
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
+ "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
+ high_mask,
+ low_mask,
+ max_buckets,
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = 1; /* Only one sort column, the hash code */
+
+ base->removeabbrev = removeabbrev_index;
+ base->comparetup = comparetup_index_hash;
+ base->writetup = writetup_index;
+ base->readtup = readtup_index;
+ base->haveDatum1 = true;
+ base->arg = arg;
+
+ arg->index.heapRel = heapRel;
+ arg->index.indexRel = indexRel;
+
+ arg->high_mask = high_mask;
+ arg->low_mask = low_mask;
+ arg->max_buckets = max_buckets;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+ Relation indexRel,
+ int workMem,
+ SortCoordinate coordinate,
+ int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext;
+ TuplesortIndexBTreeArg *arg;
+ int i;
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+ arg = (TuplesortIndexBTreeArg *) palloc(sizeof(TuplesortIndexBTreeArg));
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: workMem = %d, randomAccess = %c",
+ workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+ base->removeabbrev = removeabbrev_index;
+ base->comparetup = comparetup_index_btree;
+ base->writetup = writetup_index;
+ base->readtup = readtup_index;
+ base->haveDatum1 = true;
+ base->arg = arg;
+
+ arg->index.heapRel = heapRel;
+ arg->index.indexRel = indexRel;
+ arg->enforceUnique = false;
+ arg->uniqueNullsNotDistinct = false;
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(base->nKeys *
+ sizeof(SortSupportData));
+
+ for (i = 0; i < base->nKeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = indexRel->rd_indcollation[i];
+ sortKey->ssup_nulls_first = false;
+ sortKey->ssup_attno = i + 1;
+ /* Convey if abbreviation optimization is applicable in principle */
+ sortKey->abbreviate = (i == 0 && base->haveDatum1);
+
+ AssertState(sortKey->ssup_attno != 0);
+
+ /* Look for a sort support function */
+ PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
+ bool nullsFirstFlag, int workMem,
+ SortCoordinate coordinate, int sortopt)
+{
+ Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+ sortopt);
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortDatumArg *arg;
+ MemoryContext oldcontext;
+ int16 typlen;
+ bool typbyval;
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+ arg = (TuplesortDatumArg *) palloc(sizeof(TuplesortDatumArg));
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin datum sort: workMem = %d, randomAccess = %c",
+ workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ base->nKeys = 1; /* always a one-column sort */
+
+ TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
+ false, /* no unique check */
+ 1,
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS,
+ PARALLEL_SORT(coordinate));
+
+ base->removeabbrev = removeabbrev_datum;
+ base->comparetup = comparetup_datum;
+ base->writetup = writetup_datum;
+ base->readtup = readtup_datum;
+ base->haveDatum1 = true;
+ base->arg = arg;
+
+ arg->datumType = datumType;
+
+ /* lookup necessary attributes of the datum type */
+ get_typlenbyval(datumType, &typlen, &typbyval);
+ arg->datumTypeLen = typlen;
+ base->tuples = !typbyval;
+
+ /* Prepare SortSupport data */
+ base->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
+
+ base->sortKeys->ssup_cxt = CurrentMemoryContext;
+ base->sortKeys->ssup_collation = sortCollation;
+ base->sortKeys->ssup_nulls_first = nullsFirstFlag;
+
+ /*
+ * Abbreviation is possible here only for by-reference types. In theory,
+ * a pass-by-value datatype could have an abbreviated form that is cheaper
+ * to compare. In a tuple sort, we could support that, because we can
+ * always extract the original datum from the tuple as needed. Here, we
+ * can't, because a datum sort only stores a single copy of the datum; the
+ * "tuple" field of each SortTuple is NULL.
+ */
+ base->sortKeys->abbreviate = !typbyval;
+
+ PrepareSortSupportFromOrderingOp(sortOperator, base->sortKeys);
+
+ /*
+ * The "onlyKey" optimization cannot be used with abbreviated keys, since
+ * tie-breaker comparisons may be required. Typically, the optimization
+ * is only of value to pass-by-value types anyway, whereas abbreviated
+ * keys are typically only of value to pass-by-reference types.
+ */
+ if (!base->sortKeys->abbrev_converter)
+ base->onlyKey = base->sortKeys;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+/*
+ * Accept one tuple while collecting input data for sort.
+ *
+ * Note that the input data is always copied; the caller need not save it.
+ */
+void
+tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+ TupleDesc tupDesc = (TupleDesc) base->arg;
+ SortTuple stup;
+ MinimalTuple tuple;
+ HeapTupleData htup;
+
+ /* copy the tuple into sort storage */
+ tuple = ExecCopySlotMinimalTuple(slot);
+ stup.tuple = (void *) tuple;
+ /* set up first-column key value */
+ htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
+ htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
+ stup.datum1 = heap_getattr(&htup,
+ base->sortKeys[0].ssup_attno,
+ tupDesc,
+ &stup.isnull1);
+
+ tuplesort_puttuple_common(state, &stup,
+ base->sortKeys->abbrev_converter &&
+ !stup.isnull1);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Accept one tuple while collecting input data for sort.
+ *
+ * Note that the input data is always copied; the caller need not save it.
+ */
+void
+tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
+{
+ SortTuple stup;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+ TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
+
+ /* copy the tuple into sort storage */
+ tup = heap_copytuple(tup);
+ stup.tuple = (void *) tup;
+
+ /*
+ * set up first-column key value, and potentially abbreviate, if it's a
+ * simple column
+ */
+ if (base->haveDatum1)
+ {
+ stup.datum1 = heap_getattr(tup,
+ arg->indexInfo->ii_IndexAttrNumbers[0],
+ arg->tupDesc,
+ &stup.isnull1);
+ }
+
+ tuplesort_puttuple_common(state, &stup,
+ base->haveDatum1 &&
+ base->sortKeys->abbrev_converter &&
+ !stup.isnull1);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Collect one index tuple while collecting input data for sort, building
+ * it from caller-supplied values.
+ */
+void
+tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
+ ItemPointer self, Datum *values,
+ bool *isnull)
+{
+ SortTuple stup;
+ IndexTuple tuple;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
+
+ stup.tuple = index_form_tuple_context(RelationGetDescr(rel), values,
+ isnull, base->tuplecontext);
+ tuple = ((IndexTuple) stup.tuple);
+ tuple->t_tid = *self;
+ /* set up first-column key value */
+ stup.datum1 = index_getattr(tuple,
+ 1,
+ RelationGetDescr(arg->indexRel),
+ &stup.isnull1);
+
+ tuplesort_puttuple_common(state, &stup,
+ base->sortKeys &&
+ base->sortKeys->abbrev_converter &&
+ !stup.isnull1);
+}
+
+/*
+ * Accept one Datum while collecting input data for sort.
+ *
+ * If the Datum is pass-by-ref type, the value will be copied.
+ */
+void
+tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+ TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
+ SortTuple stup;
+
+ /*
+ * Pass-by-value types or null values are just stored directly in
+ * stup.datum1 (and stup.tuple is not used and set to NULL).
+ *
+ * Non-null pass-by-reference values need to be copied into memory we
+ * control, and possibly abbreviated. The copied value is pointed to by
+ * stup.tuple and is treated as the canonical copy (e.g. to return via
+ * tuplesort_getdatum or when writing to tape); stup.datum1 gets the
+ * abbreviated value if abbreviation is happening, otherwise it's
+ * identical to stup.tuple.
+ */
+
+ if (isNull || !base->tuples)
+ {
+ /*
+ * Set datum1 to zeroed representation for NULLs (to be consistent,
+ * and to support cheap inequality tests for NULL abbreviated keys).
+ */
+ stup.datum1 = !isNull ? val : (Datum) 0;
+ stup.isnull1 = isNull;
+ stup.tuple = NULL; /* no separate storage */
+ }
+ else
+ {
+ stup.isnull1 = false;
+ stup.datum1 = datumCopy(val, false, arg->datumTypeLen);
+ stup.tuple = DatumGetPointer(stup.datum1);
+ }
+
+ tuplesort_puttuple_common(state, &stup,
+ base->tuples &&
+ base->sortKeys->abbrev_converter && !isNull);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Fetch the next tuple in either forward or back direction.
+ * If successful, put tuple in slot and return true; else, clear the slot
+ * and return false.
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required. Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality. A
+ * NULL value in leading attribute will set abbreviated value to zeroed
+ * representation, which caller may rely on in abbreviated inequality check.
+ *
+ * If copy is true, the slot receives a tuple that's been copied into the
+ * caller's memory context, so that it will stay valid regardless of future
+ * manipulations of the tuplesort's state (up to and including deleting the
+ * tuplesort). If copy is false, the slot will just receive a pointer to a
+ * tuple held within the tuplesort, which is more efficient, but only safe for
+ * callers that are prepared to have any subsequent manipulation of the
+ * tuplesort's state invalidate slot contents.
+ */
+bool
+tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
+ TupleTableSlot *slot, Datum *abbrev)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ SortTuple stup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ stup.tuple = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ if (stup.tuple)
+ {
+ /* Record abbreviated key for caller */
+ if (base->sortKeys->abbrev_converter && abbrev)
+ *abbrev = stup.datum1;
+
+ if (copy)
+ stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple);
+
+ ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy);
+ return true;
+ }
+ else
+ {
+ ExecClearTuple(slot);
+ return false;
+ }
+}
+
+/*
+ * Fetch the next tuple in either forward or back direction.
+ * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
+ * context, and must not be freed by caller. Caller may not rely on tuple
+ * remaining valid after any further manipulation of tuplesort.
+ */
+HeapTuple
+tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ SortTuple stup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ stup.tuple = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return stup.tuple;
+}
+
+/*
+ * Fetch the next index tuple in either forward or back direction.
+ * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory
+ * context, and must not be freed by caller. Caller may not rely on tuple
+ * remaining valid after any further manipulation of tuplesort.
+ */
+IndexTuple
+tuplesort_getindextuple(Tuplesortstate *state, bool forward)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ SortTuple stup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ stup.tuple = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return (IndexTuple) stup.tuple;
+}
+
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns false if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
+ * in caller's context, and is now owned by the caller (this differs from
+ * similar routines for other types of tuplesorts).
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required. Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality. A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
+ */
+bool
+tuplesort_getdatum(Tuplesortstate *state, bool forward,
+ Datum *val, bool *isNull, Datum *abbrev)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
+ SortTuple stup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ {
+ MemoryContextSwitchTo(oldcontext);
+ return false;
+ }
+
+ /* Ensure we copy into caller's memory context */
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Record abbreviated key for caller */
+ if (base->sortKeys->abbrev_converter && abbrev)
+ *abbrev = stup.datum1;
+
+ if (stup.isnull1 || !base->tuples)
+ {
+ *val = stup.datum1;
+ *isNull = stup.isnull1;
+ }
+ else
+ {
+ /* use stup.tuple because stup.datum1 may be an abbreviation */
+ *val = datumCopy(PointerGetDatum(stup.tuple), false, arg->datumTypeLen);
+ *isNull = false;
+ }
+
+ return true;
+}
+
+
+/*
+ * Routines specialized for HeapTuple (actually MinimalTuple) case
+ */
+
+static void
+removeabbrev_heap(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ int i;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+
+ for (i = 0; i < count; i++)
+ {
+ HeapTupleData htup;
+
+ htup.t_len = ((MinimalTuple) stups[i].tuple)->t_len +
+ MINIMAL_TUPLE_OFFSET;
+ htup.t_data = (HeapTupleHeader) ((char *) stups[i].tuple -
+ MINIMAL_TUPLE_OFFSET);
+ stups[i].datum1 = heap_getattr(&htup,
+ base->sortKeys[0].ssup_attno,
+ (TupleDesc) base->arg,
+ &stups[i].isnull1);
+ }
+}
+
+static int
+comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ SortSupport sortKey = base->sortKeys;
+ HeapTupleData ltup;
+ HeapTupleData rtup;
+ TupleDesc tupDesc;
+ int nkey;
+ int32 compare;
+ AttrNumber attno;
+ Datum datum1,
+ datum2;
+ bool isnull1,
+ isnull2;
+
+
+ /* Compare the leading sort key */
+ compare = ApplySortComparator(a->datum1, a->isnull1,
+ b->datum1, b->isnull1,
+ sortKey);
+ if (compare != 0)
+ return compare;
+
+ /* Compare additional sort keys */
+ ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+ ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
+ rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+ rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
+ tupDesc = (TupleDesc) base->arg;
+
+ if (sortKey->abbrev_converter)
+ {
+ attno = sortKey->ssup_attno;
+
+ datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
+ datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+ compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+
+ sortKey++;
+ for (nkey = 1; nkey < base->nKeys; nkey++, sortKey++)
+ {
+ attno = sortKey->ssup_attno;
+
+ datum1 = heap_getattr(<up, attno, tupDesc, &isnull1);
+ datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+ compare = ApplySortComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+
+ return 0;
+}
+
+static void
+writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MinimalTuple tuple = (MinimalTuple) stup->tuple;
+
+ /* the part of the MinimalTuple we'll write: */
+ char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
+ unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
+
+ /* total on-disk footprint: */
+ unsigned int tuplen = tupbodylen + sizeof(int);
+
+ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
+ LogicalTapeWrite(tape, (void *) tupbody, tupbodylen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_heap(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len)
+{
+ unsigned int tupbodylen = len - sizeof(int);
+ unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
+ MinimalTuple tuple = (MinimalTuple) tuplesort_readtup_alloc(state, tuplen);
+ char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ HeapTupleData htup;
+
+ /* read in the tuple proper */
+ tuple->t_len = tuplen;
+ LogicalTapeReadExact(tape, tupbody, tupbodylen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+ stup->tuple = (void *) tuple;
+ /* set up first-column key value */
+ htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
+ htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
+ stup->datum1 = heap_getattr(&htup,
+ base->sortKeys[0].ssup_attno,
+ (TupleDesc) base->arg,
+ &stup->isnull1);
+}
+
+/*
+ * Routines specialized for the CLUSTER case (HeapTuple data, with
+ * comparisons per a btree index definition)
+ */
+
+static void
+removeabbrev_cluster(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ int i;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
+
+ for (i = 0; i < count; i++)
+ {
+ HeapTuple tup;
+
+ tup = (HeapTuple) stups[i].tuple;
+ stups[i].datum1 = heap_getattr(tup,
+ arg->indexInfo->ii_IndexAttrNumbers[0],
+ arg->tupDesc,
+ &stups[i].isnull1);
+ }
+}
+
+static int
+comparetup_cluster(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
+ SortSupport sortKey = base->sortKeys;
+ HeapTuple ltup;
+ HeapTuple rtup;
+ TupleDesc tupDesc;
+ int nkey;
+ int32 compare;
+ Datum datum1,
+ datum2;
+ bool isnull1,
+ isnull2;
+
+ /* Be prepared to compare additional sort keys */
+ ltup = (HeapTuple) a->tuple;
+ rtup = (HeapTuple) b->tuple;
+ tupDesc = arg->tupDesc;
+
+ /* Compare the leading sort key, if it's simple */
+ if (base->haveDatum1)
+ {
+ compare = ApplySortComparator(a->datum1, a->isnull1,
+ b->datum1, b->isnull1,
+ sortKey);
+ if (compare != 0)
+ return compare;
+
+ if (sortKey->abbrev_converter)
+ {
+ AttrNumber leading = arg->indexInfo->ii_IndexAttrNumbers[0];
+
+ datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
+ datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
+
+ compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ }
+ if (compare != 0 || base->nKeys == 1)
+ return compare;
+ /* Compare additional columns the hard way */
+ sortKey++;
+ nkey = 1;
+ }
+ else
+ {
+ /* Must compare all keys the hard way */
+ nkey = 0;
+ }
+
+ if (arg->indexInfo->ii_Expressions == NULL)
+ {
+ /* If not expression index, just compare the proper heap attrs */
+
+ for (; nkey < base->nKeys; nkey++, sortKey++)
+ {
+ AttrNumber attno = arg->indexInfo->ii_IndexAttrNumbers[nkey];
+
+ datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
+ datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
+
+ compare = ApplySortComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+ }
+ else
+ {
+ /*
+ * In the expression index case, compute the whole index tuple and
+ * then compare values. It would perhaps be faster to compute only as
+ * many columns as we need to compare, but that would require
+ * duplicating all the logic in FormIndexDatum.
+ */
+ Datum l_index_values[INDEX_MAX_KEYS];
+ bool l_index_isnull[INDEX_MAX_KEYS];
+ Datum r_index_values[INDEX_MAX_KEYS];
+ bool r_index_isnull[INDEX_MAX_KEYS];
+ TupleTableSlot *ecxt_scantuple;
+
+ /* Reset context each time to prevent memory leakage */
+ ResetPerTupleExprContext(arg->estate);
+
+ ecxt_scantuple = GetPerTupleExprContext(arg->estate)->ecxt_scantuple;
+
+ ExecStoreHeapTuple(ltup, ecxt_scantuple, false);
+ FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
+ l_index_values, l_index_isnull);
+
+ ExecStoreHeapTuple(rtup, ecxt_scantuple, false);
+ FormIndexDatum(arg->indexInfo, ecxt_scantuple, arg->estate,
+ r_index_values, r_index_isnull);
+
+ for (; nkey < base->nKeys; nkey++, sortKey++)
+ {
+ compare = ApplySortComparator(l_index_values[nkey],
+ l_index_isnull[nkey],
+ r_index_values[nkey],
+ r_index_isnull[nkey],
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+ }
+
+ return 0;
+}
+
+static void
+writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ HeapTuple tuple = (HeapTuple) stup->tuple;
+ unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int);
+
+ /* We need to store t_self, but not other fields of HeapTupleData */
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+ LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData));
+ LogicalTapeWrite(tape, tuple->t_data, tuple->t_len);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_cluster(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int tuplen)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
+ unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int);
+ HeapTuple tuple = (HeapTuple) tuplesort_readtup_alloc(state,
+ t_len + HEAPTUPLESIZE);
+
+ /* Reconstruct the HeapTupleData header */
+ tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE);
+ tuple->t_len = t_len;
+ LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData));
+ /* We don't currently bother to reconstruct t_tableOid */
+ tuple->t_tableOid = InvalidOid;
+ /* Read in the tuple body */
+ LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+ stup->tuple = (void *) tuple;
+ /* set up first-column key value, if it's a simple column */
+ if (base->haveDatum1)
+ stup->datum1 = heap_getattr(tuple,
+ arg->indexInfo->ii_IndexAttrNumbers[0],
+ arg->tupDesc,
+ &stup->isnull1);
+}
+
+static void
+freestate_cluster(Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortClusterArg *arg = (TuplesortClusterArg *) base->arg;
+
+ /* Free any execution state created for CLUSTER case */
+ if (arg->estate != NULL)
+ {
+ ExprContext *econtext = GetPerTupleExprContext(arg->estate);
+
+ ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
+ FreeExecutorState(arg->estate);
+ }
+}
+
+/*
+ * Routines specialized for IndexTuple case
+ *
+ * The btree and hash cases require separate comparison functions, but the
+ * IndexTuple representation is the same so the copy/write/read support
+ * functions can be shared.
+ */
+
+static void
+removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
+ int i;
+
+ for (i = 0; i < count; i++)
+ {
+ IndexTuple tuple;
+
+ tuple = stups[i].tuple;
+ stups[i].datum1 = index_getattr(tuple,
+ 1,
+ RelationGetDescr(arg->indexRel),
+ &stups[i].isnull1);
+ }
+}
+
+static int
+comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ /*
+ * This is similar to comparetup_heap(), but expects index tuples. There
+ * is also special handling for enforcing uniqueness, and special
+ * treatment for equal keys at the end.
+ */
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortIndexBTreeArg *arg = (TuplesortIndexBTreeArg *) base->arg;
+ SortSupport sortKey = base->sortKeys;
+ IndexTuple tuple1;
+ IndexTuple tuple2;
+ int keysz;
+ TupleDesc tupDes;
+ bool equal_hasnull = false;
+ int nkey;
+ int32 compare;
+ Datum datum1,
+ datum2;
+ bool isnull1,
+ isnull2;
+
+
+ /* Compare the leading sort key */
+ compare = ApplySortComparator(a->datum1, a->isnull1,
+ b->datum1, b->isnull1,
+ sortKey);
+ if (compare != 0)
+ return compare;
+
+ /* Compare additional sort keys */
+ tuple1 = (IndexTuple) a->tuple;
+ tuple2 = (IndexTuple) b->tuple;
+ keysz = base->nKeys;
+ tupDes = RelationGetDescr(arg->index.indexRel);
+
+ if (sortKey->abbrev_converter)
+ {
+ datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
+ datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
+
+ compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ if (compare != 0)
+ return compare;
+ }
+
+ /* they are equal, so we only need to examine one null flag */
+ if (a->isnull1)
+ equal_hasnull = true;
+
+ sortKey++;
+ for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
+ {
+ datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
+ datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);
+
+ compare = ApplySortComparator(datum1, isnull1,
+ datum2, isnull2,
+ sortKey);
+ if (compare != 0)
+ return compare; /* done when we find unequal attributes */
+
+ /* they are equal, so we only need to examine one null flag */
+ if (isnull1)
+ equal_hasnull = true;
+ }
+
+ /*
+ * If btree has asked us to enforce uniqueness, complain if two equal
+ * tuples are detected (unless there was at least one NULL field and NULLS
+ * NOT DISTINCT was not set).
+ *
+ * It is sufficient to make the test here, because if two tuples are equal
+ * they *must* get compared at some stage of the sort --- otherwise the
+ * sort algorithm wouldn't have checked whether one must appear before the
+ * other.
+ */
+ if (arg->enforceUnique && !(!arg->uniqueNullsNotDistinct && equal_hasnull))
+ {
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ char *key_desc;
+
+ /*
+ * Some rather brain-dead implementations of qsort (such as the one in
+ * QNX 4) will sometimes call the comparison routine to compare a
+ * value to itself, but we always use our own implementation, which
+ * does not.
+ */
+ Assert(tuple1 != tuple2);
+
+ index_deform_tuple(tuple1, tupDes, values, isnull);
+
+ key_desc = BuildIndexValueDescription(arg->index.indexRel, values, isnull);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_UNIQUE_VIOLATION),
+ errmsg("could not create unique index \"%s\"",
+ RelationGetRelationName(arg->index.indexRel)),
+ key_desc ? errdetail("Key %s is duplicated.", key_desc) :
+ errdetail("Duplicate keys exist."),
+ errtableconstraint(arg->index.heapRel,
+ RelationGetRelationName(arg->index.indexRel))));
+ }
+
+ /*
+ * If key values are equal, we sort on ItemPointer. This is required for
+ * btree indexes, since heap TID is treated as an implicit last key
+ * attribute in order to ensure that all keys in the index are physically
+ * unique.
+ */
+ {
+ BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
+ BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
+
+ if (blk1 != blk2)
+ return (blk1 < blk2) ? -1 : 1;
+ }
+ {
+ OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
+ OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
+
+ if (pos1 != pos2)
+ return (pos1 < pos2) ? -1 : 1;
+ }
+
+ /* ItemPointer values should never be equal */
+ Assert(false);
+
+ return 0;
+}
+
+static int
+comparetup_index_hash(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ Bucket bucket1;
+ Bucket bucket2;
+ IndexTuple tuple1;
+ IndexTuple tuple2;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortIndexHashArg *arg = (TuplesortIndexHashArg *) base->arg;
+
+ /*
+ * Fetch hash keys and mask off bits we don't want to sort by. We know
+ * that the first column of the index tuple is the hash key.
+ */
+ Assert(!a->isnull1);
+ bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1),
+ arg->max_buckets, arg->high_mask,
+ arg->low_mask);
+ Assert(!b->isnull1);
+ bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1),
+ arg->max_buckets, arg->high_mask,
+ arg->low_mask);
+ if (bucket1 > bucket2)
+ return 1;
+ else if (bucket1 < bucket2)
+ return -1;
+
+ /*
+ * If hash values are equal, we sort on ItemPointer. This does not affect
+ * validity of the finished index, but it may be useful to have index
+ * scans in physical order.
+ */
+ tuple1 = (IndexTuple) a->tuple;
+ tuple2 = (IndexTuple) b->tuple;
+
+ {
+ BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid);
+ BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid);
+
+ if (blk1 != blk2)
+ return (blk1 < blk2) ? -1 : 1;
+ }
+ {
+ OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid);
+ OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid);
+
+ if (pos1 != pos2)
+ return (pos1 < pos2) ? -1 : 1;
+ }
+
+ /* ItemPointer values should never be equal */
+ Assert(false);
+
+ return 0;
+}
+
+static void
+writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ IndexTuple tuple = (IndexTuple) stup->tuple;
+ unsigned int tuplen;
+
+ tuplen = IndexTupleSize(tuple) + sizeof(tuplen);
+ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
+ LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple));
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_index(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortIndexArg *arg = (TuplesortIndexArg *) base->arg;
+ unsigned int tuplen = len - sizeof(unsigned int);
+ IndexTuple tuple = (IndexTuple) tuplesort_readtup_alloc(state, tuplen);
+
+ LogicalTapeReadExact(tape, tuple, tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+ stup->tuple = (void *) tuple;
+ /* set up first-column key value */
+ stup->datum1 = index_getattr(tuple,
+ 1,
+ RelationGetDescr(arg->indexRel),
+ &stup->isnull1);
+}
+
+/*
+ * Routines specialized for DatumTuple case
+ */
+
+static void
+removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ stups[i].datum1 = PointerGetDatum(stups[i].tuple);
+}
+
+static int
+comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ int compare;
+
+ compare = ApplySortComparator(a->datum1, a->isnull1,
+ b->datum1, b->isnull1,
+ base->sortKeys);
+ if (compare != 0)
+ return compare;
+
+ /* if we have abbreviations, then "tuple" has the original value */
+
+ if (base->sortKeys->abbrev_converter)
+ compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1,
+ PointerGetDatum(b->tuple), b->isnull1,
+ base->sortKeys);
+
+ return compare;
+}
+
+static void
+writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ TuplesortDatumArg *arg = (TuplesortDatumArg *) base->arg;
+ void *waddr;
+ unsigned int tuplen;
+ unsigned int writtenlen;
+
+ if (stup->isnull1)
+ {
+ waddr = NULL;
+ tuplen = 0;
+ }
+ else if (!base->tuples)
+ {
+ waddr = &stup->datum1;
+ tuplen = sizeof(Datum);
+ }
+ else
+ {
+ waddr = stup->tuple;
+ tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, arg->datumTypeLen);
+ Assert(tuplen != 0);
+ }
+
+ writtenlen = tuplen + sizeof(unsigned int);
+
+ LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
+ LogicalTapeWrite(tape, waddr, tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen));
+}
+
+static void
+readtup_datum(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ unsigned int tuplen = len - sizeof(unsigned int);
+
+ if (tuplen == 0)
+ {
+ /* it's NULL */
+ stup->datum1 = (Datum) 0;
+ stup->isnull1 = true;
+ stup->tuple = NULL;
+ }
+ else if (!base->tuples)
+ {
+ Assert(tuplen == sizeof(Datum));
+ LogicalTapeReadExact(tape, &stup->datum1, tuplen);
+ stup->isnull1 = false;
+ stup->tuple = NULL;
+ }
+ else
+ {
+ void *raddr = tuplesort_readtup_alloc(state, tuplen);
+
+ LogicalTapeReadExact(tape, raddr, tuplen);
+ stup->datum1 = PointerGetDatum(raddr);
+ stup->isnull1 = false;
+ stup->tuple = raddr;
+ }
+
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+}
#include "access/itup.h"
#include "executor/tuptable.h"
#include "storage/dsm.h"
+#include "utils/logtape.h"
#include "utils/relcache.h"
+#include "utils/sortsupport.h"
/*
int64 spaceUsed; /* space consumption, in kB */
} TuplesortInstrumentation;
+/*
+ * The objects we actually sort are SortTuple structs. These contain
+ * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple),
+ * which is a separate palloc chunk --- we assume it is just one chunk and
+ * can be freed by a simple pfree() (except during merge, when we use a
+ * simple slab allocator). SortTuples also contain the tuple's first key
+ * column in Datum/nullflag format, and a source/input tape number that
+ * tracks which tape each heap element/slot belongs to during merging.
+ *
+ * Storing the first key column lets us save heap_getattr or index_getattr
+ * calls during tuple comparisons. We could extract and save all the key
+ * columns not just the first, but this would increase code complexity and
+ * overhead, and wouldn't actually save any comparison cycles in the common
+ * case where the first key determines the comparison result. Note that
+ * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
+ *
+ * There is one special case: when the sort support infrastructure provides an
+ * "abbreviated key" representation, where the key is (typically) a pass by
+ * value proxy for a pass by reference type. In this case, the abbreviated key
+ * is stored in datum1 in place of the actual first key column.
+ *
+ * When sorting single Datums, the data value is represented directly by
+ * datum1/isnull1 for pass by value types (or null values). If the datatype is
+ * pass-by-reference and isnull1 is false, then "tuple" points to a separately
+ * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then
+ * either the same pointer as "tuple", or is an abbreviated key value as
+ * described above. Accordingly, "tuple" is always used in preference to
+ * datum1 as the authoritative value for pass-by-reference cases.
+ */
+typedef struct
+{
+ void *tuple; /* the tuple itself */
+ Datum datum1; /* value of first key column */
+ bool isnull1; /* is first key column NULL? */
+ int srctape; /* source tape number */
+} SortTuple;
+
+typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state);
+
+/*
+ * The public part of a Tuple sort operation state. This data structure
+ * containsthe definition of sort-variant-specific interface methods and
+ * the part of Tuple sort operation state required by their implementations.
+ */
+typedef struct
+{
+ /*
+ * These function pointers decouple the routines that must know what kind
+ * of tuple we are sorting from the routines that don't need to know it.
+ * They are set up by the tuplesort_begin_xxx routines.
+ *
+ * Function to compare two tuples; result is per qsort() convention, ie:
+ * <0, 0, >0 according as a<b, a=b, a>b. The API must match
+ * qsort_arg_comparator.
+ */
+ SortTupleComparator comparetup;
+
+ /*
+ * Alter datum1 representation in the SortTuple's array back from the
+ * abbreviated key to the first column value.
+ */
+ void (*removeabbrev) (Tuplesortstate *state, SortTuple *stups,
+ int count);
+
+ /*
+ * Function to write a stored tuple onto tape. The representation of the
+ * tuple on tape need not be the same as it is in memory.
+ */
+ void (*writetup) (Tuplesortstate *state, LogicalTape *tape,
+ SortTuple *stup);
+
+ /*
+ * Function to read a stored tuple from tape back into memory. 'len' is
+ * the already-read length of the stored tuple. The tuple is allocated
+ * from the slab memory arena, or is palloc'd, see
+ * tuplesort_readtup_alloc().
+ */
+ void (*readtup) (Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len);
+
+ /*
+ * Function to do some specific release of resources for the sort variant.
+ * In particular, this function should free everything stored in the "arg"
+ * field, which wouldn't be cleared on reset of the Tuple sort memory
+ * contextes. This can be NULL if nothing specific needs to be done.
+ */
+ void (*freestate) (Tuplesortstate *state);
+
+ /*
+ * The subsequent fields are used in the implementations of the functions
+ * above.
+ */
+ MemoryContext maincontext; /* memory context for tuple sort metadata that
+ * persists across multiple batches */
+ MemoryContext sortcontext; /* memory context holding most sort data */
+ MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */
+
+ /*
+ * Whether SortTuple's datum1 and isnull1 members are maintained by the
+ * above routines. If not, some sort specializations are disabled.
+ */
+ bool haveDatum1;
+
+ /*
+ * The sortKeys variable is used by every case other than the hash index
+ * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the
+ * MinimalTuple and CLUSTER routines, though.
+ */
+ int nKeys; /* number of columns in sort key */
+ SortSupport sortKeys; /* array of length nKeys */
+
+ /*
+ * This variable is shared by the single-key MinimalTuple case and the
+ * Datum case (which both use qsort_ssup()). Otherwise, it's NULL. The
+ * presence of a value in this field is also checked by various sort
+ * specialization functions as an optimization when comparing the leading
+ * key in a tiebreak situation to determine if there are any subsequent
+ * keys to sort on.
+ */
+ SortSupport onlyKey;
+
+ int sortopt; /* Bitmask of flags used to setup sort */
+
+ bool tuples; /* Can SortTuple.tuple ever be set? */
+
+ void *arg; /* Specific information for the sort variant */
+} TuplesortPublic;
+
+/* Sort parallel code from state for sort__start probes */
+#define PARALLEL_SORT(coordinate) (coordinate == NULL || \
+ (coordinate)->sharedsort == NULL ? 0 : \
+ (coordinate)->isWorker ? 1 : 2)
+
+#define TuplesortstateGetPublic(state) ((TuplesortPublic *) state)
+
+/* When using this macro, beware of double evaluation of len */
+#define LogicalTapeReadExact(tape, ptr, len) \
+ do { \
+ if (LogicalTapeRead(tape, ptr, len) != (size_t) (len)) \
+ elog(ERROR, "unexpected end of data"); \
+ } while(0)
/*
* We provide multiple interfaces to what is essentially the same code,
* generated (typically, caller uses a parallel heap scan).
*/
+
+extern Tuplesortstate *tuplesort_begin_common(int workMem,
+ SortCoordinate coordinate,
+ int sortopt);
+extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
+extern bool tuplesort_used_bound(Tuplesortstate *state);
+extern void tuplesort_puttuple_common(Tuplesortstate *state,
+ SortTuple *tuple, bool useAbbrev);
+extern void tuplesort_performsort(Tuplesortstate *state);
+extern bool tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
+ SortTuple *stup);
+extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
+ bool forward);
+extern void tuplesort_end(Tuplesortstate *state);
+extern void tuplesort_reset(Tuplesortstate *state);
+
+extern void tuplesort_get_stats(Tuplesortstate *state,
+ TuplesortInstrumentation *stats);
+extern const char *tuplesort_method_name(TuplesortMethod m);
+extern const char *tuplesort_space_type_name(TuplesortSpaceType t);
+
+extern int tuplesort_merge_order(int64 allowedMem);
+
+extern Size tuplesort_estimate_shared(int nworkers);
+extern void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers,
+ dsm_segment *seg);
+extern void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg);
+
+/*
+ * These routines may only be called if randomAccess was specified 'true'.
+ * Likewise, backwards scan in gettuple/getdatum is only allowed if
+ * randomAccess was specified. Note that parallel sorts do not support
+ * randomAccess.
+ */
+
+extern void tuplesort_rescan(Tuplesortstate *state);
+extern void tuplesort_markpos(Tuplesortstate *state);
+extern void tuplesort_restorepos(Tuplesortstate *state);
+
+extern void *tuplesort_readtup_alloc(Tuplesortstate *state, Size tuplen);
+
+
+/* tuplesortvariants.c */
+
extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
int nkeys, AttrNumber *attNums,
Oid *sortOperators, Oid *sortCollations,
int workMem, SortCoordinate coordinate,
int sortopt);
-extern void tuplesort_set_bound(Tuplesortstate *state, int64 bound);
-extern bool tuplesort_used_bound(Tuplesortstate *state);
-
extern void tuplesort_puttupleslot(Tuplesortstate *state,
TupleTableSlot *slot);
extern void tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup);
extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
bool isNull);
-extern void tuplesort_performsort(Tuplesortstate *state);
-
extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
bool copy, TupleTableSlot *slot, Datum *abbrev);
extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward);
extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
Datum *val, bool *isNull, Datum *abbrev);
-extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
- bool forward);
-
-extern void tuplesort_end(Tuplesortstate *state);
-
-extern void tuplesort_reset(Tuplesortstate *state);
-
-extern void tuplesort_get_stats(Tuplesortstate *state,
- TuplesortInstrumentation *stats);
-extern const char *tuplesort_method_name(TuplesortMethod m);
-extern const char *tuplesort_space_type_name(TuplesortSpaceType t);
-
-extern int tuplesort_merge_order(int64 allowedMem);
-
-extern Size tuplesort_estimate_shared(int nworkers);
-extern void tuplesort_initialize_shared(Sharedsort *shared, int nWorkers,
- dsm_segment *seg);
-extern void tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg);
-
-/*
- * These routines may only be called if randomAccess was specified 'true'.
- * Likewise, backwards scan in gettuple/getdatum is only allowed if
- * randomAccess was specified. Note that parallel sorts do not support
- * randomAccess.
- */
-
-extern void tuplesort_rescan(Tuplesortstate *state);
-extern void tuplesort_markpos(Tuplesortstate *state);
-extern void tuplesort_restorepos(Tuplesortstate *state);
#endif /* TUPLESORT_H */