From ba61d646d816713e0c4bb6fc866488c1232ef275 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 21 Nov 2017 17:19:15 +0300 Subject: [PATCH 001/182] Improve support PostgreSQL 11devel --- Makefile | 6 ++-- src/rum_ts_utils.c | 6 ++-- src/rumbtree.c | 10 +++--- src/rumbulk.c | 2 +- src/rumdatapage.c | 10 +++--- src/rumentrypage.c | 16 ++++----- src/rumget.c | 86 +++++++++++++++++++++++----------------------- src/ruminsert.c | 10 +++--- src/rumsort.c | 10 +++--- src/rumvacuum.c | 20 +++++------ t/001_wal.pl | 2 +- 11 files changed, 90 insertions(+), 88 deletions(-) diff --git a/Makefile b/Makefile index 0d854b70cc..8ea9be0e9f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ DATA = rum--1.0.sql DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql -DATA_built = rum--$(EXTVERSION).sql $(DATA_updates) +SQL_built = rum--$(EXTVERSION).sql $(DATA_updates) INCLUDES = rum.h rumsort.h RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) @@ -47,7 +47,7 @@ endif wal-check: temp-install $(prove_check) -all: rum--$(EXTVERSION).sql +all: $(SQL_built) #9.6 requires 1.2 file but 10.0 could live with update files rum--$(EXTVERSION).sql: $(DATA) $(DATA_updates) @@ -62,11 +62,13 @@ install: installincludes installincludes: $(INSTALL) -d '$(DESTDIR)$(includedir_server)/' $(INSTALL_DATA) $(addprefix $(srcdir)/, $(RELATIVE_INCLUDES)) '$(DESTDIR)$(includedir_server)/' + $(INSTALL_DATA) $(SQL_built) '$(DESTDIR)$(datadir)/$(datamoduledir)/' uninstall: uninstallincludes uninstallincludes: rm -f $(addprefix '$(DESTDIR)$(includedir_server)/', $(INCLUDES)) + rm -f $(addprefix '$(DESTDIR)$(datadir)/$(datamoduledir)'/, $(notdir $(SQL_built))) ISOLATIONCHECKS= predicate-rum predicate-rum-2 diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 07faabe42c..38d5075c7e 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -171,7 +171,7 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) Pointer *extra_data = (Pointer *) 
PG_GETARG_POINTER(4); bool recheck; - bool res = FALSE; + bool res = false; if (query->size > 0) { @@ -268,7 +268,7 @@ rum_tsquery_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - bool res = FALSE; + bool res = false; /* * The query requires recheck only if it involves weights @@ -313,7 +313,7 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - bool res = FALSE; + bool res = false; /* * The query requires recheck only if it involves weights diff --git a/src/rumbtree.c b/src/rumbtree.c index 825e7618e4..f6244f5308 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -32,7 +32,7 @@ rumTraverseLock(Buffer buffer, bool searchMode) page = BufferGetPage(buffer); if (RumPageIsLeaf(page)) { - if (searchMode == FALSE) + if (searchMode == false) { /* we should relock our page */ LockBuffer(buffer, RUM_UNLOCK); @@ -120,7 +120,7 @@ rumReFindLeafPage(RumBtree btree, RumBtreeStack * stack) RumBtreeStack * rumFindLeafPage(RumBtree btree, RumBtreeStack * stack) { - bool isfirst = TRUE; + bool isfirst = true; BlockNumber rootBlkno; if (!stack) @@ -141,7 +141,7 @@ rumFindLeafPage(RumBtree btree, RumBtreeStack * stack) { if (RumPageIsLeaf(page) && !btree->searchMode) access = RUM_EXCLUSIVE; - isfirst = FALSE; + isfirst = false; } else access = rumTraverseLock(stack->buffer, btree->searchMode); @@ -150,7 +150,7 @@ rumFindLeafPage(RumBtree btree, RumBtreeStack * stack) * ok, page is correctly locked, we should check to move right .., * root never has a right link, so small optimization */ - while (btree->fullScan == FALSE && stack->blkno != rootBlkno && + while (btree->fullScan == false && stack->blkno != rootBlkno && btree->isMoveRight(btree, page)) { BlockNumber rightlink = 
RumPageGetOpaque(page)->rightlink; @@ -605,7 +605,7 @@ rumInsertValue(Relation index, RumBtree btree, RumBtreeStack * stack, } } - btree->isDelete = FALSE; + btree->isDelete = false; /* search parent to lock */ LockBuffer(parent->buffer, RUM_EXCLUSIVE); diff --git a/src/rumbulk.c b/src/rumbulk.c index 68f7fdef08..6e08056c55 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -56,7 +56,7 @@ rumCombineData(RBNode *existing, const RBNode *newdata, void *arg) Assert(res != 0); if (res > 0) - eo->shouldSort = TRUE; + eo->shouldSort = true; } eo->list[eo->count] = en->list[0]; diff --git a/src/rumdatapage.c b/src/rumdatapage.c index a2af672871..bd112930dd 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -631,7 +631,7 @@ dataLocateLeafItem(RumBtree btree, RumBtreeStack * stack) if (btree->fullScan) { stack->off = FirstOffsetNumber; - return TRUE; + return true; } return findInLeafPage(btree, page, &stack->off, &iptr, &ptr); @@ -1460,10 +1460,10 @@ rumPrepareDataScan(RumBtree btree, Relation index, OffsetNumber attnum, RumState btree->splitPage = dataSplitPage; btree->fillRoot = rumDataFillRoot; - btree->isData = TRUE; - btree->searchMode = FALSE; - btree->isDelete = FALSE; - btree->fullScan = FALSE; + btree->isData = true; + btree->searchMode = false; + btree->isDelete = false; + btree->fullScan = false; btree->scanDirection = ForwardScanDirection; btree->entryAttnum = attnum; diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 3f7e934651..7029942e78 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -125,7 +125,7 @@ entryIsMoveRight(RumBtree btree, Page page) RumNullCategory category; if (RumPageRightMost(page)) - return FALSE; + return false; itup = getRightMostTuple(page); attnum = rumtuple_get_attrnum(btree->rumstate, itup); @@ -134,9 +134,9 @@ entryIsMoveRight(RumBtree btree, Page page) if (rumCompareAttEntries(btree->rumstate, btree->entryAttnum, btree->entryKey, btree->entryCategory, attnum, key, category) > 0) - return TRUE; + return true; - 
return FALSE; + return false; } /* @@ -232,7 +232,7 @@ entryLocateLeafEntry(RumBtree btree, RumBtreeStack * stack) if (btree->fullScan) { stack->off = FirstOffsetNumber; - return TRUE; + return true; } low = FirstOffsetNumber; @@ -558,12 +558,12 @@ rumPrepareEntryScan(RumBtree btree, OffsetNumber attnum, btree->splitPage = entrySplitPage; btree->fillRoot = rumEntryFillRoot; - btree->isData = FALSE; - btree->searchMode = FALSE; - btree->fullScan = FALSE; + btree->isData = false; + btree->searchMode = false; + btree->fullScan = false; btree->entryAttnum = attnum; btree->entryKey = key; btree->entryCategory = category; - btree->isDelete = FALSE; + btree->isDelete = false; } diff --git a/src/rumget.c b/src/rumget.c index 119765d941..33c0672acf 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -221,7 +221,7 @@ scanPostingTree(Relation index, RumScanEntry scanEntry, Assert(ScanDirectionIsForward(scanEntry->scanDirection)); /* Descend to the leftmost leaf page */ - gdi = rumPrepareScanPostingTree(index, rootPostingTree, TRUE, + gdi = rumPrepareScanPostingTree(index, rootPostingTree, true, ForwardScanDirection, attnum, rumstate); buffer = rumScanBeginPostingTree(gdi, NULL); @@ -550,7 +550,7 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->stack = NULL; entry->nlist = 0; entry->matchSortstate = NULL; - entry->reduceResult = FALSE; + entry->reduceResult = false; entry->predictNumberResult = 0; /* @@ -560,12 +560,12 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) rumPrepareEntryScan(&btreeEntry, entry->attnum, entry->queryKey, entry->queryCategory, rumstate); - btreeEntry.searchMode = TRUE; + btreeEntry.searchMode = true; stackEntry = rumFindLeafPage(&btreeEntry, NULL); page = BufferGetPage(stackEntry->buffer); - needUnlock = TRUE; + needUnlock = true; - entry->isFinished = TRUE; + entry->isFinished = true; PredicateLockPage(rumstate->index, BufferGetBlockNumber(stackEntry->buffer), snapshot); @@ -602,7 +602,7 @@ 
startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) { rum_tuplesort_performsort(entry->matchSortstate); ItemPointerSetMin(&entry->collectRumItem.item.iptr); - entry->isFinished = FALSE; + entry->isFinished = false; } } else if (btreeEntry.findItem(&btreeEntry, stackEntry) || @@ -631,8 +631,8 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) * root of posting tree. */ LockBuffer(stackEntry->buffer, RUM_UNLOCK); - needUnlock = FALSE; - gdi = rumPrepareScanPostingTree(rumstate->index, rootPostingTree, TRUE, + needUnlock = false; + gdi = rumPrepareScanPostingTree(rumstate->index, rootPostingTree, true, entry->scanDirection, entry->attnum, rumstate); entry->buffer = rumScanBeginPostingTree(gdi, entry->useMarkAddInfo ? @@ -713,9 +713,9 @@ cmpEntries(RumState *rumstate, RumScanEntry e1, RumScanEntry e2) { int res; - if (e1->isFinished == TRUE) + if (e1->isFinished == true) { - if (e2->isFinished == TRUE) + if (e2->isFinished == true) return 0; else return 1; @@ -780,7 +780,7 @@ startScan(IndexScanDesc scan) for (i = 0; i < so->totalentries; i++) { so->entries[i]->predictNumberResult /= so->totalentries; - so->entries[i]->reduceResult = TRUE; + so->entries[i]->reduceResult = true; } } } @@ -900,7 +900,7 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) ItemPointerSetInvalid(&entry->curItem.iptr); entry->buffer = InvalidBuffer; - entry->isFinished = TRUE; + entry->isFinished = true; entry->gdi->stack->buffer = InvalidBuffer; return; } @@ -989,7 +989,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->nlist = 0; } entry->matchSortstate = NULL; - entry->reduceResult = FALSE; + entry->reduceResult = false; entry->predictNumberResult = 0; rumPrepareEntryScan(&btree, entry->attnum, @@ -1003,7 +1003,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) if (!moveRightIfItNeeded(&btree, entry->stack)) { 
ItemPointerSetInvalid(&entry->curItem.iptr); - entry->isFinished = TRUE; + entry->isFinished = true; LockBuffer(entry->stack->buffer, RUM_UNLOCK); return false; } @@ -1019,7 +1019,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) if (rumtuple_get_attrnum(btree.rumstate, itup) != entry->attnum) { ItemPointerSetInvalid(&entry->curItem.iptr); - entry->isFinished = TRUE; + entry->isFinished = true; LockBuffer(entry->stack->buffer, RUM_UNLOCK); return false; } @@ -1049,7 +1049,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) LockBuffer(entry->stack->buffer, RUM_UNLOCK); needUnlock = false; gdi = rumPrepareScanPostingTree(rumstate->index, - rootPostingTree, TRUE, entry->scanDirection, + rootPostingTree, true, entry->scanDirection, entry->attnumOrig, rumstate); entry->buffer = rumScanBeginPostingTree(gdi, NULL); @@ -1082,7 +1082,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) } LockBuffer(entry->buffer, RUM_UNLOCK); - entry->isFinished = FALSE; + entry->isFinished = false; } else if (RumGetNPosting(itup) > 0) { @@ -1117,7 +1117,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) /* * Sets entry->curItem to next heap item pointer for one entry of one scan key, - * or sets entry->isFinished to TRUE if there are no more. + * or sets entry->isFinished to true if there are no more. * * Item pointers must be returned in ascending order. 
*/ @@ -1141,7 +1141,7 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps /* We are finished, but should return last result */ if (ItemPointerIsMax(&entry->collectRumItem.item.iptr)) { - entry->isFinished = TRUE; + entry->isFinished = true; rum_tuplesort_end(entry->matchSortstate); entry->matchSortstate = NULL; break; @@ -1250,7 +1250,7 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps /* even current call is last */ if (ItemPointerIsMin(&entry->curItem.iptr)) { - entry->isFinished = TRUE; + entry->isFinished = true; rum_tuplesort_end(entry->matchSortstate); entry->matchSortstate = NULL; break; @@ -1274,7 +1274,7 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps else { ItemPointerSetInvalid(&entry->curItem.iptr); - entry->isFinished = TRUE; + entry->isFinished = true; } } /* Get next item from posting tree */ @@ -1283,12 +1283,12 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps do { entryGetNextItem(rumstate, entry, snapshot); - } while (entry->isFinished == FALSE && - entry->reduceResult == TRUE && + } while (entry->isFinished == false && + entry->reduceResult == true && dropItem(entry)); if (entry->stack && entry->isFinished) { - entry->isFinished = FALSE; + entry->isFinished = false; if (entryGetNextItemList(rumstate, entry, snapshot) && nextEntryList) *nextEntryList = true; } @@ -1304,7 +1304,7 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps * TID passes the consistentFn test. If so, key->recheckCurItem is set true * iff recheck is needed for this item pointer * - * If all entry streams are exhausted, sets key->isFinished to TRUE. + * If all entry streams are exhausted, sets key->isFinished to true. * * Item pointers must be returned in ascending order. 
*/ @@ -1366,7 +1366,7 @@ keyGetItem(RumState * rumstate, MemoryContext tempCtx, RumScanKey key) if (allFinished) { /* all entries are finished */ - key->isFinished = TRUE; + key->isFinished = true; return; } @@ -1390,16 +1390,16 @@ keyGetItem(RumState * rumstate, MemoryContext tempCtx, RumScanKey key) for (i = 0; i < key->nentries; i++) { entry = key->scanEntry[i]; - if (entry->isFinished == FALSE && + if (entry->isFinished == false && rumCompareItemPointers(&entry->curItem.iptr, &key->curItem.iptr) == 0) { - key->entryRes[i] = TRUE; + key->entryRes[i] = true; key->addInfo[i] = entry->curItem.addInfo; key->addInfoIsNull[i] = entry->curItem.addInfoIsNull; } else { - key->entryRes[i] = FALSE; + key->entryRes[i] = false; key->addInfo[i] = (Datum) 0; key->addInfoIsNull[i] = true; } @@ -1441,13 +1441,13 @@ scanGetItemRegular(IndexScanDesc scan, RumItem *advancePast, * scan direction. On first call myAdvancePast is invalid, * so anyway we are needed to call entryGetItem() */ - allFinished = TRUE; + allFinished = true; for (i = 0; i < so->totalentries; i++) { RumScanEntry entry = so->entries[i]; - while (entry->isFinished == FALSE && + while (entry->isFinished == false && (!ItemPointerIsValid(&myAdvancePast.iptr) || compareCurRumItemScanDirection(rumstate, entry, &myAdvancePast) <= 0)) @@ -1687,7 +1687,7 @@ entryFindItem(RumState * rumstate, RumScanEntry entry, RumItem * item, Snapshot { if (entry->nlist == 0) { - entry->isFinished = TRUE; + entry->isFinished = true; return; } @@ -1720,7 +1720,7 @@ entryFindItem(RumState * rumstate, RumScanEntry entry, RumItem * item, Snapshot if (!BufferIsValid(entry->buffer)) { - entry->isFinished = TRUE; + entry->isFinished = true; return; } @@ -1762,7 +1762,7 @@ entryFindItem(RumState * rumstate, RumScanEntry entry, RumItem * item, Snapshot if (entry->buffer == InvalidBuffer) { ItemPointerSetInvalid(&entry->curItem.iptr); - entry->isFinished = TRUE; + entry->isFinished = true; return; } @@ -1927,7 +1927,7 @@ 
scanGetItemFast(IndexScanDesc scan, RumItem *advancePast, * If we found false in preConsistent then we can safely move entries * which was true in preConsistent argument. */ - if (so->sortedEntries[i - 1]->isFinished == TRUE) + if (so->sortedEntries[i - 1]->isFinished == true) return false; if (preConsistentResult == false) @@ -1949,17 +1949,17 @@ scanGetItemFast(IndexScanDesc scan, RumItem *advancePast, { RumScanEntry entry = key->scanEntry[j]; - if (entry->isFinished == FALSE && + if (entry->isFinished == false && rumCompareItemPointers(&entry->curItem.iptr, &so->sortedEntries[so->totalentries - 1]->curItem.iptr) == 0) { - key->entryRes[j] = TRUE; + key->entryRes[j] = true; key->addInfo[j] = entry->curItem.addInfo; key->addInfoIsNull[j] = entry->curItem.addInfoIsNull; } else { - key->entryRes[j] = FALSE; + key->entryRes[j] = false; key->addInfo[j] = (Datum) 0; key->addInfoIsNull[j] = true; } @@ -2029,11 +2029,11 @@ scanGetItemFull(IndexScanDesc scan, RumItem *advancePast, entry = so->entries[0]; entryGetItem(&so->rumstate, entry, &nextEntryList, scan->xs_snapshot); - if (entry->isFinished == TRUE) + if (entry->isFinished == true) return false; /* Fill outerAddInfo */ - key->entryRes[0] = TRUE; + key->entryRes[0] = true; key->addInfo[0] = entry->curItem.addInfo; key->addInfoIsNull[0] = entry->curItem.addInfoIsNull; callAddInfoConsistentFn(&so->rumstate, key); @@ -2045,7 +2045,7 @@ scanGetItemFull(IndexScanDesc scan, RumItem *advancePast, RumScanEntry orderEntry = so->entries[i]; if (orderEntry->nlist > 0) { - orderEntry->isFinished = FALSE; + orderEntry->isFinished = false; orderEntry->offset = InvalidOffsetNumber; RumItemSetMin(&orderEntry->curItem); } @@ -2055,7 +2055,7 @@ scanGetItemFull(IndexScanDesc scan, RumItem *advancePast, { RumScanEntry orderEntry = so->entries[i]; - while (orderEntry->isFinished == FALSE && + while (orderEntry->isFinished == false && (!ItemPointerIsValid(&orderEntry->curItem.iptr) || compareCurRumItemScanDirection(&so->rumstate, 
orderEntry, &entry->curItem) < 0)) @@ -2170,7 +2170,7 @@ keyGetOrdering(RumState * rumstate, MemoryContext tempCtx, RumScanKey key, for (i = 0; i < key->nentries; i++) { entry = key->scanEntry[i]; - if (entry->isFinished == FALSE && + if (entry->isFinished == false && rumCompareItemPointers(&entry->curItem.iptr, iptr) == 0) { key->addInfo[i] = entry->curItem.addInfo; diff --git a/src/ruminsert.c b/src/ruminsert.c index 6a74c994e0..f2870afcb2 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -93,7 +93,7 @@ createPostingTree(RumState * rumstate, OffsetNumber attnum, Relation index, * Form a tuple for entry tree. * * If the tuple would be too big to be stored, function throws a suitable - * error if errorTooBig is TRUE, or returns NULL if errorTooBig is FALSE. + * error if errorTooBig is true, or returns NULL if errorTooBig is false. * * See src/backend/access/gin/README for a description of the index tuple * format that is being built here. We build on the assumption that we @@ -297,7 +297,7 @@ addItemPointersToLeafTuple(RumState * rumstate, buildStats->nDataPages++; /* Now insert the TIDs-to-be-added into the posting tree */ - gdi = rumPrepareScanPostingTree(rumstate->index, postingRoot, FALSE, + gdi = rumPrepareScanPostingTree(rumstate->index, postingRoot, false, ForwardScanDirection, attnum, rumstate); rumInsertItemPointers(rumstate, attnum, gdi, items, nitem, buildStats); @@ -374,7 +374,7 @@ buildFreshLeafTuple(RumState * rumstate, { RumPostingTreeScan *gdi; - gdi = rumPrepareScanPostingTree(rumstate->index, postingRoot, FALSE, + gdi = rumPrepareScanPostingTree(rumstate->index, postingRoot, false, ForwardScanDirection, attnum, rumstate); @@ -441,7 +441,7 @@ rumEntryInsert(RumState * rumstate, /* insert into posting tree */ gdi = rumPrepareScanPostingTree(rumstate->index, rootPostingTree, - FALSE, ForwardScanDirection, + false, ForwardScanDirection, attnum, rumstate); rumInsertItemPointers(rumstate, attnum, gdi, items, nitem, buildStats); @@ -454,7 +454,7 @@ 
rumEntryInsert(RumState * rumstate, itup = addItemPointersToLeafTuple(rumstate, itup, items, nitem, buildStats); - btree.isDelete = TRUE; + btree.isDelete = true; } else { diff --git a/src/rumsort.c b/src/rumsort.c index 7d60c72bb9..748c1191d4 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -1381,7 +1381,7 @@ rum_tuplesort_end(RumTuplesortstate *state) /* * Grow the memtuples[] array, if possible within our memory constraint. - * Return TRUE if we were able to enlarge the array, FALSE if not. + * Return true if we were able to enlarge the array, false if not. * * Normally, at each increment we double the size of the array. When we no * longer have enough memory to do that, we attempt one last, smaller increase @@ -1853,7 +1853,7 @@ rum_tuplesort_performsort(RumTuplesortstate *state) /* * Internal routine to fetch the next tuple in either forward or back - * direction into *stup. Returns FALSE if no more tuples. + * direction into *stup. Returns false if no more tuples. * If *should_free is set, the caller must pfree stup.tuple when done with it. */ static bool @@ -2054,8 +2054,8 @@ rum_tuplesort_gettuple_common(RumTuplesortstate *state, bool forward, /* * Fetch the next tuple in either forward or back direction. - * If successful, put tuple in slot and return TRUE; else, clear the slot - * and return FALSE. + * If successful, put tuple in slot and return true; else, clear the slot + * and return false. */ bool rum_tuplesort_gettupleslot(RumTuplesortstate *state, bool forward, @@ -2123,7 +2123,7 @@ rum_tuplesort_getindextuple(RumTuplesortstate *state, bool forward, /* * Fetch the next Datum in either forward or back direction. - * Returns FALSE if no more datums. + * Returns false if no more datums. * * If the Datum is pass-by-ref type, the returned value is freshly palloc'd * and is now owned by the caller. 
diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 96aaa84829..75ed121758 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -214,7 +214,7 @@ rumVacuumPostingTreeLeaves(RumVacuumState * gvs, OffsetNumber attnum, { Buffer buffer; Page page; - bool hasVoidPage = FALSE; + bool hasVoidPage = false; buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); @@ -264,7 +264,7 @@ rumVacuumPostingTreeLeaves(RumVacuumState * gvs, OffsetNumber attnum, /* if root is a leaf page, we don't desire further processing */ if (!isRoot && RumPageGetOpaque(newPage)->maxoff < FirstOffsetNumber) - hasVoidPage = TRUE; + hasVoidPage = true; GenericXLogFinish(state); } @@ -272,19 +272,19 @@ rumVacuumPostingTreeLeaves(RumVacuumState * gvs, OffsetNumber attnum, else { OffsetNumber i; - bool isChildHasVoid = FALSE; + bool isChildHasVoid = false; for (i = FirstOffsetNumber; i <= RumPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = (PostingItem *) RumDataPageGetItem(page, i); if (rumVacuumPostingTreeLeaves(gvs, attnum, - PostingItemGetBlockNumber(pitem), FALSE, NULL)) - isChildHasVoid = TRUE; + PostingItemGetBlockNumber(pitem), false, NULL)) + isChildHasVoid = true; } if (isChildHasVoid) - hasVoidPage = TRUE; + hasVoidPage = true; } /* @@ -494,7 +494,7 @@ rumScanToDelete(RumVacuumState * gvs, BlockNumber blkno, bool isRoot, { PostingItem *pitem = (PostingItem *) RumDataPageGetItem(page, i); - if (rumScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) + if (rumScanToDelete(gvs, PostingItemGetBlockNumber(pitem), false, me, i)) i--; } } @@ -522,18 +522,18 @@ rumVacuumPostingTree(RumVacuumState * gvs, OffsetNumber attnum, BlockNumber root *ptr, *tmp; - if (rumVacuumPostingTreeLeaves(gvs, attnum, rootBlkno, TRUE, &rootBuffer) == FALSE) + if (rumVacuumPostingTreeLeaves(gvs, attnum, rootBlkno, true, &rootBuffer) == false) { Assert(rootBuffer == InvalidBuffer); return; } memset(&root, 0, sizeof(DataPageDeleteStack)); - root.isRoot = TRUE; + 
root.isRoot = true; vacuum_delay_point(); - rumScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber); + rumScanToDelete(gvs, rootBlkno, true, &root, InvalidOffsetNumber); ptr = root.child; while (ptr) diff --git a/t/001_wal.pl b/t/001_wal.pl index 053aecadb5..182dc22073 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -16,7 +16,7 @@ sub test_index_replay # Wait for standby to catch up my $applname = $node_standby->name; my $caughtup_query = - "SELECT pg_current_xlog_location() <= write_location FROM pg_stat_replication WHERE application_name = '$applname';"; + "SELECT pg_current_wal_lsn() <= write_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; $node_master->poll_query_until('postgres', $caughtup_query) or die "Timed out while waiting for standby 1 to catch up"; From 99ebca13b7437a90af4126c5d48068245d95f1b8 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 24 Nov 2017 17:26:15 +0300 Subject: [PATCH 002/182] Revert commit ba61d64 for Makefile --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 8ea9be0e9f..0d854b70cc 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ DATA = rum--1.0.sql DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql -SQL_built = rum--$(EXTVERSION).sql $(DATA_updates) +DATA_built = rum--$(EXTVERSION).sql $(DATA_updates) INCLUDES = rum.h rumsort.h RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) @@ -47,7 +47,7 @@ endif wal-check: temp-install $(prove_check) -all: $(SQL_built) +all: rum--$(EXTVERSION).sql #9.6 requires 1.2 file but 10.0 could live with update files rum--$(EXTVERSION).sql: $(DATA) $(DATA_updates) @@ -62,13 +62,11 @@ install: installincludes installincludes: $(INSTALL) -d '$(DESTDIR)$(includedir_server)/' $(INSTALL_DATA) $(addprefix $(srcdir)/, $(RELATIVE_INCLUDES)) '$(DESTDIR)$(includedir_server)/' - $(INSTALL_DATA) $(SQL_built) '$(DESTDIR)$(datadir)/$(datamoduledir)/' 
uninstall: uninstallincludes uninstallincludes: rm -f $(addprefix '$(DESTDIR)$(includedir_server)/', $(INCLUDES)) - rm -f $(addprefix '$(DESTDIR)$(datadir)/$(datamoduledir)'/, $(notdir $(SQL_built))) ISOLATIONCHECKS= predicate-rum predicate-rum-2 From 1c6ae7f0fec3d944216d25cbae3e308113070302 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 24 Nov 2017 17:27:23 +0300 Subject: [PATCH 003/182] Revert "Revert commit ba61d64 for Makefile" This reverts commit 99ebca13b7437a90af4126c5d48068245d95f1b8. --- Makefile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 0d854b70cc..8ea9be0e9f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ DATA = rum--1.0.sql DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql -DATA_built = rum--$(EXTVERSION).sql $(DATA_updates) +SQL_built = rum--$(EXTVERSION).sql $(DATA_updates) INCLUDES = rum.h rumsort.h RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) @@ -47,7 +47,7 @@ endif wal-check: temp-install $(prove_check) -all: rum--$(EXTVERSION).sql +all: $(SQL_built) #9.6 requires 1.2 file but 10.0 could live with update files rum--$(EXTVERSION).sql: $(DATA) $(DATA_updates) @@ -62,11 +62,13 @@ install: installincludes installincludes: $(INSTALL) -d '$(DESTDIR)$(includedir_server)/' $(INSTALL_DATA) $(addprefix $(srcdir)/, $(RELATIVE_INCLUDES)) '$(DESTDIR)$(includedir_server)/' + $(INSTALL_DATA) $(SQL_built) '$(DESTDIR)$(datadir)/$(datamoduledir)/' uninstall: uninstallincludes uninstallincludes: rm -f $(addprefix '$(DESTDIR)$(includedir_server)/', $(INCLUDES)) + rm -f $(addprefix '$(DESTDIR)$(datadir)/$(datamoduledir)'/, $(notdir $(SQL_built))) ISOLATIONCHECKS= predicate-rum predicate-rum-2 From 7307988c6eb108094b28a3d9f3a0357b0e40cb05 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 24 Nov 2017 17:36:01 +0300 Subject: [PATCH 004/182] Do not remove generated *.sql files --- Makefile | 11 ++++++----- 1 file changed, 6 
insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 8ea9be0e9f..afe1f7f425 100644 --- a/Makefile +++ b/Makefile @@ -11,8 +11,11 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \ src/btree_rum.o src/rum_arr_utils.o $(WIN32RES) -DATA = rum--1.0.sql +DATA_first = rum--1.0.sql DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql +DATA = $(DATA_first) rum--$(EXTVERSION).sql $(DATA_updates) + +# Do not use DATA_built. It removes built files if clean target was used SQL_built = rum--$(EXTVERSION).sql $(DATA_updates) INCLUDES = rum.h rumsort.h @@ -50,8 +53,8 @@ wal-check: temp-install all: $(SQL_built) #9.6 requires 1.2 file but 10.0 could live with update files -rum--$(EXTVERSION).sql: $(DATA) $(DATA_updates) - cat $(DATA) $(DATA_updates) > rum--$(EXTVERSION).sql +rum--$(EXTVERSION).sql: $(DATA_first) $(DATA_updates) + cat $(DATA_first) $(DATA_updates) > rum--$(EXTVERSION).sql # rule for updates, e.g. rum--1.0--1.1.sql rum--%.sql: gen_rum_sql--%.pl @@ -62,13 +65,11 @@ install: installincludes installincludes: $(INSTALL) -d '$(DESTDIR)$(includedir_server)/' $(INSTALL_DATA) $(addprefix $(srcdir)/, $(RELATIVE_INCLUDES)) '$(DESTDIR)$(includedir_server)/' - $(INSTALL_DATA) $(SQL_built) '$(DESTDIR)$(datadir)/$(datamoduledir)/' uninstall: uninstallincludes uninstallincludes: rm -f $(addprefix '$(DESTDIR)$(includedir_server)/', $(INCLUDES)) - rm -f $(addprefix '$(DESTDIR)$(datadir)/$(datamoduledir)'/, $(notdir $(SQL_built))) ISOLATIONCHECKS= predicate-rum predicate-rum-2 From c654b78001f668f530d2e9cdbdf11327d0706f8b Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 27 Nov 2017 18:23:45 +0300 Subject: [PATCH 005/182] Issue #31. 
Add information about installation via PGXN --- README.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8e50ec69cb..6a4c1e18d1 100644 --- a/README.md +++ b/README.md @@ -41,13 +41,21 @@ Before build and install **rum** you should ensure following: Typical installation procedure may look like this: +### Using GitHub repository + $ git clone https://github.com/postgrespro/rum $ cd rum $ make USE_PGXS=1 - $ sudo make USE_PGXS=1 install + $ make USE_PGXS=1 install $ make USE_PGXS=1 installcheck $ psql DB -c "CREATE EXTENSION rum;" +### Using PGXN + + $ USE_PGXS=1 pgxn install rum + +> **Important:** Don't forget to set the `PG_CONFIG` variable in case you want to test `RUM` on a custom build of PostgreSQL. Read more [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). + ## Common operators and functions **rum** module provides next operators. From 3f8e58b2d68a473e09a7a9f352f17b0e4034c43f Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 27 Nov 2017 18:28:18 +0300 Subject: [PATCH 006/182] Add LICENSE --- LICENSE | 11 +++++++++++ README.md | 3 +++ 2 files changed, 14 insertions(+) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..d73dbdb454 --- /dev/null +++ b/LICENSE @@ -0,0 +1,11 @@ +RUM is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. + +Copyright (c) 2015-2017, Postgres Professional +Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. 
+ +IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. \ No newline at end of file diff --git a/README.md b/README.md index 6a4c1e18d1..32e8196929 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ +[![PGXN version](https://badge.fury.io/pg/rum.svg)](https://badge.fury.io/pg/rum) +[![GitHub license](https://img.shields.io/badge/license-PostgreSQL-blue.svg)](https://raw.githubusercontent.com/postgrespro/rum/master/LICENSE) + [![Postgres Professional](img/PGpro-logo.png)](https://postgrespro.com/) # RUM - RUM access method From 2cdd6d23f31fa2b41664edacd48f23cbe4d7b197 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 27 Nov 2017 18:30:24 +0300 Subject: [PATCH 007/182] Fix bug in inverted fulltext search The array of operands in QueryItemWrap was reallocated on overflow. However, that reallocation moves the QueryItemWrap elements to a new location, so a parent pointer might become invalid. Fixed by moving operands from array to list. 
--- expected/ruminv.out | 29 ++++++++++-------- sql/ruminv.sql | 1 + src/rumtsquery.c | 75 +++++++++++++++++++++------------------------ 3 files changed, 53 insertions(+), 52 deletions(-) diff --git a/expected/ruminv.out b/expected/ruminv.out index 6fd5a6457b..840dcfc85c 100644 --- a/expected/ruminv.out +++ b/expected/ruminv.out @@ -9,6 +9,7 @@ INSERT INTO test_invrum VALUES ('(a|b)&c'::tsquery); INSERT INTO test_invrum VALUES ('(!(a|b))&c'::tsquery); INSERT INTO test_invrum VALUES ('(a|b)&(c|d)'::tsquery); INSERT INTO test_invrum VALUES ('!a'::tsquery); +INSERT INTO test_invrum VALUES ('(a|a1|a2|a3|a4|a5)&(b|b1|b2|b3|b4|b5|b6)&!(c|c1|c2|c3)'::tsquery); SELECT * FROM test_invrum WHERE q @@ ''::tsvector; q ---------------- @@ -36,12 +37,13 @@ SELECT * FROM test_invrum WHERE q @@ 'b'::tsvector; (4 rows) SELECT * FROM test_invrum WHERE q @@ 'a b'::tsvector; - q ------------- + q +-------------------------------------------------------------------------------------------------------------------------------- 'a' | 'b' 'a' & 'b' !'a' | 'b' -(3 rows) + ( 'a' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' ) & ( 'b' | 'b1' | 'b2' | 'b3' | 'b4' | 'b5' | 'b6' ) & !( 'c' | 'c1' | 'c2' | 'c3' ) +(4 rows) SELECT * FROM test_invrum WHERE q @@ 'c'::tsvector; q @@ -113,13 +115,14 @@ SELECT * FROM test_invrum WHERE q @@ 'b d'::tsvector; (5 rows) SELECT * FROM test_invrum WHERE q @@ 'a b d'::tsvector; - q -------------------------------- + q +-------------------------------------------------------------------------------------------------------------------------------- 'a' | 'b' 'a' & 'b' !'a' | 'b' ( 'a' | 'b' ) & ( 'c' | 'd' ) -(4 rows) + ( 'a' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' ) & ( 'b' | 'b1' | 'b2' | 'b3' | 'b4' | 'b5' | 'b6' ) & !( 'c' | 'c1' | 'c2' | 'c3' ) +(5 rows) SELECT * FROM test_invrum WHERE q @@ 'c d'::tsvector; q @@ -166,12 +169,13 @@ SELECT * FROM test_invrum WHERE q @@ 'b'::tsvector; (4 rows) SELECT * FROM test_invrum WHERE q @@ 'a b'::tsvector; - q ------------- + q 
+-------------------------------------------------------------------------------------------------------------------------------- 'a' | 'b' 'a' & 'b' !'a' | 'b' -(3 rows) + ( 'a' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' ) & ( 'b' | 'b1' | 'b2' | 'b3' | 'b4' | 'b5' | 'b6' ) & !( 'c' | 'c1' | 'c2' | 'c3' ) +(4 rows) SELECT * FROM test_invrum WHERE q @@ 'c'::tsvector; q @@ -243,13 +247,14 @@ SELECT * FROM test_invrum WHERE q @@ 'b d'::tsvector; (5 rows) SELECT * FROM test_invrum WHERE q @@ 'a b d'::tsvector; - q -------------------------------- + q +-------------------------------------------------------------------------------------------------------------------------------- 'a' | 'b' 'a' & 'b' !'a' | 'b' ( 'a' | 'b' ) & ( 'c' | 'd' ) -(4 rows) + ( 'a' | 'a1' | 'a2' | 'a3' | 'a4' | 'a5' ) & ( 'b' | 'b1' | 'b2' | 'b3' | 'b4' | 'b5' | 'b6' ) & !( 'c' | 'c1' | 'c2' | 'c3' ) +(5 rows) SELECT * FROM test_invrum WHERE q @@ 'c d'::tsvector; q diff --git a/sql/ruminv.sql b/sql/ruminv.sql index ec836fd165..b1c8eba709 100644 --- a/sql/ruminv.sql +++ b/sql/ruminv.sql @@ -10,6 +10,7 @@ INSERT INTO test_invrum VALUES ('(a|b)&c'::tsquery); INSERT INTO test_invrum VALUES ('(!(a|b))&c'::tsquery); INSERT INTO test_invrum VALUES ('(a|b)&(c|d)'::tsquery); INSERT INTO test_invrum VALUES ('!a'::tsquery); +INSERT INTO test_invrum VALUES ('(a|a1|a2|a3|a4|a5)&(b|b1|b2|b3|b4|b5|b6)&!(c|c1|c2|c3)'::tsquery); SELECT * FROM test_invrum WHERE q @@ ''::tsvector; SELECT * FROM test_invrum WHERE q @@ 'a'::tsvector; diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 242360346a..0a021aea66 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -25,9 +25,7 @@ typedef struct QueryItemWrap QueryItemType type; int8 oper; bool not; - int operandsCount, - operandsAllocated; - struct QueryItemWrap *operands; + List *operands; struct QueryItemWrap *parent; int distance, length; @@ -40,29 +38,12 @@ add_child(QueryItemWrap * parent) { QueryItemWrap *result; - if (!parent) - { - result = (QueryItemWrap *) 
palloc0(sizeof(QueryItemWrap)); - } - else + result = (QueryItemWrap *) palloc0(sizeof(QueryItemWrap)); + + if (parent) { - parent->operandsCount++; - while (parent->operandsCount > parent->operandsAllocated) - { - if (parent->operandsAllocated > 0) - { - parent->operandsAllocated *= 2; - parent->operands = (QueryItemWrap *) repalloc(parent->operands, parent->operandsAllocated * sizeof(*parent->operands)); - } - else - { - parent->operandsAllocated = 4; - parent->operands = (QueryItemWrap *) palloc(parent->operandsAllocated * sizeof(*parent->operands)); - } - } - result = &parent->operands[parent->operandsCount - 1]; - memset(result, 0, sizeof(*result)); result->parent = parent; + parent->operands = lappend(parent->operands, result); } return result; } @@ -129,13 +110,15 @@ make_query_item_wrap(QueryItem *item, QueryItemWrap * parent, bool not) static int calc_wraps(QueryItemWrap * wrap, int *num) { - int i, - notCount = 0, + int notCount = 0, result; + ListCell *lc; - for (i = 0; i < wrap->operandsCount; i++) + foreach(lc, wrap->operands) { - if (wrap->operands[i].not) + QueryItemWrap *item = (QueryItemWrap *) lfirst(lc); + + if (item->not) notCount++; } @@ -143,7 +126,7 @@ calc_wraps(QueryItemWrap * wrap, int *num) { wrap->num = (*num)++; if (wrap->oper == OP_AND) - wrap->sum = notCount + 1 - wrap->operandsCount; + wrap->sum = notCount + 1 - list_length(wrap->operands); if (wrap->oper == OP_OR) wrap->sum = notCount; } @@ -153,8 +136,12 @@ calc_wraps(QueryItemWrap * wrap, int *num) } result = 0; - for (i = 0; i < wrap->operandsCount; i++) - result += calc_wraps(&wrap->operands[i], num); + foreach(lc, wrap->operands) + { + QueryItemWrap *item = (QueryItemWrap *) lfirst(lc); + + result += calc_wraps(item, num); + } return result; } @@ -167,22 +154,26 @@ check_allnegative(QueryItemWrap * wrap) } else if (wrap->oper == OP_AND) { - int i; + ListCell *lc; - for (i = 0; i < wrap->operandsCount; i++) + foreach(lc, wrap->operands) { - if 
(!check_allnegative(&wrap->operands[i])) + QueryItemWrap *item = (QueryItemWrap *) lfirst(lc); + + if (!check_allnegative(item)) return false; } return true; } else if (wrap->oper == OP_OR) { - int i; + ListCell *lc; - for (i = 0; i < wrap->operandsCount; i++) + foreach(lc, wrap->operands) { - if (check_allnegative(&wrap->operands[i])) + QueryItemWrap *item = (QueryItemWrap *) lfirst(lc); + + if (check_allnegative(item)) return true; } return false; @@ -348,10 +339,14 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) } else if (wrap->type == QI_OPR) { - int i; + ListCell *lc; - for (i = 0; i < wrap->operandsCount; i++) - extract_wraps(&wrap->operands[i], context, level + 1); + foreach(lc, wrap->operands) + { + QueryItemWrap *item = (QueryItemWrap *) lfirst(lc); + + extract_wraps(item, context, level + 1); + } } } From 73864069b9d59e09b4bc5ad7696605d395df7b5c Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 27 Nov 2017 18:35:06 +0300 Subject: [PATCH 008/182] Remove some old debug code from the comments. 
--- src/rumtsquery.c | 58 ------------------------------------------------ 1 file changed, 58 deletions(-) diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 0a021aea66..9b6ccf465b 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -282,11 +282,6 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) context->addInfo[index] = PointerGetDatum(addinfo); context->addInfoIsNull[index] = false; context->index++; - - /* - * ptrEnd = (unsigned char *) VARDATA(addinfo) + VARHDRSZ + 2 * - * Max(level, 1) * MAX_ENCODED_LEN; - */ } else { @@ -295,25 +290,13 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) VARSIZE(addinfo) + 2 * Max(level, 1) * MAX_ENCODED_LEN); context->addInfo[index] = PointerGetDatum(addinfo); ptr = (unsigned char *) VARDATA(addinfo) + VARSIZE_ANY_EXHDR(addinfo); - - /* - * ptrEnd = (unsigned char *) VARDATA(addinfo) + - * VARSIZE_ANY_EXHDR(addinfo) + 2 * Max(level, 1) * - * MAX_ENCODED_LEN; - */ } - /* - * elog(NOTICE, "%s", - * text_to_cstring(DatumGetTextP(context->entries[index]))); - */ - while (wrap->parent) { QueryItemWrap *parent = wrap->parent; uint32 sum; - /* elog(NOTICE, "%d %d %d", parent->num, parent->sum, wrap->not); */ encode_varbyte((uint32) parent->num, &ptr); sum = (uint32) abs(parent->sum); sum <<= 2; @@ -329,13 +312,7 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) encode_varbyte(1, &ptr); encode_varbyte(4 | 1, &ptr); } - /* Assert(ptr <= ptrEnd); */ SET_VARSIZE(addinfo, ptr - (unsigned char *) addinfo); - - /* - * elog(NOTICE, "%s", DatumGetPointer(DirectFunctionCall1(byteaout, - * PointerGetDatum(addinfo)))); - */ } else if (wrap->type == QI_OPR) { @@ -350,21 +327,6 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) } } -/*PG_FUNCTION_INFO_V1(rum_process_tsquery); -Datum -rum_process_tsquery(PG_FUNCTION_ARGS) -{ - TSQuery query = PG_GETARG_TSQUERY(0); - QueryItem *item = GETQUERY(query); - QueryItemWrap *wrap = 
make_query_item_wrap(item, NULL, false); - int num = 1; - - calc_wraps(wrap, &num); - print_wraps(wrap, , 0); - - PG_RETURN_VOID(); -}*/ - PG_FUNCTION_INFO_V1(ruminv_extract_tsquery); Datum ruminv_extract_tsquery(PG_FUNCTION_ARGS) @@ -414,12 +376,6 @@ ruminv_extract_tsquery(PG_FUNCTION_ARGS) } *nentries = count; -/* elog(NOTICE, "%d", *nentries); - for (i = 0; i < *nentries; i++) - { - elog(NOTICE, "%s", text_to_cstring(DatumGetPointer((entries)[i]))); - }*/ - PG_FREE_IF_COPY(query, 0); PG_RETURN_POINTER(entries); } @@ -518,11 +474,6 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) ptr = (unsigned char *) VARDATA_ANY(DatumGetPointer(addInfo[i])); size = VARSIZE_ANY_EXHDR(DatumGetPointer(addInfo[i])); - /* - * elog(NOTICE, "%d %s", i, - * DatumGetPointer(DirectFunctionCall1(byteaout, addInfo[i]))); - */ - if (size == 0) { res = true; @@ -544,8 +495,6 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) index = num - 1; - /* elog(NOTICE, "a %d %d %d %d", i, index, sum, not); */ - if (child) { child->parent = index; @@ -585,11 +534,6 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) } else { - /* - * for (i = 0; i < lastIndex; i++) { elog(NOTICE, "s %d %d %d %d", i, - * nodes[i].sum, nodes[i].parent, nodes[i].not); } - */ - for (i = lastIndex - 1; i >= 0; i--) { if (nodes[i].parent != -2) @@ -612,8 +556,6 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) } } -/* elog(NOTICE, "%d", res);*/ - PG_RETURN_BOOL(res); } From 146d31db18f96886850f511b38449793b2fc5c46 Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 27 Nov 2017 23:20:06 +0300 Subject: [PATCH 009/182] Some comments to tsquery indexing It would be good to write proper documentation for that. But it's not possible for me now due to lack of time. Thus, a few comments are still slightly better than nothing.
--- src/rumtsquery.c | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 9b6ccf465b..205526ff37 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -20,6 +20,9 @@ #include "rum.h" +/* + * A "wrapper" over tsquery item. More suitable representation for pocessing. + */ typedef struct QueryItemWrap { QueryItemType type; @@ -33,8 +36,11 @@ typedef struct QueryItemWrap int num; } QueryItemWrap; +/* + * Add child to tsquery item wrap. + */ static QueryItemWrap * -add_child(QueryItemWrap * parent) +add_child(QueryItemWrap *parent) { QueryItemWrap *result; @@ -48,8 +54,11 @@ add_child(QueryItemWrap * parent) return result; } +/* + * Make wrapper over tsquery item. Flattern tree if needed. + */ static QueryItemWrap * -make_query_item_wrap(QueryItem *item, QueryItemWrap * parent, bool not) +make_query_item_wrap(QueryItem *item, QueryItemWrap *parent, bool not) { if (item->type == QI_VAL) { @@ -107,8 +116,11 @@ make_query_item_wrap(QueryItem *item, QueryItemWrap * parent, bool not) return NULL; } +/* + * Recursively calculate "sum" for tsquery item wraps. + */ static int -calc_wraps(QueryItemWrap * wrap, int *num) +calc_wraps(QueryItemWrap *wrap, int *num) { int notCount = 0, result; @@ -145,6 +157,10 @@ calc_wraps(QueryItemWrap * wrap, int *num) return result; } +/* + * Check if tsquery doesn't need any positive lexeme occurence for satisfaction. + * That is this funciton returns true when tsquery maches empty tsvector. + */ static bool check_allnegative(QueryItemWrap * wrap) { @@ -186,6 +202,7 @@ check_allnegative(QueryItemWrap * wrap) } +/* Max length of variable-length encoded 32-bit integer */ #define MAX_ENCODED_LEN 5 /* @@ -253,8 +270,11 @@ typedef struct char *operand; } ExtractContext; +/* + * Recursively extract entries from tsquery wraps. Encode paths into addInfos. 
+ */ static void -extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) +extract_wraps(QueryItemWrap *wrap, ExtractContext *context, int level) { if (wrap->type == QI_VAL) { @@ -262,7 +282,7 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) unsigned char *ptr; int index; - + /* Check if given lexeme was already extracted */ for (index = 0; index < context->index; index++) { text *entry; @@ -273,6 +293,7 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) break; } + /* Either allocate new addInfo or extend existing addInfo */ if (index >= context->index) { index = context->index; @@ -292,6 +313,7 @@ extract_wraps(QueryItemWrap * wrap, ExtractContext * context, int level) ptr = (unsigned char *) VARDATA(addinfo) + VARSIZE_ANY_EXHDR(addinfo); } + /* Encode path into addInfo */ while (wrap->parent) { QueryItemWrap *parent = wrap->parent; @@ -471,6 +493,7 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) if (addInfoIsNull[i]) elog(ERROR, "Unexpected addInfoIsNull"); + /* Iterate path making corresponding calculation */ ptr = (unsigned char *) VARDATA_ANY(DatumGetPointer(addInfo[i])); size = VARSIZE_ANY_EXHDR(DatumGetPointer(addInfo[i])); @@ -528,6 +551,7 @@ ruminv_tsvector_consistent(PG_FUNCTION_ARGS) } } + /* Iterate over nodes */ if (allFalse && check[nkeys - 1]) { res = true; From 0e01f5d1c59e66e4b0c83943c3454c1c85dd3d45 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 29 Nov 2017 17:39:59 +0300 Subject: [PATCH 010/182] Support amproperty method. Query select pg_index_column_has_property('index_name'::regclass,1,'distance_orderable') returns value 'true' instead of null now. 
--- expected/rum.out | 39 +++++++++++++++++++++ sql/rum.sql | 13 +++++++ src/rum.h | 3 ++ src/rumutil.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 143 insertions(+) diff --git a/expected/rum.out b/expected/rum.out index 0b728d442d..f17ed984a8 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -47,6 +47,45 @@ CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); +-- Access method properties +SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) +FROM pg_am a, unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) +WHERE a.amname = 'rum' ORDER BY a.amname; + amname | name | pg_indexam_has_property +--------+---------------+------------------------- + rum | can_order | f + rum | can_unique | f + rum | can_multi_col | t + rum | can_exclude | t +(4 rows) + +-- Index properties +SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) +FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); + name | pg_index_has_property +---------------+----------------------- + clusterable | f + index_scan | t + bitmap_scan | t + backward_scan | f +(4 rows) + +-- Index column properties +SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) +FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); + name | pg_index_column_has_property +--------------------+------------------------------ + asc | f + desc | f + nulls_first | f + nulls_last | f + orderable | f + distance_orderable | t + returnable | f + search_array | f + search_nulls | f +(9 rows) + \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); ERROR: additional information attribute "a" is not found 
in index diff --git a/sql/rum.sql b/sql/rum.sql index 19424d4ca8..76d56de7d0 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -13,6 +13,19 @@ BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); +-- Access method properties +SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) +FROM pg_am a, unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) +WHERE a.amname = 'rum' ORDER BY a.amname; + +-- Index properties +SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) +FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); + +-- Index column properties +SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) +FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); + \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); diff --git a/src/rum.h b/src/rum.h index 1370b72a9d..11f20300fd 100644 --- a/src/rum.h +++ b/src/rum.h @@ -400,6 +400,9 @@ typedef struct RumState /* rumutil.c */ extern bytea *rumoptions(Datum reloptions, bool validate); +extern bool rumproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull); extern Datum rumhandler(PG_FUNCTION_ARGS); extern void initRumState(RumState * state, Relation index); extern Buffer RumNewBuffer(Relation index); diff --git a/src/rumutil.c b/src/rumutil.c index 22c0b835f7..838674882f 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -13,8 +13,10 @@ #include "postgres.h" +#include "access/htup_details.h" #include "access/reloptions.h" #include "catalog/pg_collation.h" +#include "catalog/pg_opclass.h" #include "catalog/pg_type.h" #include "miscadmin.h" #include "storage/indexfsm.h" @@ -23,6 +25,7 @@ #include 
"utils/guc.h" #include "utils/index_selfuncs.h" #include "utils/lsyscache.h" +#include "utils/syscache.h" #include "utils/typcache.h" #include "rum.h" @@ -111,6 +114,9 @@ rumhandler(PG_FUNCTION_ARGS) amroutine->amstorage = true; amroutine->amclusterable = false; amroutine->ampredlocks = true; +#if PG_VERSION_NUM >= 100000 + amroutine->amcanparallel = false; +#endif amroutine->amkeytype = InvalidOid; amroutine->ambuild = rumbuild; @@ -121,6 +127,7 @@ rumhandler(PG_FUNCTION_ARGS) amroutine->amcanreturn = NULL; amroutine->amcostestimate = gincostestimate; amroutine->amoptions = rumoptions; + amroutine->amproperty = rumproperty; amroutine->amvalidate = rumvalidate; amroutine->ambeginscan = rumbeginscan; amroutine->amrescan = rumrescan; @@ -129,6 +136,11 @@ rumhandler(PG_FUNCTION_ARGS) amroutine->amendscan = rumendscan; amroutine->ammarkpos = NULL; amroutine->amrestrpos = NULL; +#if PG_VERSION_NUM >= 100000 + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; +#endif PG_RETURN_POINTER(amroutine); } @@ -877,6 +889,82 @@ rumoptions(Datum reloptions, bool validate) return (bytea *) rdopts; } +bool +rumproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull) +{ + HeapTuple tuple; + Form_pg_index rd_index PG_USED_FOR_ASSERTS_ONLY; + Form_pg_opclass rd_opclass; + Datum datum; + bool disnull; + oidvector *indclass; + Oid opclass, + opfamily, + opcintype; + int16 procno; + + /* Only answer column-level inquiries */ + if (attno == 0) + return false; + + switch (prop) + { + case AMPROP_DISTANCE_ORDERABLE: + procno = RUM_ORDERING_PROC; + break; + default: + return false; + } + + /* First we need to know the column's opclass. 
*/ + + tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid)); + if (!HeapTupleIsValid(tuple)) + { + *isnull = true; + return true; + } + rd_index = (Form_pg_index) GETSTRUCT(tuple); + + /* caller is supposed to guarantee this */ + Assert(attno > 0 && attno <= rd_index->indnatts); + + datum = SysCacheGetAttr(INDEXRELID, tuple, + Anum_pg_index_indclass, &disnull); + Assert(!disnull); + + indclass = ((oidvector *) DatumGetPointer(datum)); + opclass = indclass->values[attno - 1]; + + ReleaseSysCache(tuple); + + /* Now look up the opclass family and input datatype. */ + + tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass)); + if (!HeapTupleIsValid(tuple)) + { + *isnull = true; + return true; + } + rd_opclass = (Form_pg_opclass) GETSTRUCT(tuple); + + opfamily = rd_opclass->opcfamily; + opcintype = rd_opclass->opcintype; + + ReleaseSysCache(tuple); + + /* And now we can check whether the function is provided. */ + + *res = SearchSysCacheExists4(AMPROCNUM, + ObjectIdGetDatum(opfamily), + ObjectIdGetDatum(opcintype), + ObjectIdGetDatum(opcintype), + Int16GetDatum(procno)); + return true; +} + /* * Fetch index's statistical data into *stats * From c6a1aa48da76bcc0f98a17d122d909bfb189de28 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 30 Nov 2017 14:38:15 +0300 Subject: [PATCH 011/182] PGPRO-1175: Do not set so->isVoidRes for ordering keys --- Makefile | 4 +- expected/rum.out | 82 -------------------------- expected/rum_validate.out | 121 ++++++++++++++++++++++++++++++++++++++ sql/rum.sql | 19 ------ sql/rum_validate.sql | 60 +++++++++++++++++++ src/rumscan.c | 12 ++-- 6 files changed, 189 insertions(+), 109 deletions(-) create mode 100644 expected/rum_validate.out create mode 100644 sql/rum_validate.sql diff --git a/Makefile b/Makefile index afe1f7f425..d7d540287c 100644 --- a/Makefile +++ b/Makefile @@ -23,8 +23,8 @@ RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) LDFLAGS_SL += $(filter -lm, $(LIBS)) -REGRESS = rum rum_hash ruminv timestamp 
orderby orderby_hash altorder \ - altorder_hash limits \ +REGRESS = rum rum_validate rum_hash ruminv timestamp orderby orderby_hash \ + altorder altorder_hash limits \ int2 int4 int8 float4 float8 money oid \ time timetz date interval \ macaddr inet cidr text varchar char bytea bit varbit \ diff --git a/expected/rum.out b/expected/rum.out index f17ed984a8..e0bc9f95dc 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -1,91 +1,9 @@ CREATE EXTENSION rum; --- First validate operator classes -SELECT opcname, amvalidate(opc.oid) -FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod -WHERE amname = 'rum' -ORDER BY opcname; - opcname | amvalidate ------------------------------------+------------ - rum_anyarray_addon_ops | t - rum_anyarray_ops | t - rum_bit_ops | t - rum_bytea_ops | t - rum_char_ops | t - rum_cidr_ops | t - rum_date_ops | t - rum_float4_ops | t - rum_float8_ops | t - rum_inet_ops | t - rum_int2_ops | t - rum_int4_ops | t - rum_int8_ops | t - rum_interval_ops | t - rum_macaddr_ops | t - rum_money_ops | t - rum_numeric_ops | t - rum_oid_ops | t - rum_text_ops | t - rum_time_ops | t - rum_timestamp_ops | t - rum_timestamptz_ops | t - rum_timetz_ops | t - rum_tsquery_ops | t - rum_tsvector_addon_ops | t - rum_tsvector_hash_addon_ops | t - rum_tsvector_hash_ops | t - rum_tsvector_hash_timestamp_ops | t - rum_tsvector_hash_timestamptz_ops | t - rum_tsvector_ops | t - rum_tsvector_timestamp_ops | t - rum_tsvector_timestamptz_ops | t - rum_varbit_ops | t - rum_varchar_ops | t -(34 rows) - CREATE TABLE test_rum( t text, a tsvector ); CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); --- Access method properties -SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) -FROM pg_am a, unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) -WHERE a.amname = 'rum' 
ORDER BY a.amname; - amname | name | pg_indexam_has_property ---------+---------------+------------------------- - rum | can_order | f - rum | can_unique | f - rum | can_multi_col | t - rum | can_exclude | t -(4 rows) - --- Index properties -SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) -FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); - name | pg_index_has_property ----------------+----------------------- - clusterable | f - index_scan | t - bitmap_scan | t - backward_scan | f -(4 rows) - --- Index column properties -SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) -FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); - name | pg_index_column_has_property ---------------------+------------------------------ - asc | f - desc | f - nulls_first | f - nulls_last | f - orderable | f - distance_orderable | t - returnable | f - search_array | f - search_nulls | f -(9 rows) - \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); ERROR: additional information attribute "a" is not found in index diff --git a/expected/rum_validate.out b/expected/rum_validate.out new file mode 100644 index 0000000000..83a312c0ed --- /dev/null +++ b/expected/rum_validate.out @@ -0,0 +1,121 @@ +-- +-- Various sanity tests +-- +-- First validate operator classes +SELECT opcname, amvalidate(opc.oid) +FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod +WHERE amname = 'rum' +ORDER BY opcname; + opcname | amvalidate +-----------------------------------+------------ + rum_anyarray_addon_ops | t + rum_anyarray_ops | t + rum_bit_ops | t + rum_bytea_ops | t + rum_char_ops | t + rum_cidr_ops | t + rum_date_ops | t + rum_float4_ops | t + rum_float8_ops | t + rum_inet_ops | t + rum_int2_ops | t + rum_int4_ops | t + rum_int8_ops | t + rum_interval_ops | t + rum_macaddr_ops 
| t + rum_money_ops | t + rum_numeric_ops | t + rum_oid_ops | t + rum_text_ops | t + rum_time_ops | t + rum_timestamp_ops | t + rum_timestamptz_ops | t + rum_timetz_ops | t + rum_tsquery_ops | t + rum_tsvector_addon_ops | t + rum_tsvector_hash_addon_ops | t + rum_tsvector_hash_ops | t + rum_tsvector_hash_timestamp_ops | t + rum_tsvector_hash_timestamptz_ops | t + rum_tsvector_ops | t + rum_tsvector_timestamp_ops | t + rum_tsvector_timestamptz_ops | t + rum_varbit_ops | t + rum_varchar_ops | t +(34 rows) + +-- +-- Test access method and 'rumidx' index properties +-- +-- Access method properties +SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) +FROM pg_am a, unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) +WHERE a.amname = 'rum' ORDER BY a.amname; + amname | name | pg_indexam_has_property +--------+---------------+------------------------- + rum | can_order | f + rum | can_unique | f + rum | can_multi_col | t + rum | can_exclude | t +(4 rows) + +-- Index properties +SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) +FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); + name | pg_index_has_property +---------------+----------------------- + clusterable | f + index_scan | t + bitmap_scan | t + backward_scan | f +(4 rows) + +-- Index column properties +SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) +FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); + name | pg_index_column_has_property +--------------------+------------------------------ + asc | f + desc | f + nulls_first | f + nulls_last | f + orderable | f + distance_orderable | t + returnable | f + search_array | f + search_nulls | f +(9 rows) + +-- +-- Check incorrect operator class +-- +DROP INDEX rumidx; +-- Check incorrect operator class +CREATE OPERATOR CLASS rum_tsvector_norm_ops +FOR TYPE tsvector 
USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> (tsvector, rum_distance_query) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE text; +CREATE INDEX rum_norm_idx ON test_rum USING rum(a rum_tsvector_norm_ops); +SET enable_seqscan=off; +SET enable_bitmapscan=off; +SET enable_indexscan=on; +SELECT a + FROM test_rum + WHERE a @@ to_tsquery('pg_catalog.english', 'bar') + ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) + a +------------------------------ + 'bar':2,8 'foo':1,3,6 'qq':7 +(1 row) + diff --git a/sql/rum.sql b/sql/rum.sql index 76d56de7d0..44648ca1b4 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -1,11 +1,5 @@ CREATE EXTENSION rum; --- First validate operator classes -SELECT opcname, amvalidate(opc.oid) -FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod -WHERE amname = 'rum' -ORDER BY opcname; - CREATE TABLE test_rum( t text, a tsvector ); CREATE TRIGGER tsvectorupdate @@ -13,19 +7,6 @@ BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); --- Access method properties -SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) -FROM pg_am a, 
unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) -WHERE a.amname = 'rum' ORDER BY a.amname; - --- Index properties -SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) -FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); - --- Index column properties -SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) -FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); - \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); diff --git a/sql/rum_validate.sql b/sql/rum_validate.sql new file mode 100644 index 0000000000..24bc4aa7c3 --- /dev/null +++ b/sql/rum_validate.sql @@ -0,0 +1,60 @@ +-- +-- Various sanity tests +-- + +-- First validate operator classes +SELECT opcname, amvalidate(opc.oid) +FROM pg_opclass opc JOIN pg_am am ON am.oid = opcmethod +WHERE amname = 'rum' +ORDER BY opcname; + +-- +-- Test access method and 'rumidx' index properties +-- + +-- Access method properties +SELECT a.amname, p.name, pg_indexam_has_property(a.oid,p.name) +FROM pg_am a, unnest(array['can_order','can_unique','can_multi_col','can_exclude']) p(name) +WHERE a.amname = 'rum' ORDER BY a.amname; + +-- Index properties +SELECT p.name, pg_index_has_property('rumidx'::regclass,p.name) +FROM unnest(array['clusterable','index_scan','bitmap_scan','backward_scan']) p(name); + +-- Index column properties +SELECT p.name, pg_index_column_has_property('rumidx'::regclass,1,p.name) +FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_orderable','returnable','search_array','search_nulls']) p(name); + +-- +-- Check incorrect operator class +-- + +DROP INDEX rumidx; + +-- Check incorrect operator class +CREATE OPERATOR CLASS rum_tsvector_norm_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> 
(tsvector, rum_distance_query) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE text; + +CREATE INDEX rum_norm_idx ON test_rum USING rum(a rum_tsvector_norm_ops); + +SET enable_seqscan=off; +SET enable_bitmapscan=off; +SET enable_indexscan=on; + +SELECT a + FROM test_rum + WHERE a @@ to_tsquery('pg_catalog.english', 'bar') + ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) \ No newline at end of file diff --git a/src/rumscan.c b/src/rumscan.c index 58b50bbc82..a730772a14 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -383,7 +383,9 @@ initScanKey(RumScanOpaque so, ScanKey skey, bool *hasPartialMatch) */ if (skey->sk_flags & SK_ISNULL) { - so->isVoidRes = true; + /* Do not set isVoidRes for order keys */ + if ((skey->sk_flags & SK_ORDER_BY) == 0) + so->isVoidRes = true; return; } @@ -415,7 +417,9 @@ initScanKey(RumScanOpaque so, ScanKey skey, bool *hasPartialMatch) { if (searchMode == GIN_SEARCH_MODE_DEFAULT) { - so->isVoidRes = true; + /* Do not set isVoidRes for order keys */ + if ((skey->sk_flags & SK_ORDER_BY) == 0) + so->isVoidRes = true; return; } nQueryValues = 0; /* ensure sane value */ @@ -612,11 +616,7 @@ rumNewScanKey(IndexScanDesc scan) } for (i = 0; i < scan->numberOfOrderBys; i++) - { initScanKey(so, &scan->orderByData[i], NULL); - if (so->isVoidRes) - 
break; - } /* * Fill markAddInfo if possible From 42fd8274ffe42b9deb1febd8dc6779d51894f2e6 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 5 Dec 2017 17:31:34 +0300 Subject: [PATCH 012/182] scanGetItemRegular(): Catch up order key with *item --- data/rum_array.data | 7000 +++++++++++++++++++++++++++++++++++++++++++ expected/array.out | 34 + sql/array.sql | 14 + src/rumget.c | 19 +- 4 files changed, 7065 insertions(+), 2 deletions(-) create mode 100644 data/rum_array.data diff --git a/data/rum_array.data b/data/rum_array.data new file mode 100644 index 0000000000..b3903d0f33 --- /dev/null +++ b/data/rum_array.data @@ -0,0 +1,7000 @@ +{18,31,54,95} +{23,50,13,9,39} +{99,54,77} +{79,83,16,63,32} +{52,41,61,79,94,87} +{76,59,39,36,21} +{} +{41,79,76,96,3} +{25,59,5,96,32} +{92,58,12,57} +{24,48,41,88} +{39,5,17} +{10,41,78,25,35} +{31,89,4} +{68,74,94} +{97,78,44,68,81,16} +{87,76} +{30,81} +{72,20,99,26} +{87,90,98,40,44} +{24,99,66,61} +{79,8,48,16} +{62,99,48,80,75,39} +{10,60,35,15} +{45,71,10,97,56} +{64,79,19,31} +{30,57,42,31,45} +{61,42,14,26} +{12,38,65,36,56,36} +{17,62,18,56} +{84,85,90,60,55,17} +{27,11,82,20,43} +{14,27,18,48,39,51} +{53,13,52} +{56,35,81,60,27} +{79,89,89,7} +{65,17,31,17,29,85} +{21,3} +{53,55,16,83,4} +{62,3,63} +{73,40,99} +{23,80} +{2,74,42,37,21} +{12,16} +{80,60} +{19,62,34} +{38,19,31,6,15,2} +{63,96,64,4,36,15} +{9,3} +{91,87,15,18,7,66} +{17,10} +{77,96} +{11,43,31,2,89} +{17,77,89,50} +{24,6,61,88,51} +{61,50,59,90,5,89} +{58,1,39,48} +{78,36,70,92} +{43,3,22,95,51} +{} +{88,64,25,64,86} +{34,6,49,90,25} +{86,35,13,22} +{21,44,83} +{42,88,72,65,59,96} +{36,33,1,98} +{16,54} +{35,16,44} +{73,23,20} +{84,25,1,52,35} +{27,36,54,87,31} +{38,47,83,3} +{64,13} +{65,84,85,16,22} +{57,9,39,73} +{89,11,67,55,73} +{78,39,84,63,62,45} +{50,63,8} +{} +{96,36,58,65,96} +{59,86,41,30} +{90,60,39,47,19} +{70,100,73,99} +{} +{85,14,39} +{76,53} +{96,38,52,13,87,85} +{97,51,15,30,53,87} +{30,59,9,40,13} +{31,91,68,79} +{37,56,39,78,75} 
+{82,2,47} +{33,25,45,40} +{51,21,92,20,18,76} +{84,93,36,95,34,69} +{66,25,5,40} +{77,6,57,42} +{} +{88,81,85,37,12} +{56,73,38} +{70,70,6,19} +{82,54,91} +{75,8} +{45,33,64,90,95} +{8,71,66,12} +{56,26,68,94} +{70,77,4,96,62,83} +{23,87} +{34,34,4,33} +{28,84} +{78,75,77} +{88,53} +{27,38} +{2,2,82} +{30,52,88,61,33} +{29,72,94,68} +{85,72} +{88,4} +{63,90,43,66,24,33} +{88,48,47} +{3,11,98,37,61} +{45,65,63,15,38} +{79,45,56,94} +{56,74,78,19,76} +{24,81,64,13,100} +{93,27,63,71,27,3} +{74,13,85,86,32,60} +{98,40,63,13} +{41,95,19,93,17,84} +{90,28,100,100,19,2} +{35,15,54} +{29,81,77} +{54,64,63,12,18} +{38,43,85,21,35} +{84,28,27,4,80,27} +{80,77,55,98} +{13,71,48,55,89,38} +{58,43,27,5,57} +{5,33,96,6} +{73,93,87,69,100,24} +{58,96,38,85,55,51} +{37,30,88,4,8,59} +{24,68,43,48,18,84} +{23,100,82,30,42} +{23,36,16,99,27} +{41,75} +{66,41,10,37,16,6} +{54,49,60} +{4,56,44,72,40} +{71,96,67,100,59} +{7,41} +{8,3,27} +{38,69,47,68,5,24} +{43,100,59,62} +{92,14,34,5,71,48} +{72,5,91,29,99,36} +{62,71,37,80,62,50} +{32,45,17} +{89,68} +{52,17,55} +{21,47,15,92} +{36,100,5} +{14,76,59,11,15} +{59,72} +{37,55,89,49} +{87,79,96,20,93} +{6,44} +{32,46,25} +{27,47,76,4,54} +{2,16} +{90,36} +{11,19,27,79} +{54,4} +{72,88} +{14,85,71,69,5,22} +{31,48} +{28,35,18} +{77,55,100,73,57,62} +{} +{14,59,53} +{98,3} +{13,56} +{26,61,88,54,88,33} +{70,12} +{55,16,15,42,76} +{13,75} +{97,38,82,51,86,53} +{41,76,39,84,32} +{94,66,47} +{55,28} +{} +{94,65,59,20} +{55,50,56,14,58} +{14,94,52,25,69,95} +{20,96} +{37,38} +{26,35,9,98,74} +{11,9,41,79} +{36,57,87,69,92,89} +{11,39,60,4,47,3} +{97,5} +{16,58,38,98,42} +{46,69} +{35,54} +{36,79,54} +{} +{63,78} +{12,86,52,29,60,30} +{29,27,58,86,42,62} +{42,12,60} +{90,93,85,29} +{16,8,45} +{29,33,85} +{32,14,6,47,74} +{14,85,14,26,3} +{46,71,10,16} +{30,63} +{} +{91,30,56} +{46,36,68,91,36,88} +{24,61} +{66,21,80,14} +{43,63,50,21,11} +{38,46,18,51} +{38,28,70} +{17,41,76,1,30} +{47,63} +{56,80,85,1,7,97} +{75,5,79,32} +{5,17,66,51,68} 
+{6,83,2} +{25,40,79,84} +{58,38,12,68} +{55,86,20,67,27} +{58,64} +{14,51} +{12,86,57,68} +{61,91,65,3,83,68} +{40,31,82,21} +\N +{24,64,35,32} +{32,83,18,27,43,32} +{50,83} +{94,84,58,3,25,79} +{66,2,27,36,24} +{71,34} +{17,57} +{22,40,49,50,10} +{79,62,94,78} +{92,79,24,72} +{23,41} +{69,60,77,70,18,48} +{39,45,91,85} +{27,43,22,21,85} +{84,51,96,7,18} +{100,38,69,93,66,39} +{73,42,35,15,69,98} +{100,17,37,15,40} +{1,91,2,17,90,48} +{18,12,52,24} +{39,43,89} +{16,13,88} +{69,8,75} +{34,91,54,81} +{37,68,89,1,56} +{81,83,39,36,14} +{12,15,2} +{14,16,88,43} +{59,12} +{1,62,21,94} +{29,43,70,52,93} +{29,36,56,78} +{91,56,86,89,53} +{14,83,39,94} +{29,58,72,4,45} +{76,56,84,28,58} +{4,52,6,88,43,17} +{21,1,35,62,77,6} +{78,74} +{1,20,93,43} +\N +{30,100,35,94,74,64} +{81,3,21,4} +{9,19,33} +{28,62,40,64,26} +{69,72,26,30,90} +{52,70,78,43} +{91,58,33,22,92,26} +{98,36,96,94,66} +{86,43,82} +{93,52,4,58,51} +\N +{49,61,80,79,90} +{50,81,72} +{57,29} +{54,31,36} +{52,31,6,48,2} +{4,51,37,83,17} +{60,20,94,82,18} +{52,64,26,81,69,61} +{39,8,22,2,8} +{31,25,95,99} +{11,72,30,95,20,28} +{78,87} +{21,40,98,41,73,33} +{67,88,42,62,11,47} +{85,1} +{4,68,100,72,24} +{82,43} +{97,55,47,52} +{51,52} +{20,21} +{69,46,34,59,54,61} +{9,31,43} +{68,20} +{73,63} +{71,12,93,8,48,10} +{44,46,42,91,21} +{98,52} +{45,60} +{95,38,30,3} +{27,77,2,46,53,18} +{99,5} +{79,33,34,48,82} +{3,29,82,72,35} +{73,75,83} +{25,43,37,26} +\N +{51,95,40} +{18,23,10,90,15,20} +{85,66} +{25,76,22,87,88,18} +{92,4} +{27,51} +{25,77,12,37} +{44,52,69,39,21,63} +{94,30,74,36} +{60,18} +{62,88,94,93,26} +{5,72,96,25} +{99,1,85,98,85,70} +{33,21,37,19} +{44,78} +{47,2,73,32,3} +{91,35,10,81} +{80,64,7,45,84} +{86,16,96,8,88} +{32,29,84,81,30,8} +{51,28,6,16} +{88,51,50,54,56,96} +{79,19,41,40} +{40,26,10,26,2} +{60,34,3,29} +{68,80,70,56} +{60,23,39} +{50,69,6,71,70,25} +{98,53,94,14,45,11} +{98,39,64,89,98,32} +\N +{45,5,15,23,41,63} +{54,31,55,58,32} +{36,56} +{38,78,65,4,75,38} +\N +{40,6,93,40,13,59} 
+{42,50,10,65,96} +{6,94,49} +{63,44,36,55} +{40,79} +{39,75,27} +{8,31} +{81,75} +{99,82,85,34,24,89} +{86,82,20} +{63,96} +{47,83,29} +{70,46,48} +{44,11} +{94,19,84,79,77,22} +{68,47,100,48,65,77} +\N +{76,12,86,58} +{13,14,79,61,12} +{68,65,16,93,89} +{95,18,29,89,92,43} +{19,12,50,47} +{82,93,85} +{71,40,85} +{95,96,100,86} +{2,40,71,36,25} +{11,95,25} +{79,46,41,35,39} +\N +\N +{88,29} +{54,14,94,88} +{59,67,81,41} +{46,68,78,56,47,30} +{5,76,87} +{23,89,47,46} +{47,98,14,31,1,60} +{32,14,96,61,37} +{79,66,93} +{98,1,77,44} +{21,68,2,31,17} +{94,23,15} +{48,47,57,94,49,71} +{54,3} +{99,40,81,86,81} +{85,12,98,81,5} +{60,41,21} +{38,82,55,41,96} +{11,98,12,69,93} +{11,70,66,44} +{23,92,80} +{10,8,43,97} +{17,30} +{78,56,58} +{84,87,84} +{12,32,7,58,47,48} +{29,46} +{87,34} +{59,30,72,85,71} +{67,48,83,98} +{35,10,73,71,1,77} +{21,51,16,60,64,12} +{36,61} +{54,98} +{44,74,84} +{83,14} +{71,52,48,48,15,92} +{79,78,98,35} +{52,29,47,86,96} +{10,37} +{21,25} +{57,22,28,30,97,47} +{15,28} +{88,24,98,45,65} +{78,42} +{36,70} +{40,48} +{72,3,78,69,57,33} +\N +{21,96,16,21,75,23} +{55,5,72,45} +{99,2,72,29} +{48,17} +{84,84,40,1,59} +{34,11} +{34,80,45,31} +{56,82,25,65,22,64} +{10,4,55} +{74,67,42,74,80} +{84,22,42,6,87,30} +{6,51,89,2,84,78} +{19,95,93,87,8} +{45,84,25} +{7,12,16,92} +{89,82,16} +{22,64} +{16,31,49,48,45,14} +{69,64,19,14,39,8} +{40,96,26,48,65} +{17,45,4,57} +{73,8} +{85,89,1,15,74,51} +\N +{57,89} +{25,12,55} +{39,62,35} +{85,88,71,98,83} +{64,63,75,72} +{100,40,38,1} +{2,44} +{13,46,59,43} +{87,9,93,50} +{77,7,11,30} +{61,11,18} +{19,25,68,83} +{67,25} +{54,18,85} +{96,81,38,11,3} +{87,32,47,79,62,56} +{42,49} +{41,65,24,13,79,75} +{85,32,96} +\N +{3,63,47,84,67,13} +{53,57,59,61} +{95,27,8,89,35} +{76,78,76,76,14,37} +{31,62,65} +{97,57,60,80} +{18,81,93,67} +{8,10} +{65,25} +{68,1,62,63,64,88} +{27,56,74} +{29,61,78,40} +{54,72} +{96,30,71,21,99} +{67,11,67} +{26,65,31} +{89,90,89,68} +{56,39,63,39} +{50,67} +{72,100,24,84,9} +{29,57,65,37,3} 
+{72,75,79,30} +{78,44,87,67} +{100,19} +{35,60,82} +{16,83,69,38} +{29,98,13,60} +{42,60,87} +{18,67,60} +{31,77,50} +{3,22,40,59,7} +{82,80} +\N +{32,92,70,30,18,35} +{48,38,92,82} +{10,92,66,59} +{4,67,42,21,71} +{27,88,20,21,9} +{46,22,27,85,36} +{42,55,36} +{24,2,96} +{96,48,40,48,52} +{15,5,90,10,68,20} +{30,2,67,92,96,63} +{16,82,87,26} +{88,98,76,29} +{29,11,94,23} +{58,20} +{52,18,55,73} +{20,81,52,19,37} +{93,21,97} +{2,77} +{46,91,80,48,71,20} +{87,7,93} +{68,77,61} +{59,33,52} +{67,62,89,2,62,90} +{30,82,72,44} +{72,18,60,38} +{11,14,59} +{74,65,54,58,67,66} +{74,56,40,73,50,66} +{42,17,56,59,53,19} +{75,25,76,9,72,50} +{14,57} +{61,47} +{90,11,72,13} +{52,27} +{80,84,53,55,98} +{16,26,55,17,79,96} +{42,73,77} +{6,84,67,54,96} +{99,48,99,63,73,77} +{5,41,72,5,88,81} +{19,20,20} +{21,89,55,44} +{82,67,11,64,61,5} +{44,34,8,62,53} +{75,53,66,36,100} +{46,65,6,70,4} +{84,10,56,35,18} +{65,60} +{88,56,27,11} +{10,9,97} +{97,49,100,100,76,32} +{2,98,57} +{47,57,84,74,79} +{80,9,24} +{96,33,86,28,19} +{43,76} +{46,14,55,92} +{60,69,66,62,22} +{45,85} +{45,9,36,13,45,1} +{24,49,8,37,66,64} +{98,53,96,47,2} +{36,44,32,4} +{77,36,78,51,63} +{82,36} +\N +{54,55,33,45,69,18} +{82,93} +{65,59,2,62,10,25} +{75,70,76,69,7,23} +{10,34,67,85} +{94,66,28,40,64,41} +{35,73,64,28,45,68} +{75,2} +{58,49,4,87,19} +{91,99,11,66,75,70} +{26,64} +\N +{13,51,18} +{39,33,21,18} +{27,50,82,2,3,71} +{51,89,44,53} +{88,91,34} +{45,96,27,12,51,52} +{31,96} +{2,9,54,89} +\N +{57,99} +{87,84,70,7,98,42} +{32,80} +{57,64,28} +{24,39,76,4,30} +{59,38,15,45,47,28} +{71,20,37,1} +{72,59} +{7,44} +{50,37,18,1,58,40} +{13,18,21,56} +{72,3,26,74,91} +{60,22,71,49} +{55,82,61,8,48,66} +{28,22,75,41,52} +{51,63,27,41,16} +{59,89,40,85,86} +{12,1} +{52,11,6} +{37,10,43,88,15,7} +{14,94,81} +{34,56,57,4} +{81,43,11,88,74,76} +\N +{67,10,50,79,70,35} +{14,51} +{49,50,23,84} +{51,41,57,100,19,14} +{31,55,40,96} +{8,42,33} +{83,34,1} +{56,80,22,93} +\N +{8,77,91} +{58,39} +{55,30,74} +{50,22,63,73} 
+{80,19,67,70,18} +{7,99,45,23,59,78} +{36,97,10,33,22,45} +{43,78,90} +\N +{1,68} +{63,95,54} +{5,67,61,37,89} +{32,97,2,56} +{83,31,6,80,63} +\N +{34,15,30,40,16} +{13,43,6} +{35,86,31} +{45,59,4,95,26} +{63,48,25} +{56,97,89,45,87,21} +{42,81,69} +{49,99,87} +{81,21,15,36,70,2} +{93,41,53} +{54,71,82} +{88,90,51} +{100,35,18} +{88,81} +{76,16,87,14} +{16,83,81,44} +{16,53,100,91} +{55,75,92} +{27,97,76,88,66} +{14,100,95,95} +{95,84,93,29,67} +{32,10} +{82,12,51} +{40,6,18,14,29,70} +{3,100,81} +{83,69} +{35,63,31,15} +{5,100,81,54,37,78} +{99,76,33} +{88,85,16} +{46,20,15,10,6,90} +{53,15,75,76,94} +{5,76} +{16,7,21,70} +{3,84,15} +{29,58,73,91} +{82,39,64} +{49,66,83,76} +{79,49,19,67,18,76} +{9,56,41} +{12,22,19} +{62,54} +{20,73,40} +{34,53,58,68,96} +{97,14,61,63} +{38,55,90,63} +{83,78,81,29,12,46} +{96,97,40,89,10} +{67,33,19,19,74,47} +{78,31} +{92,74,93} +{59,54,90,52,29,87} +{92,39,55,89,81,21} +{20,85,64} +{13,97} +{88,18,85,24,54,90} +{67,51,47} +{27,29,90} +{48,27,7,92} +{100,37,24,41,68,66} +{45,7,100,83,51} +{34,10} +{60,36,44} +{55,46,4} +{86,64} +{61,77,98,64} +{14,82,14,50,1} +\N +{53,31} +{64,43,35,44,98,75} +{98,15,52,58,76} +{55,94,92,40,80} +{1,14,100,42,45,74} +{13,90,84,97,18,92} +\N +{13,91} +{67,33,15} +{18,96,38} +{95,70,34,100} +{17,29,64,32} +{19,14,83,69,60,99} +{69,29,64,61,45,17} +{78,48,24} +{40,60,61,93,17} +{19,89,22,71} +{48,8,13,11,56} +{75,18,77,100} +{29,78} +{51,92,97,31} +{83,5,2,97,68,69} +{39,86,86,94,41} +{66,21,27} +{30,84,11,60} +{50,61,28,46,38,45} +{12,59,66,80,15,64} +{69,22} +{30,54,58,99} +{14,28,80,22} +{44,31,14,61,83,72} +{55,53,78,91,76,55} +{43,3,90,22,7} +{51,34,24} +{3,99,5,72,82} +{95,38,61} +{22,8} +{78,40,93,65,18,26} +{21,17,19,8,89} +\N +\N +{94,88,27} +{49,45} +{67,24,64,86,18,1} +{5,33,18,84,51} +{15,71,89,48,94,81} +{71,69} +{98,63,73,64} +{14,75,12} +{47,42,88,13} +{35,51,60} +{63,41} +{73,11,66,99,8} +\N +{2,17,6,44,97} +{95,24} +{2,13,35,21} +{76,29} +{81,37,21} +{23,63,27,53} +{70,66,58,27,4} 
+{69,62,22} +{62,96,44} +{68,87,99} +{51,40,81,52,93} +{81,11,45,92,22,21} +{5,39,46} +{44,7} +{14,63,62,9,12} +{9,19,90,72,51} +{70,61,24,36} +\N +{29,19,3,30} +{76,86,28,58,38} +{59,27} +{9,65,65,10,37,6} +{89,51,50,23} +{65,2} +{33,51} +{25,55,69,55,1,78} +{76,71,93,46,23} +{70,30,50,11,2,89} +{74,39} +{4,29,22,80,15,23} +{16,30,69,76,61,67} +{43,34,4,70,36} +{59,32,25,93,32,98} +{64,4} +{52,33,47} +{31,49,7,62,7,95} +{44,69,12,45,34,8} +{81,37,83,35,3} +{24,74,16,89,94,27} +{79,71,72,49,88,35} +{17,96,72,87,48} +{81,18,50} +{11,19,70} +{42,95,42,58,90} +{27,65,83,86,33} +{55,7} +{43,55,92,79} +{97,55} +{85,25} +{93,42,69,44,26,78} +{2,40,46,19,84} +{8,42,16,26,87} +{36,8,42} +{31,47,61,44,13} +{85,97,47} +{27,30,71,88,15,100} +{69,27,4,19} +{3,52,31,62,98} +{64,86} +{91,6} +{76,40} +{57,77,7,40} +{71,34,48,53,37} +{36,72} +{61,99,53,2,31,6} +{86,15} +{52,93,59,51} +{57,27,52} +{48,67,31,69} +{34,90,37,73,60,83} +{71,24,49,59} +{93,71,90} +{77,31,77} +{47,40,32,20} +{97,40,63,80,44} +{88,55,10,40} +{86,36,40,72,38,9} +{31,97} +{56,19,55,62,60} +{53,95} +{33,36} +{50,12,55,42,96,100} +{41,17,100,76} +{65,1,61,69,64,21} +{90,92} +\N +{74,42,86} +{2,4} +{99,78,5,92,1,61} +{1,69} +{80,73,60,31} +\N +{10,25,13} +{50,34,75} +{12,90,6,36,42} +{23,54,46} +{67,28,66,87} +{8,88,88,51,55,32} +{15,19,24,91,75} +{80,16,70} +{41,7,90,37} +{97,57,32,21} +{54,74,29,12,55,78} +{60,76,37,92,44,73} +{1,56,14} +{40,79} +{97,1,30,78,56} +{36,25,61} +{33,3,51,30,38} +{2,94,19,15} +{7,38,72} +{96,18,3} +{18,95,15,62,74,53} +{59,61} +{18,66,66,65,4,33} +{49,83,10} +{17,52,90,39,61,87} +{38,92,55,26} +{8,43} +{77,51,68,23,47} +{27,65,24,43,88,73} +{54,34,30,2,19,62} +{12,36,81,24,66,8} +{38,91,90,99,84} +{51,55,94,97,91,15} +{50,42,20,22} +{70,4,22} +{64,26} +{56,86,16,21,31,61} +{7,19,86,49,10,53} +{81,16,74} +{95,9,11,46,47} +{34,23,16} +{94,38,19,4,4} +{39,79} +{41,3,62} +{84,67,53,90,46} +{17,46,23} +{62,1,5,58,52,1} +{23,83,80,62,19} +{99,61,77} +{51,95,48,96,75} +{39,2,6,95,43,36} 
+{69,9,59} +{62,97,31} +{75,96} +{33,29,35,13,94,78} +{28,71,16,99} +{72,86,25} +{5,28,15,33} +\N +{13,13,52,20} +{58,98,83,85,81} +{13,75,42} +{7,91,3,83,82,37} +{72,91} +{10,67,61} +\N +{43,86,76} +{36,62} +{64,56} +{63,13,22,24} +{76,49,38,23,9,8} +\N +{92,58,24,19,96,90} +{24,37,76,37,1,95} +{91,9} +{46,35,48,37,91,76} +{72,21,6} +{30,80,39,90,89,18} +{83,30,67,17} +{43,36,46,43} +{4,31,34,36,33,48} +\N +{16,49} +{75,56,8,91,4} +{92,80} +{74,72,68,89,7,82} +{79,17,26} +{14,15,28,72,58} +{42,21,9} +{71,39,98,98,61} +{68,63,23,74,74,48} +{91,80,22,100,57,30} +{63,60} +{90,9,10,67,89,14} +{53,93} +{75,49,34,30,38} +{2,43} +{32,4,24,48,23,31} +{45,24,31,15,51} +{65,62,21} +{83,50} +{10,90,98,86,87,1} +{63,2,9,17,30} +\N +{77,46,60} +{49,39} +{37,86,4,63} +{33,28,37,33} +{4,88,80,14,47,45} +{90,64,17,65} +{60,90,12} +{7,2,38,33} +\N +{39,90,7} +{89,32} +{27,47,63,31} +{54,10,10,73,84,87} +{55,58,25,87} +{41,24} +{71,26,8,31} +{74,19,33,81,74} +{47,58} +{44,16,22,59} +{2,10,97,16,25} +{1,98,3,41,6,80} +{12,13} +{3,50,61,85} +{54,5,44,97,71,86} +{54,72,94} +{59,13,28,79} +{73,68,7,13} +{90,49,63,45} +{95,47,84} +{31,79,98,22} +\N +{13,15,83,89,87,20} +{1,58,87} +{15,21,39} +{93,27} +{40,81,13,31} +{29,52} +{28,48,36,41} +\N +{71,23,89} +{29,59,31,45,35} +{49,83,24,19,44,26} +{41,61,36,34,38,88} +{66,17,18,9} +{55,38,93,33} +{84,42,71,15,12} +{11,38,78,80,90,92} +{1,6,28,68,58} +{96,63,73,22,74,29} +{65,97,68} +{92,29,92,36} +{47,25,30} +{25,44,67,95,16} +{7,26,41} +{79,12,44,69} +{17,27,4,60} +{45,30,57} +{68,24,63} +{39,64,94,92} +{27,68,39,68,75,8} +{88,48,48} +{86,86,8,54,7,45} +{93,60,14,90,14} +{97,42,54,67,38} +{13,38} +{84,34,30} +{34,71,77,71,13} +{82,18} +{53,7,79,79} +{28,65,38,20,93,100} +{96,10} +{94,12,93,48,51,20} +{12,4,41,11,25,59} +{95,69,23,25,1,19} +\N +{44,38} +{12,4,96,7,48} +{18,24,52,81,58,77} +{15,36,1,50,81,23} +{39,66,74} +{52,22,99} +{51,11,77,44,22} +{51,19,18,91,75} +{20,17,5,96,63,30} +{31,56,9,21} +{45,70,31,62,9} +{84,22} 
+{99,62,97,44,87,90} +{95,94,95,24,25,47} +{79,72,57,18,3,26} +{54,67,37,57} +{3,90,9,3} +{95,90,40,7} +{36,70,76,68,14,71} +{15,59,7,1,48} +{91,29,79,62,94} +{76,36,92,82} +{50,79,68} +{55,63,88,87} +{86,89,49,17} +{19,74,14,52,8,59} +{8,58} +\N +{77,74,20,39,26,29} +{38,89} +{58,21,44,81,17,16} +{40,72,12,32,90} +{93,34,92,17,39,85} +{39,2} +{43,21,83} +{81,3,59,28} +{34,97,52} +\N +{84,90,6,74,43,70} +{41,6,10,98,86,41} +{13,72,78,11,37,5} +{100,40,54,75,33} +{66,31} +{58,58,75,83} +{81,90,8,73,87,41} +{9,63,22} +{19,66,19,93,52} +{39,88,13,25,66} +{80,85,66} +{66,76,11,71,97,77} +{70,35,87} +{36,17,69,2,41} +{30,85,65,39,38} +{39,35} +{64,100} +{83,53} +{25,29,29,72} +{19,63} +{32,2,82,15} +{31,31,46,11,2} +{41,1} +\N +{55,41,15} +{18,61,43,22,100} +{47,60,16} +{80,5} +{52,2,76} +{40,26} +{81,12,16,25} +{31,93,89,20,95,75} +{26,75,86,1} +{36,69,70,73,79} +{38,39} +{45,49,52} +{88,53,45,10,49,31} +{21,14,1,83} +{7,71} +{59,38,83,64,44} +{6,52} +{99,99,26,54,47,8} +{13,46,72,5,23} +{7,86,40,73,55} +{28,47,50,62,44} +{32,89} +{39,48,50,100,62,95} +{66,56,11,21,58,59} +{7,44,95,53,95,36} +{83,33,79} +{34,65,51,52} +{67,95,46,45,61} +{69,84,71,38,46} +\N +{24,57,48,27,97} +{83,91,97,94,37,44} +{22,31,38,77,21} +{72,32,53} +{30,45} +{93,94,27,95} +{95,4,79,3} +{33,90,92,54} +{55,8,76,39,85,64} +{82,54,93} +{31,42,5} +{38,14,73,12,14} +{64,13,64,28,32,89} +{5,28,4,22,72} +{37,78,94} +{58,73} +{24,57,33} +{48,28} +{69,42} +{97,91,75,84} +{95,69} +{64,95} +{1,3} +{76,38,81,11,90} +{21,30,54} +{92,100,97,21} +{10,76,64} +{85,79,100,79,76,63} +{13,96} +{91,47,84} +{100,19,45,49} +{99,71,21,10,69} +{19,41,7,63,56,85} +{16,32,6,92} +\N +{62,7,22,65} +{1,86,67,47,83} +{26,2,100,51,1} +{20,22,86} +{74,95,79} +{8,53} +{85,59,61,45,83,8} +{2,76,63,26} +{40,42,84,55,56,23} +{37,7,25,14,2,47} +{86,16,98,41,33} +{76,30} +\N +{16,88,61,4,41,42} +{59,92,94,76} +{96,76,57,62,99,61} +{14,30,23,13,9,32} +{47,49,86} +{48,19} +{73,25,40} +{29,75,31} +{53,26} +{28,95,78,84} +\N 
+{22,77,13,64,68} +{15,69,82,26} +{42,37} +{64,59,95} +{37,72,86,95} +{9,59,92,57} +{65,37,13} +{93,67,81,54,89} +{21,52,78,59,30} +{98,90} +{17,35,57,4} +{44,56} +\N +\N +{25,26,13} +{62,41,60} +{28,92,16,74,4} +{92,19,85,77,11} +{20,67,85,22} +{75,69,34,29,64,73} +{70,40,2,29} +{87,27,70,54,6} +{10,8,9,62} +{71,41,14,22,23} +{83,79,46,37,99} +{79,42,3,54,20} +{12,60,42,100,39,33} +{13,79} +{95,28,54,52,77,3} +{55,50,25,41,42,16} +{96,67,23,54} +{65,54,32,52,16} +{100,11,69,96,95} +{1,18,93} +{53,78} +{24,40,47,30,40,11} +{87,7,12,10,52,90} +{3,72,95,15,32} +{60,69,19,8,43,72} +{88,10,11,55,37} +{67,48,31,48} +{98,70,38,97,14} +\N +{52,12,94} +{41,26} +{81,65} +{66,74,9,66,12,3} +{47,6,33,92} +{95,2,12,90,73,97} +{23,76,34,23,2,20} +{7,22,37,83,15} +{44,61,21,8,36} +{88,52,8} +{66,3,67,43,87} +{16,51,10} +{66,43,28,69,70} +{47,2,94} +{57,67,55} +{40,59,6} +{63,19} +{51,71,68,28} +{73,97,48,56,70} +{3,4,28,48,18} +{31,94,27,70,44} +{85,18,40,6} +{78,91,79,88,33} +{11,90,78,49,97} +{74,91,27,79,75,53} +{1,70,3,40,43,99} +{97,35} +{58,27,40,6,47,33} +{43,42,60,94} +{41,34,23,53} +{57,44,50} +{8,10} +{49,53,22} +{91,2,90,13} +{46,80,27,82,42,99} +{12,96,72,23,83,56} +{48,82,71,8,35,16} +{38,69,38,49,47} +{80,28,13,9} +\N +{84,13,12,33} +{31,57} +{68,86} +{4,96,64,19,48,29} +{66,8} +{33,86} +{32,38,86,86,41,84} +{38,51,31} +{59,17,76,36} +{52,87,60,54} +{7,58} +{34,52,58,90} +\N +{30,67,97,2,1} +{93,10} +{47,16,46,8,39,84} +{90,77,37} +{92,58} +{38,94,49,53,11} +{70,49,35,67,18,28} +{58,81} +{79,100,9} +\N +{97,13,56} +{99,40,87,67,58} +{24,47,19,16} +{12,27,47,48,3,59} +{1,58,15} +{97,28,6} +{94,50,31} +{71,34,94,53} +{26,5} +{46,66,56,27,37} +{76,4,1} +{80,63,40} +{89,82} +{39,100,71,82,95,8} +{81,86,27,83,57,47} +{30,30,92,8,33} +{95,20} +{4,19,8,74} +{20,32,83,62,19,18} +{75,29} +{100,13,6,41,23} +{63,5,93,72,43} +{64,13,73} +{35,91,61,26,41,96} +{49,56} +{2,28,80,84} +{15,48} +{32,49,96} +{72,73,57,69,16} +{95,1,88,64} +{70,55,88,66} +{76,66,30,92,1} 
+{88,21,74,65,93} +{72,75,75,3,26} +{55,32,85,68,84} +{45,40,93,33,72,20} +{83,89,6} +{4,60} +{72,56} +{73,7,69,25,96,74} +{100,72,41,48,63,37} +{21,72,70,94,67,54} +{6,9,58,77,35} +{70,59,35,25} +{86,96,87,62,13,5} +{93,52,74,57,58} +{93,23,88,50,56} +\N +{95,72,68} +{63,52,58,41,54,90} +{52,23,53,32} +{93,87,39} +{23,73,6,46,79,72} +{44,17,12} +{79,59} +{31,62,14,26,75,23} +{64,72,18,48,63,50} +{71,40,59,87} +\N +{82,17,10} +{44,29} +{6,4,39,16,21} +{94,17} +{91,61,37,36,9} +{53,38,7,28,92} +{95,93,35,18,48} +{35,77,53,87,97,92} +{56,28,68,19,28,86} +\N +{23,91,56} +{97,5,89,24} +{18,81,17,78,63} +{83,19,46,10,22,66} +{100,17,45} +{25,87,61,79} +{17,57,99,1,39,1} +\N +{2,51,26} +{93,69,84,85,87} +{40,58,70} +{86,84,96,41} +{28,36} +{39,85} +{16,84,75,68,87,17} +{14,84,57} +{25,85,35,82,56} +\N +\N +{7,30,17,2,66,91} +{45,17,57,27,98,65} +{57,86,15,40,68,23} +{82,32,28,89,41,79} +{28,3,35,61} +{76,95,19,81,48,50} +{34,6,85,47,65,2} +{70,23,91,33,15} +{30,24,47,96,61,47} +{78,88,64,60} +{87,40,86,97} +{47,14,54,37,100} +{48,95,32,77,69} +{58,12} +{63,20,49} +{78,85,41,72,6} +{39,20,89,21,62,76} +{71,6,10} +{63,4,71} +{51,21,37,63,54} +{66,6,63,12,58} +{89,97} +{64,70} +{53,1,65} +{57,73,30,26} +{15,99,47,89,95,99} +{12,86,7} +{50,68,1,31,67} +{47,86,54,44} +{78,7,86,76,22} +{46,71,98,62,67} +\N +{64,91,80,63} +{82,61,17,58} +{85,64,90} +{37,26,64,97} +{68,25,26,61,68} +{11,21} +{63,53} +\N +{87,88,75,65,10,48} +{32,7,38,72,44} +{99,81,59,10} +{31,58,60,66,41,28} +{23,27,57,74,4} +{20,94,28,29} +{91,5,15,61,50,29} +{34,58,15,85,65,29} +{52,50,2,95,87} +{3,94,54} +{7,61,96,49} +{51,70,23} +{87,49,27,6,7} +{83,61} +{36,92,48,57,20,83} +{53,12,60} +{60,11} +{68,43,74,23,66,55} +{66,8,54,24} +{48,72,41,74} +{81,99,50,33,20,13} +{27,80,60,83,26,74} +{80,1,59,50,15,99} +{11,70,20,29} +{23,84,63} +{63,24,91,19,28} +{25,17,95} +{94,13,81,69,26,89} +{31,48} +{45,20,74,51,62,33} +{77,55,17,63,4,18} +{89,14} +{85,85} +{23,11,85,74} +{29,76} +{62,40,96} +{1,29,25} +{56,26,12} 
+{5,22,6} +{61,9,6,85} +\N +{31,34,49,11,19} +\N +{14,20,64,73} +{63,1,85} +{2,58,61,100,9} +{89,92} +{37,13,81,77} +{36,26,16,76} +{78,10,10,92,63} +{68,6,35,71,92,27} +{2,88,33,14,85,27} +{80,95,71,98} +{8,33,33,55,90} +{62,74,15,10,64} +{60,18} +{6,77} +{27,38,4,49,27,89} +{94,84,94,8,98} +{15,73,47,47,26} +{73,38,69,90,9,13} +{17,33,32} +{51,57,25,40,41,37} +{77,70} +{66,10} +{50,90} +{96,88,30,65} +{30,49,100} +{34,46,19,89,52,24} +{83,85,62,72,10,64} +{98,56,23,77,79} +{97,90,83,85} +{19,66,70} +{70,89,59,12,71} +{24,96,22,4} +{43,32} +\N +{92,85,41} +{96,90} +\N +{4,5,82} +{58,32,34,86,30} +{51,8,44} +{31,96,37,47} +{51,15,41,97} +{86,41} +{41,26,61} +{62,79,68,73,5} +{32,9,88,30} +{89,34,64} +{70,18} +{64,31} +{14,73,1,50,75} +{57,1} +{53,92,38,13,56} +{41,1,87,40,60} +{83,75,19} +{69,98,25,64} +{69,75} +{84,13,25,8,81} +{41,52} +{90,80,17} +{19,53,72,62,94} +{29,30,99,32} +{32,85,73,26,47} +{6,48,89,93,23} +{73,47,93,10,48} +{60,21,26,60,63} +{85,41} +{75,61,61,45} +{51,7,5} +{9,46} +{83,36,7,84,96} +{71,78,55} +{43,53,88} +{8,1,80,69} +{88,86,51,12,37} +{45,69,40,85} +\N +{36,53,60,15,7,7} +\N +{92,5} +\N +{51,13,34} +{39,23} +{16,26,93} +{91,96,19} +{89,64,2} +{8,74,29,24,66} +{26,19,30,27} +{81,59,52} +{99,28} +{5,12,63,79} +{14,80,90,83,47,79} +{67,64,32,58,44,19} +{27,32,52,79,55} +{68,87} +{14,31,20,12} +{38,99,65,32,15} +{27,57,35,17,53} +{63,64,6,60} +{70,38,47,65} +{24,87,20,4} +{86,27,19,56} +{62,44,1} +{46,10,26,48} +{40,57} +{61,9,59,80,51,20} +{83,44} +{77,1} +{78,63,42} +{75,93,95,76,9,52} +{20,58,10,37} +{72,75,41,73} +{63,93,5} +{57,65,47} +{34,6,51,38,21} +{54,7,19,9} +{61,6,47,64,100,86} +{39,45,55,17} +{81,53,67,33,70} +{11,94} +{57,98} +{78,81} +{75,71,20,8,13} +{3,2,58,95} +{37,58,5,46,54} +{40,50,36,27,69} +{73,42,86} +{97,73,87,80,38} +{27,56,94,73} +{80,81,74} +{53,79,86} +{79,4,55,21,34,74} +{84,63,21,97,92,38} +{72,38,76,63,97,79} +\N +{64,91,100,98} +{34,10} +{97,73,7} +{49,31} +{87,39,65,96} +{54,88,60,55,18,4} +{20,72,96,26} 
+{40,51} +{37,46,89} +{88,53,3,52,39} +{10,34,77,95} +{20,66,84,12} +{51,19,61} +{67,35} +{73,56,89,43,35} +{94,54,27,63} +{63,53,21,79,76,49} +{79,23,28,63,49} +{47,94,75,11} +{67,95,56} +{80,86} +\N +{62,73} +{98,69,11,57,24,90} +{87,41,77,21,94} +{21,87} +{3,40,75} +{67,53,78,29,16} +{18,46,70,76,98} +{14,67,50,63,22} +{4,2,92,4,8} +\N +{41,76,79,95,96,61} +{35,30,18,57} +{34,91,89,27} +{22,25,9,34,85} +{4,53} +{23,6,65,86,56,93} +{54,81,8,59,36,47} +{90,10,4,25,31,46} +{91,82,82,80} +\N +{64,12,59,21,10} +{49,93,76,26} +{22,10,21,15,57} +{14,29,93,31} +{68,21} +{62,95,12} +{34,74,55,4} +{26,39,93,31} +{67,31,63} +{23,89,98,88} +{48,93,22,79,28} +{1,88} +{95,74,84,18,38} +\N +{82,29,22,45,15,81} +{15,48} +\N +{17,36,97,77} +{93,59,71,15,51,35} +{67,33,57,11} +{35,80,72,43} +{69,89,69,48} +{52,29,16,52,100,22} +{60,30,45,19,25} +{28,3,39,86,13} +{81,40,25,20,39,5} +{77,14,93,47,23,6} +{42,19} +{52,52,98} +{9,29} +{78,77,6,72} +{2,59,73} +{13,85,77,26,29} +{64,63,94} +{54,76,3} +{7,1,5,91,100} +{24,94,57,94,79,55} +{4,22,1,75} +{34,53,19,87} +{69,75} +{71,47,47,61,42,89} +{3,32} +{84,61,4,13,73} +{74,61} +{47,65,85} +{50,84,83,18} +{51,97,11,3} +{59,92,4} +{49,42,65,27,97,52} +{19,33,40,44,71,100} +{82,68,99,60,47,59} +{47,33,82} +{3,45} +{47,28,60} +{3,98,60,30,50} +\N +{11,40} +{33,67,72,43,74} +{9,49} +{42,47,48} +{53,88} +{17,87,28} +{20,4,72,62} +{65,25,22,76,64} +{9,62,57} +{59,93,52,93,60} +{85,85,1,55,50} +{69,22,57} +{8,50,81,32,4} +{80,47} +{60,88} +{16,54,80,66} +{99,87,66,65} +{60,19,58,18} +{14,77,66,48,59,41} +{75,96,82} +{42,72,93,79} +{14,23,78,82,40} +\N +{29,47,16,41} +{13,11,45,67,23,92} +\N +{8,3,52,41,56} +{57,41,63} +{5,50,59,87,50,58} +{58,99,9} +{60,99,15,63} +{59,14,9} +{68,81,34} +{83,18,3,94,39} +{27,52,100,66,48,82} +{10,23,50,96} +{72,14,12,68,62} +\N +{45,30,55,86,89,48} +{5,80,97} +{52,67,86,81} +{99,4,38,79} +{21,98,78,71,73} +{10,23,38,61} +{12,17,19,70} +{79,23} +{55,66,65,60,19} +{7,34,68,88} +{37,70,5} +{41,57,86,31,10,6} 
+{70,59,96,78} +{88,18,32,22,56,21} +{93,72,81,47,89,72} +{100,14,49} +{83,80} +{73,11,97,14} +{60,47,32,34,13,29} +{39,6,88,24,6} +{54,66,55,52,47} +{56,89,88,98,94,48} +{2,37} +{13,54} +{68,39,68} +{60,81,10,85} +{74,54,14} +{30,52} +{41,74,47} +{77,28,8} +{90,3,43,89,4} +{29,46,84,63,79,83} +{26,15,80,19} +{76,28,77,6,47,91} +{51,15} +{93,15,51} +{8,68,34,58,15} +{5,56,57,81} +{27,87,61,54,41} +{31,37} +{68,80,3,98,49,6} +{96,10,39} +{25,19,21,72,79} +{69,1} +{5,51,61,80} +{76,25} +{36,92} +{54,46,31,87,13,8} +{25,13,83,57} +{29,53,73} +{83,60,26,19} +{27,89,34,13,20,38} +{29,74,26,67,65} +{90,36} +\N +{32,15,43,50} +\N +{55,86,68,51} +{91,14,53,70,49,38} +{75,29,79} +{19,59,38,44,18,79} +{43,31,24,20,16} +{43,83,59,37} +{61,17,95,61} +{67,89,1} +{65,20,46,58,49} +{72,54,38,52,49} +{75,12} +{63,95} +{99,17,79,11,35} +{62,60} +\N +{69,83,89,73,20} +{30,60,39,73} +{78,99,29,45,61,21} +{38,61} +{51,15,47,11,4} +{34,75} +{57,26,42,42} +{8,90,4,68} +{63,70,99,3} +{74,70,33,50,59} +{27,18,73,83} +{36,90} +{82,77,2,83} +{90,99} +{15,25} +{65,30,39,82,89,34} +{12,24,64,54,49,83} +{54,59} +{63,49,81,36,75,52} +{6,59,90,55,87} +\N +{97,52,54,97,3} +{8,53,89,42,30} +{68,42,64} +{97,42,99,74} +{19,31,32,52,7} +{69,83} +{61,17,35,39} +{81,47,70,7,63} +{78,10,63,97,31,48} +{84,92} +{64,82,40,39,57,44} +{39,25,92,33,5} +{27,74,85} +{90,67,21,28,84} +{36,33,62} +{77,87,98,82,11,88} +\N +{11,41,17,91,56} +{1,1} +{84,100,8,22,20} +{57,39,85,5} +{55,47} +{13,2,36,59,45} +{95,66,53,32,29} +{21,92} +{35,32,9,58,77} +{19,71,99,82} +{19,37,87,43} +{100,18} +{67,86,29,40} +\N +{66,54,64,55} +{67,25,18,31} +{60,26,59,86,26,67} +{26,21} +{70,67,30} +{93,82} +{89,58,39,91,95} +{15,86,25,8,12} +{59,20,41,33,78,87} +{10,72,89} +\N +{52,17,99} +{77,29,7,7,1} +{49,96,57,24,66,67} +{10,26,83,84} +{82,7,25} +{66,77,57,25} +{92,77} +{24,48} +{44,26,37,75,11} +{73,80} +{51,47,93,21,25,78} +{76,49,15,98} +{12,85,63,59,6} +{25,51,47,58} +{16,10} +{17,30} +{67,5} +\N +{54,96,21} +{12,47} 
+{29,90,69,22,89,82} +{78,93,86,65,66} +{83,84,58,67,13} +{85,35,81,27,1,2} +{76,29} +{64,82,91} +{35,89,38,89,10} +{19,40,96} +{83,70,85} +{72,85,70,99} +{34,1,39,16} +{84,53,22,86,73} +{32,23,70,49} +{15,67,91,11} +{73,95} +{71,57,64} +{88,91,56} +{12,16} +\N +{62,82,26,84} +{70,51,52,63,96} +{34,93,49,57} +{16,5,47} +{18,59,12,82,83,51} +{61,93,87,9} +{46,9,45,38} +{15,85,28,73} +{31,99,26,3} +{66,91,48,73} +{98,80,9} +{31,55,42,69,13,58} +{43,8,70,29,83} +{39,57,53,70,74} +{89,13,60,38,89,3} +{37,28,15} +{67,77} +{30,100,89,36,53,75} +{36,19,48} +{7,8} +{12,76,26} +{14,56,52,47,39,67} +{87,83,51,2,97,25} +{51,1} +{59,69,37} +{95,93,21} +{100,92,37} +{37,23,66,95,7,63} +{52,56,77,86,46} +{31,62,17} +{57,48,79} +{26,96,40,5,43,54} +{40,92} +{75,83,1,73,71} +{75,61} +{6,38} +{35,23,76} +{52,3,38,25,100,99} +{45,15,44} +{96,9,11,35,16,58} +{9,80,76} +{22,43,34,43,46} +{34,68,21} +{95,70,83} +{60,7} +{34,22,68,2} +{78,30} +{46,70,90,96} +{5,24,69,61,32} +{41,17,79,27} +{59,88,64} +{12,48,41,68,15,98} +{43,84,59,62,36,14} +{84,8,71,88,4,23} +{45,67,67,17} +{14,96,72,66} +{91,23,4,11,28} +{18,5} +{65,51} +{31,87,33} +{17,97,76,81,69} +{56,71} +{95,23} +{33,58,66,47} +{46,99,69} +{43,87,40} +{49,1,26} +{18,36,89,87,25,100} +{76,37,19} +{57,91,9,100,23,59} +{80,60} +{55,23,32,49} +{15,73} +{87,50} +{43,62,50,54} +{65,3,89,49,77} +\N +{73,12,25,78} +{79,89,38,59} +\N +{44,62,25} +{96,13,57} +{35,14,3} +{90,71} +{34,8,59,81,63,90} +{15,90,89,32,69} +{90,61,54,10,29} +{22,3,85,41,66} +{17,4,99,91,45,57} +{89,32,43,39,61,9} +{45,40,6} +{47,100,75,8,85} +{88,43,89} +{45,41} +{54,48,87,66,100,5} +{58,65,39} +{17,82} +{95,14,31,51} +{30,3,46} +{8,66,22,52,51,24} +{61,62,38} +{4,50,83,32,76} +{96,36} +{87,27} +{82,100,44} +{30,91,44} +{29,48,8,38,43,96} +{56,65} +{34,36,99,11} +{11,1,25,65,12,89} +{17,100,62,53,24} +{86,81,63} +{17,63,30,82,87,91} +{12,63,76,78,85} +{52,19} +{21,91,53,86,49,83} +{67,65,78} +{8,77} +{89,1,56,100,72,96} +{20,51,41,21,30,20} +{41,73,37,92,9,5} 
+{95,34,21,12} +{28,14,2,62} +{14,74,33,32} +{37,82,67} +{65,99,56,11,21,83} +{99,51} +{56,42} +{59,30,74,40} +{18,27,63,44,86} +{48,25,41} +{5,26,63,88} +\N +{24,66,64,1,26} +{72,74,11,61,70} +{28,27,90,30} +{96,35,21} +{64,100,75,94,88,3} +{93,79,42} +\N +{37,51,4,41} +{31,68} +{93,42} +{76,96,47} +{8,6,16,57,51,72} +{67,72} +{50,36,40} +{69,28} +{17,92,40} +{72,74} +{76,87,93,22,95,30} +{14,88} +{39,56,74,36,25,87} +{55,68} +{32,9} +{35,2,17,86} +{92,73,82} +{40,13,95} +{15,28,95} +{65,40} +{47,56} +{63,72,78,20,22} +{71,49,4,80} +{68,16,50,44,29,38} +{81,96,23} +{44,73} +{4,68} +{30,54,41,66,89} +{92,33} +{10,92,49,46,59,42} +{14,91,18,96,27,37} +{40,32,12} +{14,97,15,96,44} +{75,96,52} +{50,20,9} +{39,84,83} +\N +{14,48,3} +{47,85,76,27} +{5,3,25} +{55,36,29,76,41,44} +{34,56} +{62,29,83,6,58} +{67,32,85} +{75,62,4,66,100} +{47,31,27,43,9,57} +{92,44,36} +{31,22} +{14,88} +{18,25} +{82,63} +{54,67,6,59} +{90,42,19,91,37,75} +{70,39,87,52,32} +{51,20,34} +{85,62} +\N +{95,6,55,93} +{44,67,15} +{93,58,20,12} +{42,6,22,29,36} +{46,81} +{57,95,56,52} +{3,79,69,45,8,74} +{75,44} +{4,17,78,96,66,41} +{27,100} +{85,76,22,17,45,58} +{9,12,70,29,96} +{5,68} +{54,79,5,19,17,24} +{99,13,9,52,86} +{94,6,99,57} +{71,62} +{63,50,9} +{42,42,80} +{25,96} +{93,20,10} +{83,73} +{14,76,36} +{57,31,29} +{17,25,18,18,54,95} +{34,27,86,37,92,83} +{57,57,28,32} +{98,53,60} +{8,59,41,88,49,46} +{95,42,30} +{12,51,98,74,76} +{6,49,26} +{21,35,27,32,83,93} +{16,56,89} +{85,34,73,74} +{52,95,22,4,71} +{96,42,63,88,80,91} +{78,34,41,99} +{11,68,27} +{50,14} +{78,52,66,15} +{100,82,1} +{35,2,93,71,45} +{4,56,8} +{83,19,5} +{82,39,63} +{50,64,83,87,76} +{47,59,93,88,22,67} +{16,6} +{86,98,88} +{32,4,52,34,30,71} +{68,25,97} +\N +{19,17,91,84} +{97,88,89,98,33} +{37,56,70} +{27,17} +{56,58,51} +{69,80,47,84} +{89,22,89,88,16,1} +{95,14} +{14,95,97} +{47,15} +\N +{19,20,65,74,83,38} +{57,56} +{78,67,68,89,1,95} +{61,84,93} +{10,56,96,31,56} +{3,51,90} +{15,85,42,25,15,41} +\N 
+{50,7,89,89,96} +{90,10,44} +{11,43,15,27,30} +{55,68,48,30,44} +{38,69,3,95,39,6} +{57,51,88,94,82,23} +{69,37,2,67,49} +{93,94,5,84,39,47} +{45,47} +{58,55,79,63,64} +{63,65,59} +{42,36,76,75,89,86} +{41,83,98} +{13,90,13,46,11,37} +{76,33,52,65} +{52,29} +{20,60,45,23,29} +{89,6,14,8} +{91,69,64,72,41} +{46,91,31,66,83,33} +{6,58,61,65} +\N +\N +{90,65,16,5} +{24,46,33,36,47,45} +{11,62,40,98,21,88} +{28,95,58,33,27} +{45,63,99,31,38,90} +{11,49,41} +{23,24,82,25,28} +{42,3,34} +{52,10,58,88,97,37} +{20,41,11} +{86,30} +{36,92,93,10} +{5,36,85,50,71} +{51,75,100,46} +{55,81,31,45,87,8} +{83,10,45,81,33} +{16,94,91,23,76,44} +{62,73,14,39} +{16,14,83,100,82,7} +{25,69,86,12,71} +{29,86,45} +{76,62,100,47,57,52} +{41,21} +{33,56,58} +{23,96,44,16,91,86} +{65,15} +{3,92,56,4,21} +{32,39,95} +{95,87} +{65,96} +{16,96,93,100,35,78} +{64,33,55} +{96,75,41,40,62} +{50,50,86,11} +{93,34,83} +{19,30,62,67,93,19} +{53,67} +{55,46,99} +{70,32,38,4,84,91} +{50,36,40} +{21,93} +{29,6,10} +{4,73,45} +{72,33} +{36,73,18,55,27,100} +{65,73,98,90} +{20,1} +{59,36,60,87} +{20,79,63,93,34,31} +{60,18,92,6} +{48,34} +{63,70,78,1,2} +{15,32} +{5,15,84,73} +{32,35,90,11,40,23} +{91,41,7,52} +{84,90,88,30} +{12,10} +{84,86,36,79} +{76,45,84,66} +{41,25,61,96,97} +{18,100} +{63,39,17,34,32} +{22,45,74} +{83,24,45,48,69,84} +{43,41,12,44,75,91} +{69,75,95} +{100,28,14,66,1,14} +{94,91,60,36} +{88,28,54,63} +{68,78} +{29,68,6,100} +{12,84,35,44,59,55} +{30,59} +{64,18,40,57} +{97,97} +{85,64,73,82,49,88} +{99,31,24,6,90} +{23,89,38,20,40,95} +{84,64} +{21,3,91,7,7,87} +{91,74,32,76,43} +{13,22,96,8,75} +{59,71} +\N +{34,94,45} +{14,5} +{95,10,37,74} +{69,82} +{6,58,45,49,81} +{72,72} +{17,58,10} +{62,77,9,6,44,62} +{37,53,49,41} +{24,11,11} +{10,57} +{26,72} +{18,15,83,60,54,80} +{88,49,73,92,67} +{26,88,64,2,59} +{49,50,3,90,44,49} +{58,54,43} +\N +{86,78,40} +{42,17,65} +{1,86,17,6} +{79,27,37,60,8} +{46,62,46,22} +{9,75,17,68,54,35} +{99,86,64,10,20} +{3,21,35,6,24,64} +{25,62,9,50} 
+{63,2,79,42,81} +{44,41,2} +{99,93,98,78} +{2,92,9,96} +{79,82,25,64} +{47,84,52} +{97,77} +\N +{47,94,38} +{22,33,76} +{35,52,11} +{17,48} +{1,100,27} +{87,93,19} +{72,3,32,78,81} +{47,28,4,23,79} +{27,88,7,85} +{49,40,47} +\N +{91,89} +{80,2} +{86,78,42,6,81} +{7,50,25,4,8,22} +{23,3,64,59,53} +{1,42,63} +{95,81,86,31} +\N +{81,83,52,47,25,43} +{17,57,100,49,59,63} +{44,91,95,72,29,100} +{80,78,55,41} +{14,52,20,64,9,87} +{48,14,82} +{31,5} +{64,50,66,38,97} +{61,2,90,2,64} +{64,69,26} +\N +{64,62,68,89,12} +{12,10,88,71} +{41,66} +\N +{67,77,25,6} +{14,75,15,66,19} +\N +{88,52} +{78,56,61} +{93,88,47,38,52} +{72,100,54,34,18} +{77,99,89,53,25} +{38,51} +{3,25} +{83,39,85} +{60,15,77,59,69} +{38,64,91,97} +{65,35,30,8} +{46,6,48} +{63,91,29,91,85} +{43,100,56,60,74,53} +{95,30} +{86,63,28,62,37,79} +{2,48,29} +{1,44,20,47,56} +{43,34,86,86,64,14} +{11,82,99,71,63,41} +{77,45,74,17,56} +{18,25} +{51,82} +{27,35} +{1,20,84} +\N +{89,37,16,90} +{58,83,34,88,50,21} +{61,25,1} +{41,6} +{9,100,32,54,38,66} +{40,53} +{29,76,16,13,55,31} +{71,67,54,83,3,82} +{19,62,18,94,73,38} +{17,83,8,45,52} +{80,25,50,59,53} +{4,2} +{52,48,6,72} +{50,32,70} +{36,97} +{17,82,36,97,20} +{22,87} +{46,29,96,98,14,90} +{14,92,5} +{69,9,68} +{20,86,29,61,54} +{62,67,87} +{86,18,31,80,82,45} +{65,89,67,34,41} +{44,8,48,38,91} +{47,32} +{85,25,56,39} +{15,54} +{84,57,44,46} +{65,61,29,86,77,53} +\N +{26,58} +{76,1,57,93} +{57,91} +{13,15,66,11} +{84,12} +{43,32} +{83,24,31} +{82,9,65,84,27,94} +{62,93,55,7,39,46} +{90,100,33,22,61,46} +{9,51} +{87,93,82,94} +{49,45,95,95,66,39} +{100,56} +{11,5,78,42,45,37} +{3,57,80,46,13,34} +{1,74,53,31,33} +{11,84,8} +{27,99,21,31,96,58} +{99,81,90,17} +\N +{66,49,47,55} +{88,30} +{76,62,17,88,83} +{40,7,42,61} +{17,57,9,64,54,1} +{9,54,84} +{50,61} +{72,15,25,30,6} +{64,95,69,89,11} +{64,18,86,25} +{81,59,70,6,92} +{78,76} +{33,40,29} +{15,63,1,12,14,57} +{33,81,8,65,26} +{58,15,56,37,67} +{2,50,35,92,11,27} +{17,13} +{91,100,15,27,39,24} +{58,48,46} 
+{5,95,28} +{7,21,99} +{5,15,6,10} +{82,99} +{66,22,86,83,76} +{99,68,39} +{43,90,22} +{31,94} +{21,64,56,26,95,40} +{7,81,3,53,83} +{29,42,90,60} +{53,49} +\N +{26,31,14,73,88,51} +{69,2,100,9,34,16} +{78,35,97} +{68,16} +{34,45,42,73} +{7,19,55,70,69,11} +{11,62,61} +{32,17,51,33,87,6} +\N +{54,97,36,13,45,12} +{46,2,26} +{14,6,17} +{99,20,31,61,6,4} +{60,72,53,31,34,25} +{88,46,68,78} +{56,94,49} +\N +{33,65} +{70,51,84} +{55,91,27,33} +{22,19} +{34,78,11,94,3} +{16,67,91} +\N +\N +{64,5} +{76,18,83,5} +{57,13,30,56} +{60,92,25,31,43} +{38,17,54,5,2} +{56,58,39} +{42,43,5,69,56,89} +\N +{50,23,97,85,70,39} +{97,56,33,90,64,2} +{9,54,51,26,24,99} +{18,7,59} +{44,5,40,69,18} +{77,96} +{44,58,47,17,26,45} +{90,71} +{88,32,11,96,17,13} +{42,3} +{97,28,56,10} +{38,36} +{50,52,47,31} +{64,5,99,77,83} +{11,56,1} +{91,92} +{7,53,35,52} +{93,65,47,97,44,82} +\N +{64,66} +\N +{62,4,57,23,34,91} +{52,55,75,99,27} +{29,54,44,87,61,96} +{21,3,66,35,25,80} +{96,68} +{3,41,66,81,78} +{49,98,79,65} +{71,38} +{88,79,70,37,3,82} +{49,74} +{19,29} +{57,68,9,8,99} +{81,88,14} +{99,29,24,99} +{55,96,29,89,49} +\N +{56,2,84,79,74} +{30,52,64,74,62,5} +{88,32,19,25,9} +{40,11,49} +{98,52,27} +{11,86,29,86,6} +{91,53,63,53,44,28} +{88,10,30,48} +{75,64,75} +{14,92} +{98,62,35,67,66,35} +{40,65,11,80,73} +{1,1,63} +{85,32,53} +{91,27,68,50,66,63} +{66,54,38} +\N +{45,43,14,94} +{62,84} +{54,24,83,33,46} +{93,72,2} +{43,4,14} +{18,11,5,99,79,94} +{26,59,9,2} +{58,69,70,45,14,54} +{84,5,42,97} +{7,82,41} +{69,53,8,55,20} +{4,13,6,45,83} +{41,92,41,98,51,85} +{72,85,74} +{19,50,79} +{79,47,47} +{25,25} +{17,56,46,30,73,78} +\N +{92,42,83,34,92,29} +{8,52,76,80,9,55} +{80,100,2,52,24,4} +{55,15,92,27,86,50} +{83,79,41,88,86,53} +\N +{44,16,90,54} +{99,20,64} +{44,30,26,26} +{35,35,24,74,72} +{97,24,94,55} +{78,42,32,76,100,98} +{31,86,12,87,72,86} +{87,35,33,88,33} +{31,83,23} +{46,51,5,6,71,31} +{39,97,91,53,39} +{19,18,25} +{16,4} +{65,77,13} +{61,30,13,26,75} +{67,9} +\N +{31,3} +{15,19} 
+{97,39,71,30} +{12,96} +{36,96,82,62,5,74} +{81,22,46,11,19} +{97,55} +{58,67} +{10,68,79,74,23} +{29,71} +{50,59,8,1} +{12,51,32,7} +{62,16} +{48,82} +{84,21,24,13} +{46,86} +{100,96,32,54,13} +{72,41,3,67} +{61,9,7,75} +{39,44,50,30,38,6} +{63,63,6} +{69,35,6} +\N +{7,91,82,48,55} +{57,22,31,57} +{55,72,91} +\N +{76,98,43,71,10} +{100,34} +{78,53,14,73,23} +{42,90,28,44,44} +{90,34,22,81} +{60,32,56} +{98,53,58,58,61} +{61,70,59,78} +{2,96,27} +{83,99,25,47,13} +{17,54,11,47,70} +{70,43,11,89} +{93,70,82} +{72,57} +{35,95,49,36,19} +{82,25,16,49,43,93} +{2,51,96,48,88} +{20,81} +{74,4} +{66,83} +{90,75,98} +{25,87,59,92,55,96} +\N +{20,80,92,93} +{59,63,39,3,7,38} +{64,10,85,22} +{63,32,18,38,83} +{49,38,83,54,1} +{27,97} +{18,34,84,58,7,86} +{93,4,67} +{43,49,32} +\N +{29,14,5,50,30} +{59,15} +\N +{76,31,31,47,17,35} +{95,41,71,27} +{47,43} +{75,80,56,78} +{56,75,43} +{99,10,100,76,44,1} +{5,31,72,3,25} +{21,90,59} +{59,45,75,93,78,88} +{76,55,4} +{20,87,44,94,56,78} +{38,87,71,13,23} +{33,6,79,91,92,27} +{13,15,31,15,11} +{57,18,57,71,11} +{67,60,64} +{66,15} +{57,45,74} +{93,91,97,30,12,94} +{37,83,62,18,28} +{94,88} +{12,11,85,10} +{42,96,89} +{15,65,5,65} +{52,58,36,27,10} +{72,88,76,50,96} +{40,70,55,93} +{80,33,24} +{53,35,50} +{11,37,55} +{25,80,32,91,68} +{11,2,52,39,37} +{17,51,45,44,85,84} +{81,21,77,15} +{67,93,27,70,72,94} +{86,99,95,98,83,28} +{9,65} +{1,26,5,23,5,17} +{57,82,42,60} +{46,67,65,98,69,79} +{41,50,94} +{77,81} +{87,82,18,57} +{88,27} +\N +{32,58,81,88,94,90} +{23,37,65,38,29} +{61,11,65,77,25} +{50,53} +{38,2,11,9,27,94} +{64,9} +{1,45,97} +{61,41,67,46} +{13,41,90,15,80,82} +{83,6,9,22,25,37} +{95,74,22,64} +{16,17,4,80,66,33} +{25,42,43,84,96,85} +{25,93,50,87,6} +{35,67,90} +{82,37,59} +{4,44,83,2,81} +{78,46} +{64,79} +{18,41,3} +{56,20,51,83} +{26,77,52,70,93,13} +{54,53,12,47,57,63} +{94,48} +{39,12,41,5,3} +{28,33,93} +{20,29,9} +{75,38,10} +{96,54,96} +{47,87} +{19,35,11,3,80,72} +{75,56,84,24,55,48} +{58,5,13,6} +{10,53,32,6} 
+{23,8,59} +{71,2,35} +{41,16,99} +{77,6,16} +{30,27,56,85,11} +{47,21,93,82} +{50,68,85,34,19,57} +{14,76,58} +{78,81} +{68,99} +{19,79,67} +{91,73,82,88,44,36} +{49,18,75,32} +{54,18,99,74,9} +{51,58,60,30} +{99,86,83,22,88} +{24,42,76,30} +{86,16,54,69} +{37,39,72,45} +{92,62,3,36} +{31,80} +{43,22,11,15} +{38,88,95,25,49} +{92,21,10,28,47,55} +{99,18} +{26,64,72} +{29,12,17} +{54,69,49,84} +{57,42,4,61,10} +{60,85,74} +{24,29} +{91,71} +{96,49} +{47,51} +{88,67,59,18,86} +{32,18,64,54,41,27} +{78,100} +{77,30,85,93,2,20} +{80,90,68} +{49,37,5,42} +{88,12,94,51} +{85,65,2,41} +{60,38} +{87,37,20} +{27,81,94,37,54,84} +\N +\N +{38,74,78,78,89} +{3,100} +{42,80,68} +{34,17,96,91} +{7,29,83,71,87,26} +{28,81,76,8,43,48} +{74,11} +{28,85,84,78,59,69} +{30,22} +{3,83,75,60,78,11} +{20,62,18} +{74,69} +{91,44,50,62} +{57,18,9} +{14,48,21,33} +{91,1,53,58,92,51} +{64,41,90,52} +{81,95,38,78,7,44} +{65,25,15,90,40,51} +{66,41,31} +{5,92} +{17,98,7,57} +{97,36,99,77,50,88} +{96,56} +{40,62,88,8,53,62} +{18,91,63,93,94} +{88,79,43} +{31,87,98,85} +{12,88,58,53,77,38} +{83,10,37,69,1,7} +{13,47,66} +{85,33,39,48,6,39} +{74,87} +\N +{26,50,26} +{48,78,10,39,17} +{27,43} +{58,17,18,80} +{86,43,58,77,67} +{53,12} +{9,79,9} +{85,79,89,88} +{35,77,22,52} +{93,64} +{47,52,90,17} +{75,15,25,68,28} +{35,6,68,37,18,53} +{80,14,2,89,4} +{52,49,5,66,59,44} +{5,26,96,1,84} +{71,8,61,19,72} +{17,94,84,72,55,83} +{72,10,16,40,17,75} +{6,70,15} +{22,99,7,19} +{55,19,4} +{6,47,69,42} +{17,9,63,44,15} +{23,20,72} +{10,80,20,87} +{99,3,23} +{11,76,8,77,58,38} +{45,14} +{22,89,73,63,54,9} +{16,19} +{1,26,29} +{92,20,65,33,16,40} +{27,64} +{22,19,95} +{36,16,9,30,31,14} +{40,50,68,52,77,58} +{35,15,11} +{67,2} +\N +{63,48,76,25} +{14,65,42,60} +{61,58,31,51,70,4} +{35,41,72,29,46,62} +{98,48} +{90,23} +{1,79,80} +{10,5,54,59,16} +{15,1,9,75} +{34,100,90} +{73,76,25,77} +{98,82,77,67} +{79,5,20} +{9,69,9,52,2} +{23,22,77,80,79} +{32,51} +{23,52,5,33} +{95,20,96,78,48} +{100,37,6,15} 
+{98,1,53,20,97} +{5,28,26,78,33} +{19,75} +{49,42,30,72} +{50,98,56,26,87} +{76,59} +{51,16,18,48,46,97} +{80,60,51,43,58,28} +{23,12,70} +{40,16,14,18,46,21} +{72,79,47,57,23} +{7,17} +{49,95,6} +{14,24,29,13,90} +{82,28,34,55,15,87} +{31,24,3,50,45} +{86,95} +{97,22,17} +{27,14,27} +{61,63,31,74} +{55,81,87,67,90} +{81,9,79} +{100,29,43} +{41,88,37,29} +{62,57,16,91,60,65} +{94,90,34,94,27,48} +{15,36,80,30,23,90} +{47,91,6,42,93} +{53,74,5} +{84,14,56} +{30,56} +{10,12,92} +{33,7,75} +{96,39,50,77} +{89,85} +{20,39,63,22,44,18} +\N +{90,23,79,91,85,8} +\N +{73,70,52} +{75,100} +{27,4,29,96,25} +{56,31,80,59} +{1,91} +{16,67} +\N +{17,88,59,41} +{13,49,29,76,71,9} +{41,38,16,29,66,40} +{68,67} +{39,74,47,71,63,80} +{4,74,33,92} +{17,60,82,7,52} +{62,88,39,19,22} +{77,21,1,95,42,2} +{98,62} +{55,17,81,31,11,88} +{73,52,18,94} +{16,64,90,33} +{87,41,81,95,85} +{20,55,96,75} +{71,72,11,11,83} +{75,94,89,47,41,7} +{56,48} +{76,29,74,31,67} +{47,70,68,36,70} +{5,69,10,94,54,32} +{29,96,71} +{64,28,86,58} +{82,57} +{42,2} +{64,48,59,8,45} +{61,69,43,40,1} +{69,84} +{68,51} +{32,20} +{21,7,5,60,35} +{100,40,18,98,37} +{50,96} +{87,10,12,27} +{47,3,46,43} +{60,87,10,31} +{92,87,50,37,72,73} +{99,61,77,87,29} +{23,95,31} +{96,100,43} +{17,64,84} +{13,19,57} +{65,86,4,75,46,69} +{49,60,99,37,50,11} +{77,82,88,12} +{12,95,66,98,63} +{83,78,68} +{76,14,87,25,29,14} +{20,9,99,73,67} +{42,51} +{36,22,33,6,63} +{53,46,22} +{40,89} +{37,7,89,17} +{32,89,16} +{65,87,4} +{16,16,57,35} +{34,90} +{80,54,1} +{11,93,34} +{5,19,31,50,99,33} +{98,1,33,54,7} +{45,39,23,78} +{37,47,98,83,10,5} +{55,88} +{42,76,18,99} +{86,31,25,5,45} +{67,87,47,1} +{23,15} +{78,88,66,96} +{58,55,41,67,86,41} +{21,53} +{90,14,28,38,95,71} +{20,5,13,52,1,88} +{29,98,50,75} +{91,3,24,75,82} +{62,33} +{56,69,31,95,66} +{46,85,40} +{17,22,67,57,39,16} +{58,25,92} +{31,53,82,64,69,40} +{40,12,30,1,39} +{78,10,42,40,25} +{58,27,1,12} +{28,11,80} +{36,89,69} +{50,95} +{61,63,59,62} +{51,77} +{90,24,88,84} 
+{61,27,57} +{51,81,33,75,48} +{47,30,33,23,44} +\N +{79,51} +{62,44,5} +{98,83,80} +{31,33,89,69,19,40} +{22,38,61} +\N +{90,70,10} +{37,90,49,65,95,52} +{95,42,4,47} +{92,100,43,31,27,1} +{39,17,88,20,2,80} +{82,64,36,84} +{31,18,21,18,52,34} +{3,35,36,33} +{26,39,69} +{67,63,66} +{54,85} +{65,71} +{26,14,84} +{63,82,73,30} +{66,79,21} +{71,13} +{57,25,40,14,22} +{63,9,91,31} +{70,63,36,32,93} +\N +{11,85,47,31,54} +{34,47} +{42,78,92,85} +{75,64,12,83} +{33,98,52,91} +{22,25,91,79,33} +{33,51,20,77,23,65} +{83,66,46,24} +{54,65,31} +{43,41} +{52,47,66} +\N +{59,85,85,63,83,53} +{16,76} +{44,97,48,52} +{26,36,72} +{26,55,98,7} +{70,88,7,87,62,37} +{11,42} +{98,38,36,72} +{51,90,82,33,92} +{59,80,79} +{76,77,18,71} +{34,56,62} +{85,12,37,66} +{34,64,74} +{77,63,28,76,11} +{2,63,87,50} +{60,98,60,19,15,57} +{93,66,33,71,36,3} +{41,94} +{62,72,87,19} +{57,83,36} +{63,64,21,13,70,32} +{71,36,9,55,34} +{92,52,90,45,88} +{59,54} +{4,51} +{55,25,35,90,93,2} +\N +{75,15} +{25,86,43,18,77} +\N +{31,40} +{55,49} +{67,1,84,20,9} +{15,1,48,18,100} +{62,46} +{4,39,86,55} +{49,17} +{65,20,71,49,55,49} +{40,57,63,14,3} +{48,68} +{67,97,58,55,5,34} +{3,73} +{79,97} +{82,63,87,66,32} +{19,49,96,50,55} +{32,19,41} +{17,53} +{64,81,70} +{66,75,18,92,54,93} +{7,94,38,86} +{16,62,45,19,10,11} +{18,47} +{58,96,69} +{65,25,58,98} +{29,51,37,40,44} +{91,78} +{37,84,85,65} +{70,61,31,22,32,22} +{67,12,99,39,78} +{41,79,46,54,84,22} +{38,26,43,4,45,75} +{29,68,35} +{69,59,69,33} +{4,46,52,49} +{1,25,44,12,71,29} +{38,75,99} +{83,58,86,6} +{93,35,35,34} +{85,26} +{15,46,95,60} +{62,63,65,49,10} +{44,67,19,80,83} +{63,41,30,43,85} +{13,46} +\N +{13,95,1,34,72,37} +{4,32,22,47,6} +{67,65,77,3} +{40,70,22,44} +{74,9} +{44,28,5,32,67,51} +{55,14} +{41,3,72,68} +{64,82,72} +\N +{11,88} +{91,90,92} +{68,66,95,80,58,54} +{30,49,11} +{54,86,59,69,67} +{56,83,36} +{15,67,9,47} +{92,30,78,2,87} +{12,54,2,1,59,36} +{84,25,67,38,19,53} +{28,45} +{54,84,9,75,59,26} +{47,35,54,93} +{36,96,59,75} 
+{78,78,52,93} +{87,96,67} +{5,61,15,13,27} +{53,58,6,78,86} +{43,70} +{72,38,15,61,58} +{75,27,30,12,35,71} +{18,72,35,62,81} +{45,10} +{36,91,73,25} +{81,85,22,34,29} +\N +{15,97,82,44,19,83} +{51,23,18,6,74} +{53,75} +{62,9,73,95,37} +{58,42,33,41,71} +{5,97} +{30,2,89,81,93,61} +{32,3,18,84,24} +{6,97,20,89,23} +{27,74} +{22,86,81} +{77,19,42} +{92,9} +{58,90,59,91,30,54} +{29,51,92,34} +{85,68,59} +{36,83,75} +{37,50,86,9} +{79,70} +{33,46,93} +{97,17,6,88,10} +{18,42,88,4} +{41,95,71,27,95} +{8,2,81,56} +{54,94,54,28,70} +{34,87,20,10,5} +{36,76,87,5,100} +{97,91,25,89,89,95} +{76,26,73} +{82,23,7,42,58,72} +{53,16,99} +{10,34,57,47,2,96} +{81,93,26,19} +{8,1} +{79,55,37,61,62,3} +{34,16,69,58} +\N +{41,7,99,87} +{70,21,86} +{59,2,49,45,91,97} +{37,2,74,2,61,68} +{97,39,15,4,13,1} +{67,71,8} +{51,2,84,38} +{55,8} +\N +{75,27} +{37,36,49,70,82,41} +{70,20,85,89,99,90} +{69,61,100,49,75,35} +{11,4,67,4,91,17} +{77,56,65,78,25,8} +{16,58,6} +\N +{88,38,19,88,27,27} +{12,46} +{36,67} +{62,33,96,94,80,96} +{56,94,12,1,65,54} +{58,73} +{19,80,27,72} +{47,55} +{14,91} +{94,75,92,32,19} +{99,12,91,4,85} +{56,55} +{86,83,77,66,66,87} +{46,68,13,45} +{49,75,62,35,39} +{20,25,33} +\N +{91,47,56,68,14} +{88,43,24,42,4} +{50,24,94,18} +\N +{71,54,91,66,97,22} +{81,16,19,67,6} +{78,46,81} +{63,93,71,75,87} +{90,38,10,85,12} +{11,24,93,42,25,77} +{30,14,32,67,70} +\N +{86,91,77} +{73,74,64,66} +\N +{7,18} +{85,94} +{37,15,55,100,59} +{55,18,44,79,57} +\N +{52,40,97,75} +{60,53} +{38,9} +{27,67,77} +\N +{43,83,82,24,35,64} +{22,75,29} +{9,19} +{67,1} +{15,35,11} +{65,45,95} +{65,9} +{63,84,99,89,6,77} +{20,44,31} +{82,50,88} +{29,12,46,21,98,7} +{98,71,3,73,6,86} +{61,44,74,2,45,33} +{16,56} +{31,87} +{72,30,37,94} +{65,30,82,17,12} +{86,19} +{55,76,96,61} +\N +{44,92,83} +{41,22,79,95,20} +{36,33,86,9,61} +{22,88,8,57,73,30} +{63,97} +{36,53} +{56,52,48} +\N +{35,8,3,93} +\N +{53,52} +{7,48,78,46,70,14} +{33,92,55,17} +{39,57} +{71,43,72,7} +{92,85,55,38,35} 
+{68,30,67,8,18,92} +{9,85,82,24} +{46,46,19,14} +{96,97,31,59} +{35,99} +{54,7,20,28,29} +{20,21,56,82,19,40} +{2,39} +{33,49,63,49,93} +{35,40,26} +{30,35} +{94,70,2,23,91,74} +{34,37,72,19,15} +{92,21} +{72,63,64,35,40} +{59,11,9} +{24,3} +{93,75} +{22,14} +{63,99} +{39,47,10,14,3,45} +{51,74,5,85,70} +{6,33,15,4,89,20} +{97,82,29,15,66} +{47,47} +{88,79,57,10,68} +{18,22,13,100,100,67} +{75,50,9} +{3,12,34} +{39,51,20} +{56,5,63,18} +{83,44,86,46,37} +\N +\N +{60,16,54,75,62} +{91,95} +{39,55,11} +{37,7} +{29,49} +{38,4,52,85,67,38} +{36,56,2} +{52,14,92,39,77,16} +{42,25,49,55} +{70,10,33} +{53,46} +{83,15,28,59} +{35,69,82,4,58,46} +{73,55,64,9} +\N +\N +{60,25,8,8,39} +{50,71,61,64,64} +\N +{65,67,67,34} +{77,59,18,64,16} +{43,72,32,44,59} +{55,57} +{12,47} +{30,75,89,81} +{23,92,16,31} +{64,45,21,74,19} +{4,47,49,47,96} +{37,14,20,18,87} +{61,45,38,39,1,87} +{4,98,99,52,27} +\N +{23,6,50} +{22,61,46,79} +{90,54,60,9,49,42} +{73,27,51,72} +{73,11,23,60} +{7,31,52,34} +{27,68,39} +{39,8,21,48,64} +{86,64,92,60} +{55,36,40,46,23,46} +{32,79,86,44} +{72,29} +{33,87,57} +{57,87,61,22} +{67,84} +{32,99,26,92} +{22,27,34,82,8} +{99,25,99} +\N +{29,75} +{39,63,25,45,7} +{39,67,18,13,18} +{23,83} +{77,69,22} +{60,13,46} +{2,10,42} +{37,20,27} +{30,21} +{85,15,52} +{6,89,38} +{68,22,26,37,96} +{6,85} +{93,51,63,46,26,64} +{79,77,15,26} +{90,6,39} +\N +{50,58,85,27} +{69,8,72,47} +{7,59} +{55,16,54,95} +{96,5,50} +\N +{77,92,13} +{46,30} +{43,65} +{17,65,32} +{10,6,46,1,47,75} +{48,82,71} +{63,12} +{68,14,10,97,34} +{15,45,58,100,7,74} +{9,23,88,1,95} +{61,60,15,12,58} +{84,51,46,41,71,26} +{58,62,39} +{86,67,31} +{32,31,89,2,30} +\N +{90,74} +{65,79,76} +{22,30,77,47,40,23} +{67,99,56,73} +{11,24,30,93,89} +{70,17,65,78} +{100,6,67,29} +{39,4,22,59} +{84,29,70,9} +{74,43,72,27,55,27} +{12,39} +{1,83,100} +{48,23,9} +{21,88,21,35,16} +{92,34,44} +{91,96,13} +{93,57,40,79,81} +{86,3,94,82,43} +{78,70,19,97,49} +{47,22,98,36} +{20,59,65,54,81,27} +{58,13,73,19,54,96} 
+{26,20} +{70,75,14,70,82} +{77,67,53,33,83} +{2,43,36} +{84,17,28} +{68,25,95,62,92} +{47,90,15,69,85,23} +{92,92,24,37} +{96,14,14,38,38} +{80,4} +{66,86,28,15} +{18,90,74} +{93,76} +{64,96,14} +{76,41,86,67,64} +{58,95,2,86} +{12,60,96,70} +{22,37,58} +{1,67} +{75,23,24,7} +{3,57,66} +{57,30,68,100} +{68,57,33} +{26,32,65,51,75} +{40,14,60,97,83} +{88,96,42} +{66,21,21,78,34} +{15,56} +{86,60,66,66,16} +{94,6,58} +{99,63,70,57,10} +{82,59,62,38,82,51} +{48,61,9,46,28,57} +{29,23,61} +{12,30,42,20} +{99,65,24,7,97} +{20,5} +{6,49,85,56,97,4} +{62,93,88,86,75,29} +{46,2,94} +{57,71,45} +{38,60,21,78} +{95,53,92} +{61,1,88} +{67,80,49} +{59,82,1,48} +{19,94} +{25,64,16} +{96,73,50,85} +{28,17,46} +{81,51,50,18} +{57,99,66,93} +\N +{23,62,57,94,40} +{21,6,83} +{4,11} +{83,16,50} +{46,41,23,1} +{4,15,8} +{86,51,29,80} +{48,34,55,81,89} +{5,2,43,67,66} +{42,59,37,91,1} +{14,98,27,80,33} +{18,58} +{49,93,60,91,94,88} +{32,62,64,63,48} +{51,1,90} +{56,8,68,49} +{16,34,79,18,76} +{66,88,41} +{31,66,93,44,96,40} +{100,99,30} +{37,49,95,91,18,43} +{95,2,94} +{84,15,70,31,30,84} +{31,41,45} +{9,73,2,7,34} +{17,35,43,1,25,72} +{8,70,8} +{1,93,32,16,71,61} +{98,51,27,56,46,65} +{1,11,57,72,33,7} +{48,96,64,55,75} +{83,82} +{7,74,70,29,59,60} +{29,44,5,77,52} +{84,58} +{87,63,62,52,69} +{29,58,32,11,13,17} +{35,99,67,67,93} +{54,31} +{53,24} +{58,59,32,22} +{8,76,23,63,94,54} +{3,88,75,17,64,91} +{29,30} +{3,81,39,9,77,82} +{77,85,59,56,8} +{47,12,63,13,40} +{66,81} +{67,33} +{39,46,28,79,95,67} +{49,13,98,63,10,58} +{14,42} +{80,70,60,92} +{63,54} +{30,70} +{60,89,14,62} +{56,40,94,55} +{70,31,46,20,95} +{18,65,89,7,75} +{60,33,80,43,37,4} +{85,19,98,79,36,84} +{69,1,48} +\N +{30,87,9,22,99,60} +\N +{23,96,9,85} +{22,94,39,58} +{30,38,4,97} +{16,70,62,5} +{35,52} +{32,10,72} +{35,34,40,31,66,80} +{7,77,14,48,97} +{67,64,37,22,69} +{51,53} +{67,71,90} +{87,71,45} +{44,84} +{19,58,11,34,45,85} +{68,19,55} +{27,16} +{7,14,92,22,33,46} +{47,2,49,53,63,32} +{15,39} +{13,47,84} 
+{29,74,97} +{51,74} +{70,26,46,33,51} +{31,86,14,23,61} +{20,85} +{21,10,57} +{90,94,59,72,97} +{97,30,74,84} +{15,89,69} +{11,40,2} +{68,19,47,28} +{47,65} +{2,7,52,53,44} +{40,74,34,36,78,71} +{22,60} +\N +{37,75,47} +{53,78,2} +{4,32,42} +{35,76,69,88} +{95,13,3,38,3} +{74,74,62,90} +{8,72,42,2} +{11,43,5,43,70,16} +{69,19} +{61,37,26,49} +{16,100,69,32,35} +{58,77,26,76} +{74,87,37,47,84} +{8,82,29,93,15} +{74,88,93,85,97,95} +\N +{29,23,99,98,36,93} +{8,36,87,64} +{71,90,43} +{7,28,78,46,52} +{62,25} +{33,90,7} +{60,72,39,18,86} +{98,59,73,24} +{17,69,2} +{49,16,63,56} +{13,37,62,1,95} +{98,89,69,92} +{50,26,34} +{90,16} +\N +{40,54,3,79,51,19} +{29,24} +{6,12,82,24} +{92,52} +{89,2} +{64,25,68,55,81,2} +{64,77} +{71,46,58,50,56,34} +{94,17,35,30,60,33} +{37,30,2,40} +{98,15,16,92,2,50} +{44,19,82,57} +{37,34,6} +{59,43,1,53,79} +{7,37,14,14,92} +{80,78,49,81,23,17} +\N +{91,51,12,35,79} +{9,14,2,84} +{62,3,77} +{25,5,40,12,40,79} +{65,88,82,94,89,90} +{20,35} +{80,71,83} +{6,9,83} +{94,58} +{2,76,55,61,42,53} +{60,53,45,82,3} +{1,37,75,96} +{82,61,81,10} +{36,46,1,31,90,45} +{22,55,11,25,21} +{69,13,29,20} +{95,54} +{16,79,82,67} +{4,58,84,84} +{52,7} +{25,14,94} +{69,8,67,54} +{30,71,36} +{81,78,23,38,76,58} +{86,59,61} +{11,42,63,74,99} +{66,4,55,34,16} +{39,57} +{10,81,9,8,21,10} +{75,55,64,97,7,45} +{8,46,86} +{39,100,52} +{30,51,7,13,54} +{72,85} +{10,52} +\N +{61,7} +{93,1} +\N +{74,31,3} +{90,96,26,84} +{88,58,74} +{28,45,74,24,74} +{95,88} +{42,70,43,64,22} +{46,83,48,36} +{81,99,100,43,11} +{47,24} +{46,67,63} +{26,15,36,89} +{90,11,78,70,81,87} +{65,90} +{89,99,21,81,47,38} +{37,42} +\N +{94,51} +{12,57,95,63,29} +\N +{68,99} +{27,8} +{16,52,11} +{72,5,85,44,57,51} +{11,6,91,7} +{87,80} +{94,61,1,38,77,89} +{93,60,6,98,46} +{52,47,44} +{93,66,61,22} +{7,61} +{15,83,93,91,12,40} +{66,3,5,72,72,36} +{67,72,68} +{42,42} +{38,17} +{75,60,47,39} +{58,28,51} +{61,8,61,81,65} +{46,52,97,84,27,47} +{97,53,47} +{64,93,83,72,27} +{34,79,34,36} +{25,5,92,37} 
+{12,20,55,94} +{17,43} +{39,37,16,70} +{79,62,15,16,64,28} +{80,87,96,41} +{51,55,1,94,72} +{75,22,56} +{2,55,7,20,39} +{8,91} +{73,8,42,73,31} +{90,90,23} +{82,68} +{63,64,68,12,59,19} +{100,80,23,24,17} +{23,46} +{25,13,31} +{43,95,54,85} +{40,62,21,21,82} +{70,20,16} +{90,11,23,18} +{16,9} +{51,57,30,27,21} +{50,55,75,77,53,33} +{84,92} +{14,66,32} +{44,100,16,30,82} +{41,48,58,60,7,44} +{81,76,13} +{18,26,82} +{84,35,15,98} +{52,84} +{13,80,36,35,28} +{91,16,71,55} +{87,89,6,20,28} +{12,75,92} +{48,41,55} +{59,75,26} +{48,19,48,72} +{91,4,100,25,17} +{46,52,97,78,94} +{7,81,76} +{54,54,49} +{89,37} +{78,22,57} +{75,25,83} +{25,89,10,38,96} +{52,12,1,74,35} +{13,48,88,7} +{6,97,20,19,91} +{53,2,99,76} +{4,58,46} +{30,30,89} +{97,2,87,47,55} +{14,11,72,83,97,74} +{44,69,11,51} +{47,17,86,27} +{15,19,56,96,24,94} +{81,67} +{11,11} +{20,94,49,36,39} +{39,78,40,46} +{33,87} +{76,89,58} +{94,74,25} +{33,77,5,47,55} +{28,67,99,81,93,83} +{31,10,19,65,60} +{53,25,74,24,48} +{73,69,23,45,88} +{70,56,41} +{21,73,72,28,99,5} +{75,69} +{78,99} +{66,49,89,86,2} +{30,53,18,21} +{67,69} +{1,98,38} +{91,25,16,39} +\N +{75,54,93,39,18} +{96,84} +\N +{64,71} +{6,15,78,50} +{8,45,26,15,25} +{8,90,94} +{52,66,13,98,86,69} +{3,25,28,56,88} +{84,72,89} +{10,33,46,6,57,100} +{13,91,99,2,49} +{83,59} +{88,64,42,50,77,16} +{81,12,27,45} +{12,17,31,93,22,53} +\N +{28,84,85,35,3} +\N +{42,12,86,76,37,63} +{46,23,18} +{45,80,76} +{94,18,100} +{17,80,84,80} +{84,88,29,16,10} +{7,42,90,51,33,40} +{79,51,22,2} +{31,30,72,24,23,84} +\N +{55,50} +{69,47,82,29,83} +{94,56,69,18} +{7,81,71} +{95,13,32} +{66,59,68,62} +{52,19,62,51,80,32} +{38,18} +{73,24,81,58,22} +{11,59,41,19,96} +{61,11,56,89,89} +{61,72,9} +{63,12,8} +{76,54,75,84,6,44} +{74,3,11,62,30,74} +{46,60,71,55} +{28,47,52,71,33,33} +{35,51,37} +{38,89,40,29,30} +{18,26,99,89} +{36,44,8,100,72} +{1,23,6,5,23} +\N +{84,17,16,44,81} +{29,70,24,85} +{23,57} +{20,98,30,23,1,66} +{82,3} +{70,7} +{15,49,58} +{19,40,70,88,35} +{45,10} 
+{62,89,47,71,55} +{34,100,88,44,3,91} +{92,65,16,24,7,9} +{20,12,61,95,7} +\N +{57,49,42,87,88,14} +{89,99,86,31} +{32,55,51,78} +{55,66,78,10,12} +{37,19} +{13,5,36,66} +{89,7,40,45} +{41,58,41,24,11} +{98,8,9,27,40} +{49,83,89} +{91,36,78,44,100,62} +{76,78,9,52,57,27} +{100,59,37} +{51,1} +{92,83} +{45,1,85} +{8,81,45,94,32} +{52,26,9,98,7} +{97,52,4,85,13,11} +{94,38,58,4,72,91} +{5,39,26,14,74,51} +{31,44,37,24,89} +{8,74} +{56,93,36,3} +{23,46,25,90,42} +{4,98} +{31,95,27,26,20} +{3,7,79,9,90} +{29,22} +\N +{35,34} +{80,28,12,21} +\N +\N +\N +{36,49,94,83,25,9} +{6,62,89,93,59} +{67,75,3,93} +\N +{94,62,3} +{97,36} +{43,89,26,94} +{46,56,22} +{50,15} +{45,47,39,61} +{23,32,24,45,43,11} +{97,66,29,8,52,67} +{37,1,48} +{30,84,86,91} +{4,46,59,35} +{76,37,41,90} +{26,28,92,27,88,17} +{76,37,27,41} +{74,51,31} +{16,33} +{66,85,68} +{4,81,72,62} +{65,14} +\N +{11,43,28,14,9,43} +{60,88,95,1} +{52,92,69,48} +{37,81,85} +{57,73,8,79} +{50,26} +{52,41,99,6,33} +{9,34,58,22,9} +{56,37,19,77,50} +{93,21,18,90,41,40} +{28,89,76} +{4,36} +{89,54} +{70,28} +{66,11,3,47,30,43} +{69,54,86} +{45,41,57,34,18} +{91,46,32,68,42,68} +{25,87} +{75,57,12} +{55,15,68} +{6,63} +{22,39,88} +{77,39,10} +{39,49,69,61,66,77} +{78,25,42,73,89} +{17,47,36,27,79} +{33,83,44} +{27,75,12,96,94,87} +{50,17,95,42,25} +{67,13,22} +{59,85,95,2} +{81,57,83} +{25,11,72} +{32,84,97,6,65,52} +{62,25,24,27,50} +{80,64,23,74,54,75} +{97,17,15,100} +{50,11,41} +{57,82,40} +{10,90,41,52,39} +{4,11,86} +{79,17,51} +{48,100,92,77,58} +{88,67,19} +{40,96,52,35,16} +{89,63,32,81,28,63} +{44,56,66,50,55} +{28,73,46} +{32,40} +{52,65,85} +\N +{51,34,18,82,83} +{49,49,90,71} +{84,16,74,78,86,10} +\N +{73,9,47} +{51,59,49,90} +{85,13,78} +{98,77,18,15,92,85} +{40,94,66,94} +{89,51,80,12} +{23,26,75,17} +{96,2,51} +{88,62,90,32} +{85,19,87,89,30,15} +{33,38,9,46,19,87} +{27,45,15} +{39,79,82,88} +{31,33} +{41,64,10,1} +{35,61,22,76,74} +{75,11,90,16} +{71,23,43} +{35,3,97} +{88,4,97} +{100,61,28} +\N +{64,74} 
+{9,44,81,98,55} +\N +\N +{76,89} +{18,34,80} +{77,83,91,50,20,41} +{65,50,26,65} +{79,18,90} +{5,60} +{42,21} +{31,70,80} +{20,98,15,14} +{58,65,45,6,64} +\N +\N +{88,82,98} +{75,81,32,34,59} +{37,14} +{30,36,55,70,65} +{84,55,26} +{56,64,1} +{31,41,89} +{46,43,43,90,34,100} +{78,36,21,14,69} +{100,10,45} +{73,69} +{60,86,5,70,78,99} +{6,89,92,8} +{86,68} +{44,4,71} +{41,36} +{95,80,42,94,34} +{73,29,50,49} +{61,20,57,17,36} +{37,58,67} +{56,83,77,37} +{98,67,40,10,35,76} +{54,84,6} +{7,71} +{65,74,43,6} +{62,98,74} +{81,26,17,22,47} +{49,32,59,35,11,94} +{80,50} +{91,1,50,97} +{71,35,84} +{97,4,46,45,8,36} +\N +{81,62,76} +{69,78} +{89,3,16,64,17,17} +{78,72,26,88,81} +{25,34,9} +{50,27,34} +\N +{55,44} +{61,51,39,53,44,46} +{23,94,32,92,90} +{91,47,67} +{1,13,76,57,63} +{77,19,73,18,95} +{100,82,87,6,83,23} +{69,58,48,97,60,50} +{4,83,85,6} +{3,5,91,37,94} +{91,72,31,32,80} +{57,23,39,46,50,20} +{92,28,53} +{71,27,46} +\N +{59,73,29,21,89,30} +{1,83,96} +{34,41,65,35} +{52,89} +{62,68,80,7} +{82,82} +\N +{11,2,62,46,28,9} +{9,16} +\N +{22,44,73,82,39,86} +{97,52} +{46,36,76,56} +{17,97,26,72} +{16,78,9,70} +{65,55,30,91,16} +{27,45,76} +{17,5,90} +{86,52,93,89,42,27} +{51,70,41,35,1} +{91,57,66} +{53,59,62,57,52,56} +{100,100} +{32,78,87} +{61,57,74} +{86,79} +{55,94,64} +{81,20,26,22,23} +{9,96} +{86,65,35,19,88} +{1,37,90,51} +{79,47} +{93,93} +{32,85,69} +{49,12,6,69} +{6,44,92,37} +{28,42,57,28,2,69} +\N +{63,90,25} +{53,28,74,42} +{83,79,94,37,64,69} +{93,100,57,84,80} +{39,93,80} +{97,92,55} +{27,6} +{20,100} +{19,66,3,66} +{7,76,15} +{7,56,92,11} +{61,76,6,98,52} +{20,46,51} +{12,77,45,67} +{78,79,32,22,21,47} +{62,35,1} +{86,66,57,10,47,43} +{43,24,76,18,87,68} +{39,52,71,35,87} +{81,78,8,10} +{33,70,53,54} +{25,77,27,68,95} +{29,53,89,62,51} +{21,76,33,72,39} +{13,22} +{1,1,51,73,20} +{26,97} +{64,75,23,94,62,68} +{25,20,84,57,27} +{26,7} +{92,80,17,48,72,73} +{73,49,88} +{24,36,70,53} +{7,79} +{80,58,33,25,91} +{19,43,61} +{54,49,73} +{51,88,4} 
+{9,32,5,83} +{17,68,90,15,30} +{98,50,42} +{29,52} +{32,41,4} +{33,97,69,34} +{94,2,60,5,83} +{23,86,43,74,35} +{63,37,38,58,39,14} +{56,7,82} +{88,81} +{50,75} +{78,49,67,68} +{10,61,58} +{84,35,20,30} +{36,34,48,31,16} +{35,7,47,22} +{98,40,56,43} +{16,4,7,9,44,55} +{86,90,30,80,47,91} +{34,91} +\N +{12,67,77,23,11} +{94,8} +{5,68,31,82} +{26,65} +{51,19,86} +{55,83,39,39,96,51} +{31,22,70} +{20,50,15,93} +{1,55,64} +{8,2,14,3,40} +{2,71,25,41,5,5} +{98,61} +{21,64} +{100,76,99,18,78} +{17,4,69,97,61} +{52,79,97} +{52,26} +\N +{90,54,2,62,11,51} +{33,12,34,45,2} +{91,63,51,42,82} +{100,79,73,70,54,14} +{57,94,81,55} +{13,18,94,17,16,34} +{58,79} +{90,64,68,46,95} +\N +{37,46} +{91,94,10,85,100,24} +{65,86} +{94,89,7} +{72,79,77,53,95} +{65,19,92} +{41,79,53,8,63} +{28,60,50,42,9,32} +\N +{6,23,97,23,10} +{12,28,16,39,70,50} +{26,97,61,48,79,23} +{38,98,21,34,65,89} +{29,13,36,19,13,45} +{72,65,58,81} +{43,98,84,5} +{79,41,100} +{35,30,69,42} +{59,13} +{65,90} +{40,38,21,23} +{2,19,26,38,66} +{5,16} +{84,85,97,84} +{34,26} +{87,17,21,32,29,25} +{75,66,87,90,18} +{84,32,29,51,71,68} +{57,25,73,24,53,2} +{74,16,92} +{99,60,19} +{98,14,70,72} +{24,34} +{37,34,81,100} +{67,10,17,60,16,55} +{39,58,5,23,85,95} +{75,93,19,31,47} +{13,27} +{42,14,32,90} +{59,79,70} +{48,96,45,38,58} +{96,87,84} +{23,70} +{25,31,81,36,75,32} +{64,49} +{30,18,38} +{69,27} +{76,82,43,96,73,17} +{84,95,97,12,20} +{57,69,36} +{60,79,19,67,9,12} +{32,39,3,21} +{55,83,51,48} +\N +{37,11,98,53,11} +{2,73,24,18,77,74} +{69,96,17,49} +{53,2} +{1,76,72} +{35,93} +{35,36,36,25} +{59,77,30,13} +{35,69,36,31} +\N +{20,23,51} +{81,83,57} +{87,43,40,56,81,64} +{24,63} +{29,51,45,93} +{73,85} +{59,1} +\N +{13,57,14,11,34,91} +{69,1,4,28,77} +{63,68,41,53,64,43} +{11,1,46,40,6,88} +{51,19,77,10,86,66} +{74,40} +{25,54,46,62} +{94,17,64,15,20,36} +{100,71} +{63,66} +{33,88,5,92} +{92,86} +{91,69,75,13,20} +{57,22,32,33} +{72,87,44,64,46,6} +{50,56} +{36,23,7} +{74,63,3,6,14,29} +{91,42,8,11,49} 
+{32,64,94,88} +{91,78,55,27,59} +{2,20} +{52,95} +{57,59,35} +{51,15,52,24,14,13} +{64,16,18} +{50,98,71,10} +{92,99,92,80,77,73} +{96,12,70,85,54,73} +{10,44,30,77} +{29,47} +{40,55,62,58,30} +{59,93,7,21,6,20} +{58,91} +{5,70} +{36,23,58,80} +{16,93,54} +{20,8,97} +{78,32} +{10,31} +{24,10} +{56,14,28,10,45} +{1,79,53} +{56,58,86} +{93,83,17,89,93} +{12,4,26,45,97,17} +{42,67,17,13} +{31,90,59,38,4,20} +{86,52,67,10} +\N +{49,59,10,25} +{69,88,31,38,7,36} +{84,21,57} +\N +{60,8,19} +{35,81,66,96} +{13,95,54,38,31} +{27,25,34,11,65,64} +{54,43,20,20,65,95} +\N +{19,27,100,69,43} +{91,8} +{30,65,98,87,84} +{83,85,100,16,20,18} +{80,48,56} +{61,5,92} +{14,94,43,91} +{35,52,60,43} +{73,25,26,61} +{66,41,39,16} +{2,96,90,37,99,92} +{25,31} +{72,57,50,82} +{40,69,5} +{98,34,66} +{90,44} +{34,78,93,15,65,71} +{98,1,28,36} +{16,59,79} +{88,1,14,45} +{41,91,87,20,72} +{46,9,81,90,63,32} +{2,84,29,56} +{2,57,92,69,63,46} +{3,32,76,62,36} +{11,81,3,81,90,16} +{36,1,42,51} +{29,86,53,51,85} +{17,66,16} +{4,21,25,17,65,92} +{13,26,33} +{74,6,46} +{69,19} +{47,78,85,46,41} +{41,62,100,85} +{22,71,66} +{28,15,58,84,22,92} +{68,82,82,85,15,54} +{34,58,72,35,99} +{51,100,40,13,61} +{80,89,94,31,96} +{48,29,33} +{32,85,75} +{76,43,17} +{79,70,3,64} +{76,64,85} +{94,90,3,85} +{86,21,1,8,54} +{87,92,30,36,59} +{20,51,62,17} +{81,61,86,96,47,67} +{5,75,97} +{60,24,79,3} +{85,49,49,48,61} +{66,60,58,92,88,90} +{2,18} +{42,54} +{42,83,68} +{98,76,42,25,90,32} +{64,36,39,45,34,95} +{56,43,78,10,63,18} +{51,40,98} +{85,11,74,41,14,25} +{37,12} +{76,32} +{6,77,16,86,36,25} +{23,93,18} +{75,51,67,29} +{22,9} +{18,58,25,88} +{95,31,12,20,62,54} +{23,97,89,63,73} +{77,41,11,27} +{91,86,97,98} +{84,6} +{74,69,55} +{58,42,92,65,52} +{77,31} +{8,91} +{5,83} +{64,48} +{1,37} +{51,4,49,97,64} +{29,70,3,62,57,1} +{91,8,31} +{86,71} +\N +{61,50,8,6,59,76} +{83,8,54} +{50,45,66,86,52} +{75,48,18,88,82} +{1,52,60,78,45} +{46,65} +{53,2,63} +\N +{89,97} +\N +{75,23} +{30,58,13,50,2} 
+{59,73,52,8,70,39} +{20,35,77,34,10} +{55,86,14,74,14} +{67,46,48} +{20,9} +{20,92,62,75,34,98} +\N +{72,14,18,46} +{48,71,92,17,31,13} +{47,46,42} +{42,75,14,86} +{97,55,97,78,72} +{8,4,96} +{44,13,13,18,15} +{16,40,87} +{87,66,79} +{14,44} +{35,79,25,57,99,96} +{23,66} +{90,49,24,11,8} +{50,3,24,55} +{60,94,68,36} +{11,20,83} +{66,100,47,4,56,38} +{36,34,69} +{41,57,15,32,84} +{32,25,100,45,44,44} +{70,32} +{15,37,67,63,71,34} +\N +{81,62,20,91} +{32,62,1,68,86,54} +{20,91,40} +{79,69,22,98,14} +{45,42,24,2} +{30,53,15,62} +{81,100,42,20,96,42} +{93,19,7,59,100,49} +{25,7,18,64} +{11,27,1} +{89,67,65} +{39,97} +{47,62,30,61,58} +{4,11,83,18} +{38,30,95,58,13,81} +{83,6,33,73,64} +{89,51,77,45,58,16} +{13,11,88} +{96,79,71} +\N +{18,66,83,52,84,76} +{52,17} +{74,95,16,5,16,51} +{21,20,16,39,84,71} +\N +{75,47,36} +{65,45,12,5,100} +{41,74,84,21,73} +\N +{8,90,46,39,30} +{47,84,42,49,17} +{76,100,35,89,17} +{61,53,50,31,8} +{94,53,20,33,15} +{97,46,62,85,74} +{8,59,40} +{95,71,21,41,85,81} +{55,71,20,74} +{70,95} +{61,42} +{83,74,25,84,18} +{56,43,46,40} +{42,78} +{95,48,98,93,35,98} +{77,34} +{4,54,58} +\N +{13,54} +{87,66} +{12,88,90,95,6,95} +{65,20,10} +{62,74,59} +{49,17,51} +{14,17,65,3,27,41} +{43,42,43,46,79} +{88,75} +{21,46,84,95,31} +{17,17,28} +{32,73,29,11,46,94} +{3,34,81} +{80,83,1,92,69,100} +{9,24,56,17} +{3,80,57,36,14,94} +{39,89,54,17,31} +{70,19,67,21,31,72} +{82,48,68,52} +{96,81} +{92,18,39,50,18} +{6,54,27,52,28,100} +{23,40,7,74,93,50} +{87,51,38,88} +{98,42,43,30,8,71} +{33,26} +{20,21,83,35,99,100} +{28,77,94,32,1,13} +{17,15} +{35,100,9} +{42,6} +{16,28,55} +{7,94,81,60,91} +{100,63,21,28} +{65,20,35,16,76} +{95,3,88,62,1} +{73,44,46,13,55,69} +\N +{60,49,71,77} +{93,39,75,63,69} +{97,36} +\N +{77,16} +\N +{57,30} +{39,31,56,51} +{62,78,62,38,54} +{69,86,99,10,12} +{11,43} +{60,70,83} +{83,82,3,1,60} +{24,55,61,85} +{65,72,13,77,79,100} +\N +\N +{28,97,71,78,68,95} +{34,1,72,79,84} +{10,49,91,44,27,51} +{15,48,80,37,69} 
+{42,46,32,34,86} +{80,21,26,50,5,8} +{61,71,100,78,54,50} +{36,20,80} +{67,40,47,68} +{60,7,36,36,55,2} +{32,91,13,98,88} +{15,56,65,23,13} +{20,66,81} +{19,36,99,54,86,92} +{82,28} +{43,32,91,37,70,68} +{71,78,82,50} +{1,31,23,48,10,12} +{88,96,1,44} +{27,49,97,29,89,35} +{63,72,58} +{79,9,32,64} +{75,67} +{46,31,83,54} +{66,24,6,89} +{82,10,64} +\N +{19,31,52,34,89} +{16,36,11,12,23} +{55,50,6,20} +{81,72} +{71,74,8,6,31} +{6,20,96,80} +{95,85,56,91} +{36,33,88,12,50} +{77,44,52,50,50} +{94,12,7} +{97,44,40,43,8,21} +\N +{61,14,40,75,87} +{43,21,67,66} +{46,19,80,12,46,28} +{56,11,14,59} +{31,94,50} +{45,26,61,15} +{84,45,44,82} +{9,16,86,54,93,30} +{50,39,37} +{35,60,64,55,73,90} +{61,65,87,20,30} +{12,59,44} +{23,8,97} +{30,59,7} +{85,32,14,95,38} +{18,91} +{10,40,20,8,58} +{5,58,4,94} +{100,11,96,70} +{66,72,7} +{5,31,89,89,4} +{81,68,44,37} +{22,22,76,67,72} +{22,26,30} +{73,47,27,18,54,30} +{44,13,73,95,83} +{18,93,72} +{30,22,73,13,16} +{14,11,66} +{45,33,59,72,92,81} +{97,82} +{30,4} +{1,9,46,70} +{47,50,20,71,48,60} +{26,62,53,70,63,49} +{39,26} +{47,94,9} +{55,3,18,1,75,22} +{42,87,74,57,60,55} +{95,46,21,38,27} +\N +{13,35,48} +{24,39,24,67} +{44,83,49,72} +{22,8} +{77,39,87} +{37,41,44} +{100,57} +{48,54,58,79} +{14,84,40} +{11,51} +{23,80} +{80,82,43,59,2} +{92,53,56,44,90,66} +{44,67,78,9} +{43,91} +{70,74,100,69} +{12,5,75} +{65,51,22,65,56,36} +{52,54} +{38,78} +{30,45,38,99} +{18,88,88,63,51} +{61,24,53} +{72,24,77} +{61,46} +{11,83,49,86,27,60} +{86,60,83,34,33,28} +{65,15,10,51} +{98,92} +{49,49,60,3} +{58,56,43} +{19,25,15} +{24,40,36,49,61} +{5,62,9} +{72,8,71} +{64,85} +{72,84,67} +\N +\N +{80,87,30,70,21} +{30,86,95,19,21} +{17,90,15,89,81} +{40,51} +{77,88} +{14,89,82,62} +{40,66,93,16,55,45} +{22,46,31,17,4,71} +{8,41,88,94,25,61} +{80,8,23,71,59,53} +{61,70,23} +{2,4,79,6,67} +{27,70,42,68,33} +{46,27,10} +{1,93,42,12,8} +{31,9,19,32,62,15} +{16,42,81} +{56,29,12,17,61} +{52,100,98,42} +\N +{29,38} +{49,40,47,63,22,4} +{99,70,13} 
+{70,28,67,100} +{37,75,65,63,35} +{45,67,37,28} +{42,78,71,39} +{33,35,76,69} +{65,84,57,63} +{17,12,86,23} +{31,62,79} +{3,22} +{85,81,59} +{38,5,15,100,1,27} +{36,96,93,46,75} +{44,61,85,70,71} +{79,72,86,71,77,9} +{23,51,47} +{4,59,48,38,44} +{93,54,86,98} +{60,29} +{49,38} +{54,84} +{72,25} +{51,40,25,27,68} +{24,17} +{95,3,82,31,95} +{56,37,57} +{15,84,98,16,53} +{47,36,15} +{27,36,76} +{38,82,26} +{47,70} +{60,89} +{59,73,99,7,28,89} +{87,49,70,76} +{71,93,76,81,11,46} +{74,87,92,24,43,22} +\N +{26,1,85} +{18,73,43,94} +{92,2,73} +{5,58,85} +{20,7,39,18,59,90} +{11,16,19,77,60,56} +{77,1,95} +{4,4,11} +{48,40,56,74,96,29} +{71,1,62,69} +\N +{34,61,26} +{86,75,13,73,28} +{17,35} +{100,29,37,26,47} +{69,36,52,61} +\N +{81,51,54} +{54,78,46} +{1,78,96} +{33,54} +{72,9,37,30,100} +{67,10,52} +{77,19,74} +{52,27,41,37,98,73} +{8,74,86} +{4,40,99,6,59} +\N +{98,43} +{74,91} +{69,45,73,59,19} +{87,43,31,85} +{2,51,54,3} +{45,73,8,86,4,40} +{2,51,96} +{74,5,8,64,1,46} +{5,64,86,63,12,75} +{6,62,71,24} +{56,84,54} +{61,37,79,63} +{81,39,78,23,86,74} +{50,79,34,23} +{85,36,78,80,19} +{34,94,1,46} +{5,23,38,4,78,2} +{85,100,80,13,73} +{48,86,9} +{47,22,65} +{49,81,18,52,36} +{84,85} +{89,15,71,88,44} +{1,21,81,52,2} +{53,18,7,53,50,11} +{91,89} +\N +{20,6,20,70,12,32} +{98,94,70,52,41,35} +{43,25,2,63} +{95,86,6,82,2,41} +{79,24,63} +{12,96,7,18,48,67} +{55,35,4,75,28,39} +{48,46,33,75} +{10,99,5,5,98,25} +{43,87,5,53,76,64} +\N +{100,13,9,4} +{4,35,65,56} +{27,74,88} +{59,66,10} +\N +{59,85,39,48,17,29} +{59,42,17} +{27,99,12,21} +{9,10} +{15,4,80,25,67,59} +{12,89,96} +{50,32,92,49} +{40,74,10,6,26,43} +{80,71,29,54} +{74,82} +{22,25,27,65,12} +{84,88,53,43,75} +{84,16,51,84,46} +{10,9,44,95} +{87,19,22,10,44,80} +{18,20,87,41,86} +\N +{9,64,4,33} +{65,87,23,65,32,92} +{50,2,23,68} +{29,8,82,28} +{54,92,6,2,28,70} +{23,11,65,78,34} +{77,85} +{30,49,59,8,60} +{77,30,34} +{55,73} +{89,68,55,81,8,81} +{54,28} +{35,22,67,63,48} +{43,37,46,56,81} +{16,78,32,81,77,37} 
+{35,80,41,76} +{4,93} +{3,32,23} +{43,18,50} +{87,5} +{30,40,91} +{36,69,17,82,70,57} +{73,71,47,63,58} +{24,11,36} +{2,72,61,76,9} +{61,97,10,85,92,56} +{5,44,47} +{24,57,79} +{69,39,97,8} +{78,16} +{62,52,17,35,28} +{48,79,66,64,36} +{14,72,75,30} +{17,21,41,25} +{28,100,66,56,15} +{89,3,32,86,6} +{67,34,16} +\N +{48,27,70,60,1,40} +{69,34,36,46,95} +{59,24,84} +{44,21,90} +{22,30,5,62,13,58} +{79,67,44,10,1} +{67,8} +{40,48} +{64,5,65,35} +{74,45,75,15,31,69} +{42,3,49,33,52,97} +{86,59,69,84,53} +{64,64,41,64,99} +{47,95,16,78,73,68} +{54,11,52,90} +{54,62,79,58,96,59} +{28,34} +{52,94,17,42,9} +{94,22,77,7,56} +{72,24,47} +{6,11,3,23} +{9,6,97,82,40,39} +{73,47,57,8,7,97} +{27,26,1,2} +{64,45,38} +{71,6,6,83,33} +{78,28,40} +{25,8,17,15} +{24,67,53} +{72,42} +{66,25,56,36,32,93} +{18,11,22} +{88,9,75,23} +{20,32,24,44,51,34} +{76,86,11,7,1,61} +{11,77,41,55,87,59} +{62,53,94,46} +{77,20} +{74,97,59,78,9} +{7,94,26,18,77} +\N +{49,59} +{72,22,42,89,14,80} +{49,14,38,19} +{43,88,25,58,39,24} +{21,34,37,65} +{85,3,46} +\N +{11,60,86,65,49,83} +{51,98,7,28} +{85,17,34,59,14,86} +{89,81,48} +{67,40,11,60,75} +{13,45,42,22,82,82} +{98,21,89} +{30,63} +{35,45,68} +{9,29} +{43,71} +{82,44,59,72,48} +{1,48,29,44,14,11} +{75,33,85} +{7,32,92} +{62,14} +{29,31,1,36,51} +{92,12,28,20} +{13,67} +{88,72,14,22,61,42} +{15,98,49} +{65,27,9,76} +\N +{15,95,26,12,52,40} +{17,20,74} +{57,63,15,22,38} +{93,71,8} +{26,84,82} +{20,52,3,3} +{72,95} +{10,9,80} +{9,9,18,51} +{74,24,63,63,57,89} +{64,91,95,18,15} +{64,37,20,36,74} +{52,9,53,6} +{17,31,42} +{3,73,92,13,62} +{57,81,58,49} +{52,56,2,26,18} +\N +\N +{90,90} +{16,92} +{66,51,7,19,10} +{100,81,69,86,95} +{48,64,81} +{87,54,73} +{6,80,100,24,26,8} +{44,67} +{27,94,2,25,34} +{80,25} +{12,2,77,75,15} +{63,14,30} +{85,75,59} +{72,73,54,44,25,76} +{95,44,69,91,62} +{94,73,78,5} +{28,52} +{86,31} +{69,90,95,66} +{6,10} +{68,72,112} +{9,165} +{91,132,164} +{57,82,144,167,184} +{3,6,101,118} +{111,158} +{22,29,30,174} +{41,66} 
+{39,76,189} +{7,20,21,196} +{52,126,169,171,184} +{21,77,91,176,196} +{16,97,121} +{83,135,137} +{8,140,160,164,165,195} +{38,65,185} +{112,152} +{111,129,134,148} +{47,80,114,135,147,165} +{24,98,119,123} +{43,48,60,147,154} +{19,54,138,171,186} +{156,175} +{20,51,123,193,193} +{37,41,136,173,192} +{14,22,111,125} +{44,125,160,184} +{19,75,99,103,107,164} +{24,113,145} +{27,157} +{12,107,133,134} +{72,94,102,158,194} +{104,157} +{122,171} +{28,47,89,104,112} +{25,35,82,105,155} +{106,107,139,181} +{50,110,132,136} +{90,110,166} +{1,1,55,60,85,108} +{8,22,31,106,172,196} +{24,69,109,121,154} +{0,26,44,59,132,175} +{103,125,172,188,190} +{11,23,78,109,131} +{81,146,169,181,196} +{2,84,113,189} +{8,46,126,131} +{13,73,73,125,127} +{67,117,139,184} +{29,65,77,120,182} +{0,87,100,102,135} +{111,146,156} +{13,87,123,137,182,197} +{60,61,164} +{7,20,186} +{0,24,53,135,147} +{94,136} +{47,168} +{70,80} +{43,148} +{3,81,104,191} +{104,171,189} +{9,14,117,160,180} +{67,158} +{50,57,66,78,170,197} +{31,60,73,101,193,197} +{37,89,92,96,127} +{29,179} +{17,47,137,155,157,187} +{33,77,154} +{48,63,85,150,184} +{32,53,61,95,172} +{20,35,47,171,179,196} +{2,17,40,169,184} +{116,127,131,142} +{16,26,27,87,164,198} +{58,129} +{67,98,108,132,157,197} +{145,157} +{13,49,56} +{59,103,180,196} +{35,65,104,106,120,126} +{18,96,115,133} +{27,61} +{61,194,197} +{11,27,36,94} +{15,36,101,128,197} +{51,62,115,149} +{83,198} +{30,120,127,145,184} +{50,149} +{13,35,87,117,135,158} +{57,60,74,113,128,178} +{11,90,123,163,170} +{39,121,148,171,198,199} +{30,77,78,137,140,162} +{52,69,120,141} +{9,100,137} +{56,161} +{44,57,75,110,154} +{98,123,155,167} +{10,60,85,105,164,168} +{13,92,179,186} +{13,171,173,176,178} +{33,53,88,123,144,172} +{21,57,70,131,151} +{13,51,63,169,169} +{36,104,119,166} +{54,59,84,166,172} +{7,87,100,102,142,187} +{2,5,6,43,174} +{4,26,29,59,77} +{10,82,98,103,104} +{104,147} +{47,55,99} +{102,154,165} +{0,96,107,139,157,159} +{66,167,174} +{92,97,117} +{21,75,180,185} 
+{54,64,139,180} +{23,141,189} +{32,38,147} +{82,87} +{6,34,34,161,183} +{25,64,69,97,122} +{80,152,170,189} +{44,78,143,162} +{52,53,64,69,112,158} +{77,80,123,150,175} +{110,121,125,125,128,198} +{0,8,57,104,127,188} +{17,46,48,93,129,150} +{135,193} +{89,111,135,166,184} +{132,181} +{47,54,101,108,125} +{18,55,103,142} +{11,125} +{18,49,58,68,122,153} +{37,47,137,179,185} +{57,78,167,187,192} +{28,32,38,67,77,184} +{67,83} +{43,104,191} +{22,40,118,194} +{24,53,66,195} +{27,87,89,101,130,191} +{71,86,157,167,183} +{31,87,102} +{48,53,70,101,149,174} +{21,33,59,129,195} +{144,160} +{4,8,174,194} +{69,103,127,127,160} +{6,29,62,77,132} +{61,69,108,144,174} +{51,55,109,128,153} +{10,30} +{2,5,6,70,146,183} +{0,1,75,97,166,180} +{53,78,104} +{31,45,68,108,161} +{3,40,78,103,109,130} +{33,44,159} +{28,82,93,136,148,157} +{31,32,76,143,157} +{2,55,106} +{21,66,80,129,129,152} +{1,34,59,128,154,195} +{10,154,172,177} +{2,7,31,47,82,125} +{60,131,149,156} +{20,141} +{23,38,43,100} +{51,70} +{3,41,164} +{126,160,165,169} +{61,71,143} +{65,70,81,100,146} +{40,48,57,75,85,85} +{116,153} +{31,42,49,103,183} +{28,44,62,85,133,177} +{50,68,164,170} +{4,26,60,87,119,141} +{5,102,160} +{20,129,177} +{98,120,135,157,164,168} +{66,150} +{101,101} +{164,187} +{43,65,96,166,189} +{18,36,58,109,118} +{25,32,135,161,170} +{55,104,183} +{69,139,144,181,182} +{84,131,155} +{6,18,63,156,159} +{7,66,67,88} +{8,46,52,95,178} +{58,58,83,119,119,163} +{27,143} +{78,80,122,149,164,176} +{6,83,107,183,198} +{86,199} +{22,74} +{28,62,64,114} +{15,56} +{41,97,139,152,161,161} +{48,192} +{16,62,99,138,155} +{32,84,145} +{108,137} +{93,112,120,155} +{73,117} +{20,26,197} +{4,141} +{110,132} +{95,133,142,152,183,193} +{85,141} +{53,76,86,131} +{5,59,73,74,101,130} +{0,1,64,151,188} +{15,131,131,174} +{80,98,106,187} +{41,102,167,173} +{9,42,133} +{103,110,110,134,175,185} +{168,187} +{42,47,108,121,165,198} +{81,171} +{38,122,123,149} +{16,79} +{45,64,131,176,182,197} +{35,82,87,100,123,196} 
+{41,52} +{33,68} +{60,140} +{12,41,152} +{54,71} +{88,95,95,146,148,180} +{47,66,89,121,168,182} +{15,70,94,122,137,154} +{42,48,129,162} +{70,151} +{11,55,89,118} +{36,74,121,130,152} +{46,48,52,120,179} +{70,81} +{96,146,183} +{76,78,108,153} +{71,168} +{66,106,108,167} +{22,44,49,85,87,195} +{17,52,143,175} +{86,103} +{16,46,176} +{95,111,162,173,198} +{44,191} +{7,48,107,115,116} +{12,120,141,179,184} +{83,188} +{83,85,86,139,154} +{50,74,89,154,179} +{79,87,120,128,183} +{13,121} +{16,52,62,86,168,199} +{7,16,29,35,157,181} +{23,48,65,119,180} +{10,173} +{7,98,128,143,145,162} +{23,27,88,91,127} +{35,53,56,56,118} +{7,161} +{0,42,67,174} +{44,161} +{75,80,110,170} +{17,93,117,140,168,196} +{18,100,150,184} +{108,132} +{54,90,97,103,149} +{9,12,30,43,82,95} +{131,163} +{67,99,168} +{91,150,172} +{47,164,195} +{72,90,98} +{24,78,130,172} +{1,27,32,64,66,156} +{7,26,72,88,130,140} +{56,126,130} +{1,76,81,122,169,188} +{60,154} +{101,103,135,150} +{22,25,33} +{99,117} +{24,95,122,186} +{48,95,102,108,125,170} +{13,113,154} +{155,177} +{37,73,106} +{7,64,124,195} +{101,124,133,157,166,188} +{27,34,60,100} +{26,104,163} +{34,43,108,133,165} +{64,79,89,122,132} +{10,96,168} +{2,22,89,118,122,198} +{122,192} +{42,101,104,135,168,181} +{7,38,63,86,101,152} +{29,84,89,114,123,184} +{33,46,59,137,153,175} +{3,54,66,92} +{31,34,148,159,185} +{3,52,97,99} +{3,26} +{42,57,62,148,199} +{15,26,198} +{14,34,109,111,128,193} +{107,197} +{16,107} +{9,21,136,169} +{67,97,99,153,165,173} +{46,76,89,100,164} +{96,102,150,167,180} +{31,103,137,146,180} +{21,40,157,163,170,183} +{139,170} +{1,75,82,148,169,198} +{13,39,107} +{13,50,97,101,106} +{52,176} +{18,169} +{129,140,146,183,189} +{95,122,145} +{5,6,102,130,151} +{5,118,140,153} +{27,78,140,164,182} +{36,140,148} +{58,100,127} +{9,16} +{26,33,119} +{1,17,18,165} +{14,182} +{11,13,48,89,140,165} +{9,19,78,113} +{121,171} +{18,23,46,113,159,162} +{17,104} +{50,104,132,167,179} +{55,89,102,132,176} +{19,109} +{60,70,73,153,163} 
+{18,127,145} +{80,106,146,170} +{10,39,72,74,84,150} +{3,71} +{1,10,64} +{82,95,127,132,141,152} +{43,55,57,89,120,197} +{155,182} +{23,34,57,111,153} +{99,188} +{86,114,124} +{113,191} +{31,129,184} +{125,159,159} +{22,27,81,156} +{3,54,80,122,128,168} +{76,112} +{152,174} +{22,27,70,172} +{26,86} +{49,59,102,186} +{53,55,75,125} +{152,199} +{11,15,46,102,105,168} +{132,148,154} +{24,114,121,126,138,165} +{82,107} +{36,93,122,184,194} +{1,59,76,146} +{73,165} +{38,98,176} +{53,72,121,153} +{127,147} +{31,77,128,177} +{107,186,189} +{119,126,127,160} +{24,74,148,197} +{85,126,134,146} +{76,77,81,134} +{67,112,159,174,183} +{22,169,170} +{79,112,177,199} +{1,56} +{21,42,50,172} +{6,63,105,166,189} +{31,95,106,152,171,177} +{21,49,99,101,122,187} +{63,104,113,161,186} +{37,126,144,166,173} +{32,53,147} +{123,123,130} +{78,85,177} +{2,69,95,146,187} +{6,11,14,43,121} +{76,105,184} +{63,96,114,122,195} +{11,22,34,45,120,156} +{22,83,119,131,138,167} +{9,56,96,106,114} +{92,132,162} +{25,45,83,119,139,150} +{19,21,56,59,141} +{14,26,62,119,180,190} +{6,34,49,99,139,170} +{10,56,150,166,166} +{14,57,119,153,167,198} +{26,41,150,158,169} +{152,167} +{1,61,93,180} +{46,110,138,199} +{4,56,81,110,173} +{28,32,148,185} +{8,9,28,29,39,195} +{14,39,68,144} +{26,37,79,81,110} +{115,158,161} +{6,39,145,191} +{67,118,125,142,184,198} +{127,163} +{52,118} +{22,78,131,156} +{46,68,86,142,145,197} +{85,188} +{37,54,64,147,158} +{31,134,141,183,185} +{10,33,135,198} +{41,124,173,180} +{0,14,92,129,154,198} +{39,73,128,154,182,196} +{40,83,94,168} +{106,142} +{76,99} +{19,62,77,108,165,186} +{68,90,97,119,176} +{44,108,193} +{2,124} +{137,174,175,176,180} +{28,62,81,132,165,186} +{98,112,148,181} +{86,125} +{70,161} +{5,13,188} +{136,168} +{82,87} +{30,42,57} +{132,136,152} +{20,59,87,98,195} +{6,53,112,113,183,195} +{64,147,157} +{61,140,192} +{44,59,88,123,161} +{90,175} +{38,46,105,121,159} +{35,62,66,90,155} +{2,2,21,38} +{123,144} +{117,155} +{60,86} +{4,39,129,146,179} 
+{66,71,87,135,148,157} +{29,67,108,196} +{30,64,76,124,172} +{36,39,79,130,140,149} +{30,44,136,196} +{5,15,20,117,198} +{20,87,87,121} +{42,136,142,148} +{0,56} +{16,38,56,57} +{52,138} +{103,115} +{10,29,43,93,120,134} +{44,140,150,180} +{74,98,132,160} +{2,62,98,160} +{14,32,43,63,92} +{23,87,128,152,177,197} +{30,86,111,178,180} +{49,61,114,195,196} +{133,158,195} +{18,105,165,190} +{77,83,175} +{29,33,51,166,188} +{37,51,96,103,127} +{119,125,128,140} +{8,80,93,189} +{76,96,110,131,170} +{81,90} +{13,25,28,41,128,142} +{56,62,73,110} +{60,62,128,136,166,193} +{34,34,61,74} +{32,84,87,92,112,181} +{10,66,93,153} +{23,77,182} +{2,7,156} +{5,13,49,61,103,179} +{67,136,136,163,181,196} +{26,60,74,100,160} +{39,59,69,93,111} +{9,77,90} +{1,20,52,75,156,169} +{25,95,103,157,163,193} +{95,136} +{47,108,137,157,164} +{37,99,151,153,169,189} +{112,126,139,171,184,195} +{39,188} +{4,20,71,80,136,156} +{24,33,77,82} +{103,188} +{74,116} +{82,90,110,154,194,195} +{25,149,180} +{120,123,130,171} +{20,38,104,126,175,176} +{14,62,97,130,135,193} +{35,118} +{20,42,64,73,76,120} +{11,40,60,74,144,148} +{13,26,46,63,76} +{24,29,98,106} +{6,139,171,186} +{5,109,197} +{20,45,84,125} +{1,137,150,195} +{1,8,80,111} +{57,90,102,167} +{53,186} +{8,31,115,145,156,165} +{10,18,31,116,164} +{43,47} +{33,143,154} +{106,153,174,190} +{73,106,158} +{18,137,158,173} +{73,80,107,123,141,199} +{17,43,123,130,130,155} +{15,31,37,91,164,181} +{38,86} +{49,105,142,145,173,190} +{18,107,108,135,138} +{43,65,107,112,193} +{8,68,68,74} +{54,106,108,109,164} +{53,153} +{59,134,154,173,180} +{34,93} +{11,33,124} +{8,104} +{27,37,46,65,125,174} +{0,122,189} +{15,74,107,147,188} +{35,63,78} +{28,49,123,129,177,193} +{11,89,104} +{117,171,197} +{11,15,62,136,145,145} +{2,127,193} +{17,28,42,113,145} +{31,44,118,148} +{52,103,128,161,182} +{45,47,70,102,161,184} +{15,52,82,86} +{60,87,102,108,127,170} +{24,57,102,145,181} +{12,53} +{5,52,92,129,164} +{87,128} +{80,143,170} +{59,85,134,139} 
+{61,67,110,117,156,157} +{6,8,60,112,154,170} +{92,122,133} +{121,148,161} +{9,22,61,187} +{12,40,78,107,176} +{30,45,58,189,198} +{83,107,123,148} +{3,66,98,124,126,150} +{13,34} +{16,41,132} +{16,85} +{3,25} +{30,58,138,167} +{24,36,87,151,159,186} +{2,4,121,196} +{79,95,99,107} +{11,49,146,169} +{51,90} +{76,155} +{26,26,116,120,146,182} +{44,66,72,117,132,174} +{7,161,179,197} +{2,81,158} +{4,22,59,107,146,170} +{0,0,133,192} +{57,82} +{17,61} +{28,29,42,77,89,124} +{53,78,127,188} +{31,57,103,104,162} +{9,84,100} +{3,52,114,133,161,188} +{8,37,97,158,189} +{0,13,88} +{29,79,92,158,160,171} +{59,63,77,139,165} +{25,77,116,169} +{50,88,151,166} +{52,162,167} +{32,149,191,194,194} +{47,57,74,95,97} +{30,65,96,153,184} +{80,130,150,172} +{79,91,141,153,157} +{93,110,114,194} +{62,66,156,175} +{55,56,97,117} +{74,152,171,186} +{13,24,50,50,131} +{0,16,95,141,146,161} +{1,51,158} +{37,71,96,122} +{71,104,145} +{47,52,124,131,169} +{111,188} +{59,61,95,152,156,157} +{5,31,106,164,176} +{44,82,113,134,188} +{13,55,65,99,150} +{25,73,130,192} +{88,120,193} +{79,123,153,175} +{24,158,162} +{52,53,81} +{5,32,78,102} +{73,97,111,151} +{71,72,102,151} +{5,61,73,85,129,151} +{66,177} +{26,77,139,152} +{46,117} +{55,72,122,148,157,174} +{3,53,76,184,196} +{34,36,41,61,194} +{8,153,163,182} +{51,59} +{113,115,149} +{54,57,78} +{39,137} +{75,81,93} +{5,30,44,80,86,126} +{68,107,128,160,179} +{98,108,162} +{55,126} +{24,54,121,122} +{75,90} +{10,83,139} +{16,120,148} +{97,175} +{53,70,71,120,135,189} +{9,110,123,150} +{24,42,44,96,138,170} +{17,61} +{23,65,110,135,155,157} +{19,59,139} +{50,65,127,179} +{15,138,152,162} +{15,34} +{25,29,63,135,161} +{47,113,123,129,163} +{25,138,157,184} +{50,92,199} +{110,116} +{15,36,134,145,165,182} +{4,75,82,175} +{24,49,63,89,128} +{174,182} +{103,116,119} +{101,125,180,192} +{47,66,113,127,148} +{15,60,118} +{20,51,90,91,117} +{25,72,146,199} +{34,93,199} +{31,71,106,115,186} +{1,10,119,144,188,197} +{49,80,185} +{134,178,188} 
+{42,67,170,172} +{13,43,91,91} +{13,31,48,98,155,158} +{37,44,70,76,141,160} +{50,60,72} +{51,65,166,188} +{11,103,129,144} +{136,167,181} +{165,178} +{34,107} +{54,120} +{33,132,136,165,178} +{60,79,119,127,187,197} +{27,31,130,132} +{125,129} +{97,111} +{71,171,187,191} +{68,91,94} +{94,119,159,178} +{2,29,51,173} +{37,61,97,113,147} +{11,35,79,91} +{67,71} +{4,20,103,107,169,179} +{35,77} +{71,94} +{29,31,67,101,172,174} +{52,122} +{87,125} +{129,142,164} +{13,30,85,139} +{17,57,65,170,179} +{46,65,151,167,192,197} +{31,78,132,136,158} +{38,161} +{15,101,111,134} +{42,118,139,142,178} +{57,95,132,134} +{5,42,116,152,173,192} +{144,199} +{38,70,77,143,175,188} +{38,84,93,149} +{56,98,153,165,170,191} +{1,52,112,112,131,145} +{16,132,150,184} +{14,60,111,153} +{49,109,112,165} +{69,136,152} +{59,90,94,158,168} +{42,47} +{18,194} +{33,70,94,167,175,177} +{40,57,125,138,159} +{3,10,31} +{2,5,8,26,141,181} +{27,29,142,175,186,195} +{31,49,99,120} +{109,123} +{21,76,112,119,124} +{41,49,146,173} +{101,173} +{49,73,85,89,179} +{22,36,154,192} +{136,163} +{111,165} +{94,128} +{81,167} +{35,165} +{41,109,119} +{13,74,80,114} +{72,106,189} +{65,172} +{30,31,35,52,63} +{80,116} +{0,149} +{139,189} +{0,65,107,153,179} +{15,40,46,51,75,160} +{12,28,48,79,105} +{76,98,146,157,180} +{45,62,79,83,113,155} +{130,162,184} +{78,140,145,181,196,198} +{108,168} +{3,13,14,15,77} +{22,29,68,117,142,143} +{67,110,122,167,183} +{22,25,58,93,143,151} +{53,82,170} +{1,18,50,98,108,174} +{58,140} +{49,179,196} +{109,171} +{38,82,132,183} +{32,151,175} +{53,90,106,169,187} +{99,136,141,146,171} +{27,108,111,155,192} +{28,77,86} +{11,109,118,149,154,183} +{7,74,122,137,185} +{70,110,151,154,175} +{7,48,88,181,181,182} +{97,101,105,123,139,156} +{19,139} +{17,107,134} +{63,64,178} +{100,133,143} +{64,173} +{1,88,109,120,145,160} +{113,198} +{84,112,121,184} +{90,185,193} +{91,135,155,185} +{56,191} +{14,15,48,61,92,171} +{18,139,152,199} +{16,80,107,125,144,166} +{8,92,112,173,176} +{27,196} 
+{9,169,183,190} +{20,29,40,98,106,182} +{77,115,149,181} +{31,65} +{7,29,62,90,157,178} +{10,33,79,186} +{42,74,113,178,192} +{17,86,88,118} +{27,58,104,122,166} +{16,97,102,105,192} +{16,59,115,127} +{27,56,60} +{104,175} +{52,84} +{127,137} +{7,13,18,81,139,140} +{11,31,81,150,189} +{44,55,107} +{45,58,127,137} +{70,76,80,93,145} +{27,60} +{40,76,172} +{7,123,192} +{55,170} +{61,137,137,184,187} +{49,50,190} +{99,126,152,164} +{56,79,88,98,132} +{45,74,119,123,158,175} +{66,96} +{100,114} +{62,84,111,122} +{8,22,141,172,181} +{70,141} +{3,48,106,193} +{33,114,168,174,183} +{46,186,194} +{58,71,82,122,190} +{60,67} +{14,30,132,144,174} +{9,113,124} +{11,14,29,63,110,182} +{4,64,102,168,178} +{90,108,110,160,165,199} +{44,86,191} +{6,19,84,125,125,156} +{53,105,122,154,175,190} +{83,177,183} +{96,103,181} +{38,156} +{2,6,60,116,131} +{12,144} +{13,73,93,132} +{142,167} +{37,61,71,75,121,144} +{32,43,146} +{41,59,144,176} +{11,14,44,54,92,177} +{37,198} +{39,80,81,104,138,193} +{13,73,92,127,149,194} +{34,57,69,104,118,186} +{7,48,84,96,108} +{32,41,64,111} +{108,131,150,174,195} +{50,53,184,191} +{8,32} +{26,76,88} +{4,50,100,134,134} +{36,40,148,158,177} +{7,16,57,59} +{35,96,113,129,167} +{46,63,128,163} +{8,46,94,97,105,178} +{12,70} +{45,93,134,135,188,195} +{11,52,76,103,131,192} +{19,45,57,119,123,136} +{19,62} +{1,49,64,197} +{0,42,60,102,134,147} +{102,152,156,160} +{51,54,129} +{50,68,71,72,170} +{0,11,184} +{19,105} +{144,185,191} +{17,51,76,98,118,135} +{52,64,143,171} +{1,46,62,74,81} +{8,36,129} +{5,25,96,113,146,152} +{19,28,59,110,131,142} +{7,18,176,179} +{17,21,48,63,121} +{34,79,81,85,152,155} +{8,82,104,122,139,193} +{34,50,128,140,175} +{51,173} +{48,128,138} +{126,129,178} +{42,51,61,141,170,180} +{59,91,144} +{64,74,118,170,191} +{12,55,116,157,159} +{97,157} +{32,34,102,105,178} +{36,103,125} +{15,36,184} +{6,13} +{0,100,144,185,198} +{32,47,64,66,118,143} +{23,112,117} +{34,44,47,81,124,135} +{21,49,115} +{29,158} +{34,114,127,151} 
+{111,199} +{23,53,76,113,122,123} +{89,113,117,137} +{52,76,126,155,164} +{4,48,78,114,147,179} +{27,56,151,191} +{3,183} +{30,41,72,145} +{15,41,152,177,196} +{44,58,124,164,177} +{9,51,70,174} +{13,18,81,136,178} +{85,139,142} +{12,62,118,156} +{50,142,149,175} +{35,38,99,100,128} +{53,54,92,123,153,160} +{121,133} +{12,63,117,148,149,187} +{88,153,170,192,195} +{22,51,67,104,141} +{186,198} +{39,40,82,159,189} +{59,74,149} +{88,99,136,145,191} +{5,48,90,120,138,193} +{22,76,155,180} +{118,122,141,176} +{87,104,116,159,172,191} +{63,104,155} +{8,153,168} +{119,141,178,179} +{100,110} +{14,65,164} +{2,92,97,117,188} +{47,59,64,141,148,187} +{109,137,139,151,169} +{68,78,156} +{37,39,103,183,190,194} +{50,58,74,180} +{12,121,155,175} +{26,43,97} +{102,159,161} +{3,138,163,179} +{55,69,78,164} +{67,87,136} +{67,150} +{74,113,199} +{103,126,187} +{39,141,155} +{6,19,25,75,157} +{10,49,71,105,114,154} +{3,24,35,54,88} +{16,25,73,114,181,191} +{2,2,63,154} +{68,74,107,187,199} +{13,235} +{40,122,203,232,233,235} +{115,152,193,202,242} +{3,50,86,111,248} +{25,66,181,188,279} +{80,116} +{38,83,106,119,134} +{29,63,203} +{7,27,186,200,201} +{88,92,94,272,295} +{35,68,136,158} +{148,225,293} +{1,87,195} +{48,100,203} +{0,35,61,91,280} +{130,160,168,216} +{4,104,148,164} +{35,40,91,145,155,214} +{46,107} +{21,276} +{42,143,150,238,291} +{64,70,140,165,228,257} +{0,148,197} +{72,131,195,202,251,270} +{99,195,224,264,292} +{5,184,186,243} +{93,132,137,148,228,251} +{66,125,198,211,285} +{29,79,180} +{41,60,65,66,254} +{4,69,79,207} +{113,182,203,252,259,298} +{10,20} +{99,200,276} +{109,262} +{4,87,193,207,220,273} +{30,183,215} +{7,138,202,215,217} +{25,79,194,203,260} +{128,178} +{62,152,211,279} +{57,99,204,280} +{41,59} +{18,52,200} +{81,132,190,275} +{89,158} +{32,72,122,228,245,249} +{24,72,196,233,299} +{0,5,46,122,213} +{197,242} +{43,105,241,272} +{74,118,158,173,208,288} +{145,149,197,238,252,297} +{32,39,189} +{98,240} +{65,140,149,197,203,204} +{103,225,266} 
+{84,277,283} +{35,246} +{10,101,239} +{40,75,192,253} +{106,152,247,272,287} +{50,293} +{85,134} +{59,204} +{54,64,88,269,286} +{4,92,111,121,154,182} +{80,163,202,234,277,298} +{129,147,158,196,283,290} +{49,144,232,293} +{20,29,226,244,274} +{64,101,185,189,234,268} +{23,157} +{56,93,133} +{9,57,241,289} +{50,124,181,194,238} +{11,38,67,69,213} +{149,220} +{168,189,267} +{34,133,235,264,284} +{81,239,241,260} +{35,78,80,201,262,297} +{0,196,285} +{71,108,239,258,277,278} +{4,94} +{77,132,140,251} +{11,78,132} +{43,145,188} +{97,144,148,161,254} +{109,132} +{48,83,189,242} +{115,176,276} +{162,210} +{88,109,136,153,154,159} +{265,280} +{74,86,195} +{17,112,188,213,231,266} +{36,136,160,218,239} +{179,273} +{79,118,136,154,200,259} +{161,212} +{24,98,178} +{161,187} +{45,169,227,236} +{218,253} +{10,18,74,258} +{70,199,210,213,285,291} +{12,50,69,92,184,186} +{130,131,163,295} +{198,239,297} +{49,86,125,176,234,282} +{7,129,146,223,269} +{144,173} +{30,52,133,228} +{21,88,176} +{5,70,299} +{37,69,285} +{14,17,45,72,99,197} +{125,196} +{30,220} +{55,103,127,251} +{108,114,156,200,207,217} +{7,195,250} +{64,111,193,202,236} +{92,115,232,233,240} +{22,232,260} +{18,44,191,253,294} +{40,158} +{86,92,103,154,207,294} +{33,177,191,223,235} +{65,116,158,253} +{49,125,152,194} +{100,149,191,266,288} +{13,64,103,254,283} +{42,75,80,103,155} +{77,128,198,280} +{118,218,287} +{0,36,52,101,148} +{1,64,181,201,221} +{6,44,47,71,150,225} +{13,85,88,167} +{31,40,69,91,99,281} +{60,115,157,224,252,273} +{30,87,200,270,285} +{171,293} +{24,33} +{59,69,74,118,190,216} +{147,258,288} +{62,73,219,232,266} +{50,74,225,238,271} +{6,88,115,185,205,262} +{97,230} +{76,76,150,211,228,262} +{134,195} +{104,235} +{38,41,204} +{64,71,124} +{44,63,111,231} +{186,188} +{5,132,225} +{113,286} +{43,161,276} +{8,294} +{18,90,105,169} +{213,213} +{29,45,161,289} +{79,152} +{10,110,162,177,217,238} +{63,98,192,244} +{118,147,187,281} +{5,15,36,94,263} +{40,81,220} +{29,74,76,157,162,206} 
+{11,28,53,68,126,222} +{73,73,181,239} +{36,60,164} +{16,47,82,152,167,289} +{149,149,219,268,294} +{97,169} +{32,160,210,257} +{32,69} +{7,63,73,195} +{54,110} +{61,75,135,270} +{22,43,127,174,184,208} +{106,113,174} +{0,70,90,239} +{191,260} +{43,80,168} +{25,54,257,263} +{118,213} +{110,207,220,251,287} +{126,139,161,248,252} +{51,79,116,132,190,291} +{183,199,200,254} +{86,233} +{105,109,176,211} +{12,109} +{3,65,158} +{21,86} +{12,15,191} +{181,223,224,256,259,276} +{112,191,219,232,239} +{51,215} +{36,46,278} +{68,75,169,228,244,270} +{10,16,52,172,189,274} +{177,191,197,209,222,282} +{41,119,190,202} +{128,277,292,298} +{34,38} +{22,36,81,117} +{81,161,248,279} +{75,85,103,149,190,211} +{127,279} +{50,74,152} +{122,168,209,240,276,282} +{66,102,208,239,291} +{9,113} +{72,199,237} +{110,112,135,141,270} +{26,109,130,159,291} +{108,206} +{2,289} +{63,238} +{4,57,104,119,142,214} +{46,97,239} +{210,297} +{207,268} +{13,64,80} +{62,109,171,195,232} +{11,260,262,276,292} +{21,75,78,80,140,226} +{38,56} +{122,251,297} +{108,180,213} +{57,58,135,231,233} +{75,136,185,211} +{52,109,122,174,178,255} +{65,91,234,249} +{5,24,53,218} +{90,211,246} +{106,242,260} +{61,136} +{49,87,177,280} +{38,89,104,189,297} +{43,76,293,298} +{182,255,289} +{25,57,64,272} +{23,122,149} +{49,50,129,153} +{183,188,204} +{27,164,226,230} +{0,13,67,145,160,259} +{22,32,43,85,105,124} +{20,199} +{31,119} +{14,16,152,158,196} +{5,59,91,202,217,280} +{100,128,187} +{20,193,214,258,272} +{17,27,55,151,177,219} +{53,55,63,208,213,230} +{15,160,258,260} +{71,147,235,258} +{26,49,173,234,271} +{50,52,58,167,257} +{15,154,213,232} +{6,35,86,94,286} +{0,4,83,262,281} +{93,148,284} +{28,165,262,290} +{18,99,160,266} +{63,223,291,295} +{103,154,180} +{12,110,144,221} +{9,158,203} +{20,207,275} +{9,20,48,88,120,289} +{67,110,133,151,225,297} +{71,102} +{168,208} +{48,137,163,164,280,287} +{90,209} +{28,244} +{107,224,293} +{86,206} +{8,113,147,165,285,286} +{7,159,160,237} +{0,66,87,146,225,294} 
+{58,100,112,124,189} +{13,108} +{121,168,216,253} +{147,242,282} +{236,240} +{21,28,83,103,166} +{30,88,108,280,295} +{23,136,298} +{125,290} +{140,249,276,277} +{49,81,135,147,164,267} +{28,63,198,297} +{30,101,216,232,267,287} +{54,195,204,223,236,251} +{27,176,179,204,264,291} +{136,164,172,273} +{43,67,81,121,277} +{128,131,256,269} +{176,219,289} +{127,175,259} +{35,94,153,177,222,253} +{29,154,178,240,260} +{165,176,201,243,259} +{17,298} +{29,203,232,241,289} +{107,136,153,238} +{49,198} +{68,179,202,253} +{157,178} +{23,199,287} +{131,228} +{19,19,39,111,138,277} +{49,86,178,194,223,226} +{114,201} +{149,282} +{109,147,150,176,209,229} +{122,131,167,228,258} +{5,40,120,154,266} +{135,207,238,263} +{75,128} +{80,117,296} +{60,82,122,131,138} +{57,146,159,233,244,278} +{15,80,157,182,244,272} +{114,116,160,176,287} +{10,133,279} +{27,115,126,293} +{89,161} +{95,120,218} +{26,269} +{109,281} +{53,62,103,107,118,239} +{185,186,227,252} +{3,125,146,161,288} +{171,245,256,283} +{23,153,201,238} +{0,82,93,218,242} +{101,124,137,150,194} +{21,96,104,201,244,266} +{88,121,147,155,173,225} +{24,106,112,193} +{26,67,115,212,283} +{23,120,280} +{45,99} +{30,66,136,199} +{17,213} +{14,37,55,103,265} +{52,258,284} +{119,213,272,274,285} +{43,45,105,254,288} +{64,81,123,126,164,292} +{88,229,260} +{25,117} +{7,149,197,227,258} +{74,83,240,246,284,292} +{2,4,63,103,115,289} +{92,239} +{12,26,130,228,265} +{53,99,131,142,164,291} +{63,248,259,283} +{186,215,282} +{67,110,160} +{166,191} +{33,156,224} +{152,166,190,250,297} +{123,126,153,199,204} +{49,70,199,238,238,289} +{14,18,65,74,146,235} +{63,77,172,180,186,225} +{1,48,105,170} +{37,56,113,133,196} +{193,261,266} +{190,273} +{38,129,261} +{251,252,253,254,275,296} +{249,275} +{167,205,266} +{27,152,256} +{19,72,248} +{40,73,141,249} +{105,197} +{156,243,277,282} +{165,168,227,298} +{8,31,202,271} +{10,101,109,167,236,277} +{33,91,165,192,206,211} +{102,122,232} +{190,239,283} +{160,185} +{2,13,65,70} 
+{11,68,170,192,229,284} +{66,90,228,237} +{1,6,92,99,222,242} +{42,128,133,207,289} +{12,100,164,191} +{26,31,120,176,204,220} +{13,39,95,105,120,182} +{114,120,295} +{31,34,55,181,197,235} +{24,52,64,80,142} +{3,49,148,255,268} +{132,175,254} +{32,71,141} +{112,116,186,270,271} +{64,106,209,228,297} +{128,268} +{107,208,299} +{151,173,187,192,213} +{3,296} +{20,31,135,153,289} +{138,193,212,269,277,288} +{73,92,130,295} +{73,80,105} +{50,96,138,199,265} +{4,7,8,183,260,267} +{66,71,118,145} +{15,63,116,160,175,181} +{88,217} +{56,69,106,106,127,274} +{84,205} +{83,101,241,269} +{21,254} +{22,32,83,150,293} +{198,221} +{30,46,95,179,197} +{46,85,208} +{56,112,236} +{71,217} +{31,57,145,253} +{34,133,170} +{48,53,119,187,268,287} +{111,203,229,239} +{62,136} +{49,54,187,254,298} +{20,26,148,159,190,286} +{3,13,193,252,284} +{40,137,154,167,248,259} +{3,47,242,278} +{77,100,143,232} +{51,130} +{66,90,148,220,242,273} +{143,151,211} +{10,23} +{21,30,179} +{17,47,105,156,193,213} +{0,23,25,125,144,146} +{179,209} +{79,113,117,192} +{5,53,216,275,285} +{187,197} +{22,68,218,221} +{0,71,78,110,120,173} +{46,97,117,149,253,286} +{10,20,129,162,171,195} +{60,97,130,163,190} +{57,145,179,283} +{99,274} +{151,161,228,251} +{3,177,192,286} +{21,81,142} +{180,283} +{13,102,131,149,246} +{19,99,132,162,167,257} +{15,86,188,260} +{203,251,281} +{5,45,138,155,157} +{1,2,4,213,278} +{21,123,208,219,263,267} +{36,106,181,231,238} +{103,120,168,184,224,287} +{53,104,139,251} +{1,91,141,202,268} +{75,115,216,253} +{56,167,268,296} +{66,158,235,249} +{82,124,198} +{56,67,112,140,170,176} +{16,75,266} +{38,165,200,219,291,297} +{86,151,229,241,275} +{0,57,141,176,229,258} +{18,72,164,195,235} +{94,282} +{83,139,242,269,294} +{9,44,145,251,272} +{132,203,249,282} +{7,41,170,254} +{6,153,193,291} +{18,134,137,227,261} +{14,36,115,124,172,229} +{54,206} +{49,91,131,185,204} +{7,242} +{41,57,161} +{93,224,241,288} +{119,288} +{90,99,117,196,296} +{67,85,154} +{147,169,216,264} 
+{79,92,164} +{19,120,132,197,267} +{76,264} +{30,133} +{27,37,93,138,218} +{152,155,244} +{41,149,182,259} +{29,178,224} +{115,201,268} +{141,166,253,282} +{3,65,125,245,264} +{6,150,159,202,206,277} +{217,276} +{28,96,144,193} +{7,59,190} +{144,217} +{10,79,96,100,126,222} +{7,61,253} +{14,69,263} +{3,30,63,125,186,277} +{2,10,79,100,223} +{131,131,239} +{116,195,199,240} +{87,99,158} +{52,180} +{7,12,140,208,275} +{65,67,83,280} +{4,52,125,126,137,176} +{9,48,79,203,217,243} +{43,206,251} +{19,112,196,263,266} +{29,70,256} +{161,236,258} +{8,25,42,97,291} +{63,144,242,271} +{7,17} +{1,85,250} +{104,244,250} +{18,22,31,99,266,281} +{51,138,237,268,288} +{8,40,91,221,273} +{0,176,230,249,254,255} +{44,140,176,194,197} +{56,197,264} +{229,246,283} +{53,128,173,233,282} +{45,193,221} +{21,80,286} +{4,18,267} +{15,97,220} +{62,70,83,147,149,244} +{120,134,159,174,250} +{116,269} +{23,108} +{10,91,239} +{7,128,142,243,286} +{134,201,245,275,278} +{13,208,227,288} +{30,78,85} +{107,179} +{31,59,153,217,240,298} +{27,130,233,282,286} +{15,59,136,262} +{85,186,233} +{10,152,165,181,181} +{137,183} +{40,56,125,256,265,280} +{12,22,120,183} +{62,229} +{38,59,81,113,261} +{67,194,229} +{7,173} +{37,43,296} +{59,162,285} +{171,200,213,213} +{116,123,209,234,277} +{52,175} +{189,213} +{30,94,99,228,238} +{46,101,154,260,272,274} +{30,32,59} +{65,172,292} +{18,22,131,170,271} +{2,53,88,104,264,265} +{60,194,288} +{15,108,121,161,201} +{40,85,173,195,201,221} +{54,86,107,174,287} +{20,71,190,227} +{16,46,66,175,197,252} +{130,243,252,282} +{142,219,266,272} +{14,202,204,231,241,276} +{161,172,212,222} +{15,183,275} +{83,270} +{67,204} +{65,184,264} +{73,119,183,190,242} +{53,287} +{24,171} +{72,220,220} +{101,136,176,204,224,280} +{39,47,282} +{106,162,238,252} +{23,242,247,265} +{98,108,189,209,273} +{122,245,270} +{109,127,128,244,299} +{41,162,186,191} +{60,196} +{0,123,129,213,248} +{29,79,89,91} +{172,298} +{122,140,162,228,263,268} +{2,116,247,294} +{6,138} +{17,98,287} 
+{53,166,187,219,248,296} +{15,26,90,175,196} +{184,193,198} +{17,69,76,105,183,264} +{56,101,110} +{15,108,139,168,272} +{5,71,104,141} +{136,179} +{72,189} +{54,79,208} +{98,113,150,184,190,246} +{37,69,132,210,285} +{1,29,45,74,109,145} +{11,72,133,149,216} +{34,57,84,212,280} +{131,211,294} +{70,84,173} +{193,213,230,266,285,299} +{57,94,163,182,227} +{44,133,143} +{31,32,211} +{130,142,165,188,194,231} +{52,61,139,226,239,287} +{7,103,157} +{155,224,230} +{127,135,139} +{77,237,294} +{10,213,278} +{28,90,185,274} +{59,105,282,297} +{39,128,174,268} +{32,158,215} +{24,145,189,213,278} +{78,148,230,263} +{42,68,93,160,287,299} +{4,12,70,91,191,237} +{20,294} +{45,53,77,113,211,240} +{232,237} +{125,152,284} +{58,81,155,215,296} +{4,8,44} +{1,52,102,128,184,218} +{185,199,226,299} +{10,178,262,285} +{80,95,230,240,266} +{4,5,213} +{156,187,271,298} +{88,298} +{109,233,290} +{47,65,91,105,249,269} +{97,129} +{46,92,207} +{2,163,249,259,291} +{89,102,140,158,231} +{162,184,283} +{36,213} +{163,259} +{47,220,250} +{37,89,105,124,143,198} +{3,71} +{142,165,190,256,269,269} +{152,256} +{27,49,191,198,220,285} +{71,73,87,189,260} +{11,54,90,106,130,216} +{193,245,252} +{2,8,57,91,163,184} +{18,171,283} +{28,41,110,112} +{5,57} +{137,262,285} +{19,57,156,229,269} +{138,179,190,199,281} +{35,98,196,242} +{122,152} +{83,132,181,212,280,288} +{219,298} +{57,88,103} +{5,203} +{98,156,266} +{10,45,72,169,211} +{45,101,156,214,269} +{68,73,81} +{16,127,259} +{9,32,246} +{66,173,261,261,274} +{17,115,157,169,251} +{49,158} +{25,37} +{2,73,103,178,194,236} +{238,269,273} +{162,178,276} +{48,52,160,237,288} +{54,82,130,135,169,275} +{29,142} +{205,249,253,275,291} +{60,76,84,115,126} +{48,108,153,213,231} +{23,124,175,210,226,293} +{9,181} +{20,99,112,166,201,242} +{102,150,201} +{41,98,240,244,260} +{7,44,98,293} +{0,125,177,283} +{28,118,124,148,241,290} +{73,91,122} +{9,72,109,130,202,290} +{70,111,120,160,216,262} +{59,175,296} +{2,201} +{83,297} +{76,293} 
+{83,127,136,242,275,285} +{169,190,195} +{83,122,186,189,217,229} +{98,210,229} +{117,133} +{74,294} +{6,31,59,143,156,273} +{98,180,241} +{26,52,114,243} +{112,240} +{104,217} +{148,162,259,279} +{92,101,150,226,272,295} +{55,86,118,202,237,275} +{81,203} +{79,126,177,265} +{57,193} +{169,240,244} +{21,171,190,250,263} +{23,37,215,235} +{40,54,240,286} +{105,177,190,276,285} +{44,45,122,151} +{28,31,187} +{127,135,211} +{5,13,150,194,259} +{136,181,280} +{20,147,158,189,200} +{15,83,88,128,169} +{10,14,25,26,150,158} +{42,101,172,205} +{85,185,226,236,271} +{34,127,188,250,268} +{27,143} +{26,48,99,110,117,207} +{22,56,190,269,287} +{200,278} +{70,134,138,204,216,298} +{175,219,297} +{99,273} +{206,216} +{23,214} +{131,140} +{11,140,240} +{73,148} +{7,66,125,210} +{2,61,92} +{0,137} +{143,188,265} +{177,238} +{0,93,163,229} +{35,49} +{8,8,111,144,165} +{99,278} +{21,44,71,224,252,270} +{119,150,175,233,245,294} +{15,87} +{84,211,217,225} +{20,41,87,123,124,299} +{62,120,169} +{37,43,92,175,206,222} +{95,168,180,250,269,296} +{60,228,278,285} +{173,195,232,276} +{1,2,139,256,278} +{51,119} +{212,238,291} +{120,172,292} +{138,279} +{251,261} +{151,181,278,296} +{163,207,220,289,295} +{89,278,290} +{24,137,157,206,271,278} +{7,63,83,89,155,189} +{2,5,172,195,215,260} +{243,281} +{60,125} +{74,87,222,236} +{45,70,159,194} +{69,159,250} +{150,214,296} +{101,158,250} +{56,134} +{57,87,160,167,247,285} +{123,269} +{235,242} +{79,95,115,167,287} +{31,56,132,244,276} +{25,218,241,241} +{57,82,151,170,204} +{69,103,288} +{88,138,154,292} +{14,98,138,227,245,249} +{175,222,274} +{38,139,193,208,277} +{79,141} +{5,77,197,209} +{15,37,77,110,116} +{26,226} +{68,93,101,140,233} +{53,96,170,192,290} +{29,89,102,216,220} +{11,85,136,239} +{158,180,195,200,226} +{10,49,118,137,172} +{144,172,183} +{14,176,188,215,272} +{42,97,125} +{114,166} +{52,61,162,171,249} +{140,195,242} +{59,99,233} +{31,76,136,181,187} +{81,112,157,168,271,294} +{8,35,44,48,190,297} +{145,195,201} 
+{160,248,291} +{94,270,285} +{116,139,225} +{111,131,140} +{158,277} +{59,229,257} +{25,47,99,123,239} +{8,36,205,274,295} +{132,152,178,192,235} +{19,40,96,204} +{7,77} +{211,282} +{26,100,180,244,281,296} +{200,212,286} +{5,94,151,290} +{75,80,128,179,269,269} +{7,111} +{7,26,69,158,269,276} +{7,36,74,94,171,215} +{2,62,65,93,124,271} +{78,96,109,189} +{182,197,280,298} +{17,78,82,85,85,208} +{6,122,155} +{14,33,130} +{1,21,167,169} +{49,85,158,175,213} +{59,194} +{125,132,259,285} +{20,38,81,89,234,274} +{106,140,156,287} +{57,125} +{53,103,158,204,234,267} +{0,49,160,189,235} +{34,115,142,207} +{162,173,181,190,298} +{11,76,116,166,191} +{2,87,99,236,279} +{40,203} +{2,33,39,215,254} +{53,69,83,224,228} +{79,136,183,216,226,227} +{10,109,137,163,240} +{24,126,141} +{69,255} +{103,138,230,246,259,283} +{136,290} +{13,34,78,145,166,242} +{38,74,83,242,294} +{54,248,273} +{107,162} +{50,170,176,191,207,275} +{32,134,166,288,292} +{163,167,186,274,291,296} +{31,86,123,156,160} +{114,133,136,176,281,290} +{105,147,211} +{124,151,179,222,299} +{87,101} +{145,169,181,205,247} +{6,266} +{26,33,52,56,106,116} +{19,21,65,89,104,168} +{164,181,208} +{36,67,92,116,248} +{145,200,247} +{155,215} +{49,212} +{29,57,105,117,131} +{2,13,68,128,139,140} +{193,273,273} +{3,78,105,111,297} +{49,142,244} +{32,259} +{161,205} +{96,146,179,259} +{44,45,211,233} +{56,91,146,166,285} +{87,107,120,262,299} +{76,160,276,297} +{248,266} +{5,12,188,240,247} +{164,206,293} +{15,18,60,163} +{53,134,172,230,287,290} +{117,137,146,153,155} +{72,270} +{171,251} +{80,125,137,141,169} +{52,108,200,219,225,271} +{29,78,106,221} +{21,74,110,273} +{28,88,98,170} +{83,104} +{12,152} +{7,69,143,246,265,269} +{62,106,157,200} +{113,260,272,272,294} +{16,35,80,121,165,176} +{96,154,172,198,263} +{29,53,109,128,129,195} +{131,230,271,273,295,299} +{53,160,208,231} +{23,180,208,249,272} +{45,208,264} +{14,29,169} +{116,147,272} +{7,193,237,271} +{158,198,253} +{41,60,71} +{110,133,200,249} +{24,159,255} 
+{26,39,61,114,218,229} +{141,286,299} +{74,278} +{67,71,155} +{151,257,284} +{13,28,72,131,206} +{60,152,275,295} +{88,105,184,185} +{85,190,205,256,283,285} +{202,285} +{14,92,160,200,246,279} +{42,95,157,195} +{50,99,224,276} +{32,97,101,122} +{66,85} +{19,146,180,242,269,286} +{24,86,247,274} +{54,264,270,284} +{72,77,85,124,127,285} +{47,249} +{25,73,102,237} +{33,68,84,117,120} +{29,62,172,240,242,273} +{42,140,182,248,261,282} +{118,228,284} +{1,89,158,294} +{29,89,122,155,208,283} +{173,208,229} +{6,22,142,267,299} +{22,122,173,245,293} diff --git a/expected/array.out b/expected/array.out index 03bcd6c71e..92864d95e4 100644 --- a/expected/array.out +++ b/expected/array.out @@ -841,3 +841,37 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; (2 rows) DROP INDEX idx_array; +/* + * Check ordering using distance operator + */ +CREATE TABLE test_array_order ( + i int2[] +); +\copy test_array_order(i) from 'data/rum_array.data'; +CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; + QUERY PLAN +------------------------------------------------------ + Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(3 rows) + +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; + i | ?column? 
+---------------------+------------------ + {20,23,51} | 1.73205080756888 + {33,51,20,77,23,65} | 2.44948974278318 + {23,76,34,23,2,20} | Infinity + {20,60,45,23,29} | Infinity + {23,89,38,20,40,95} | Infinity + {23,20,72} | Infinity + {73,23,20} | Infinity + {6,97,20,89,23} | Infinity + {20,98,30,23,1,66} | Infinity + {57,23,39,46,50,20} | Infinity + {81,20,26,22,23} | Infinity + {18,23,10,90,15,20} | Infinity +(12 rows) + diff --git a/sql/array.sql b/sql/array.sql index 732a0b805d..e3869b06d3 100644 --- a/sql/array.sql +++ b/sql/array.sql @@ -197,3 +197,17 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; DROP INDEX idx_array; +/* + * Check ordering using distance operator + */ + +CREATE TABLE test_array_order ( + i int2[] +); +\copy test_array_order(i) from 'data/rum_array.data'; + +CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); + +EXPLAIN (COSTS OFF) +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; diff --git a/src/rumget.c b/src/rumget.c index 33c0672acf..42058eaf5d 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -1519,6 +1519,7 @@ scanGetItemRegular(IndexScanDesc scan, RumItem *advancePast, continue; } match = false; + break; } if (match) @@ -1540,9 +1541,23 @@ scanGetItemRegular(IndexScanDesc scan, RumItem *advancePast, RumScanKey key = so->keys[i]; if (key->orderBy) - continue; + { + int j; - if (key->recheckCurItem) + /* Catch up order key with *item */ + for (j = 0; j < key->nentries; j++) + { + RumScanEntry entry = key->scanEntry[j]; + + while (entry->isFinished == false && + compareRumItem(rumstate, key->attnumOrig, + &entry->curItem, item) < 0) + { + entryGetItem(rumstate, entry, NULL, scan->xs_snapshot); + } + } + } + else if (key->recheckCurItem) { *recheck = true; break; From 
002356c72631587bf957539b6f70031084eeda3e Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Tue, 26 Dec 2017 18:16:50 +0300 Subject: [PATCH 013/182] Fix building RUM after commit 9fa6f00b. --- src/rum.h | 16 ++++++++++++++++ src/ruminsert.c | 23 +++++++---------------- src/rumscan.c | 14 ++++---------- src/rumsort.c | 6 +----- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/rum.h b/src/rum.h index 11f20300fd..ec387c101f 100644 --- a/src/rum.h +++ b/src/rum.h @@ -1057,4 +1057,20 @@ extern Datum FunctionCall10Coll(FmgrInfo *flinfo, Oid collation, Datum arg6, Datum arg7, Datum arg8, Datum arg9, Datum arg10); +/* PostgreSQL version-agnostic creation of memory context */ +#if PG_VERSION_NUM >= 110000 + #define RumContextCreate(parent, name) \ + AllocSetContextCreateExtended(parent, name, \ + MEMCONTEXT_COPY_NAME, \ + ALLOCSET_DEFAULT_MINSIZE, \ + ALLOCSET_DEFAULT_INITSIZE, \ + ALLOCSET_DEFAULT_MAXSIZE) +#else + #define RumContextCreate(parent, name) \ + AllocSetContextCreate(parent, name, \ + ALLOCSET_DEFAULT_MINSIZE, \ + ALLOCSET_DEFAULT_INITSIZE, \ + ALLOCSET_DEFAULT_MAXSIZE) +#endif + #endif /* __RUM_H__ */ diff --git a/src/ruminsert.c b/src/ruminsert.c index f2870afcb2..00f5154edf 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -633,17 +633,11 @@ rumbuild(Relation heap, Relation index, struct IndexInfo *indexInfo) * create a temporary memory context that is reset once for each tuple * inserted into the index */ - buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, - "Rum build temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - - buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext, - "Rum build temporary context for user-defined function", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + buildstate.tmpCtx = RumContextCreate(CurrentMemoryContext, + "Rum build temporary context"); + + buildstate.funcCtx = 
RumContextCreate(CurrentMemoryContext, + "Rum build temporary context for user-defined function"); buildstate.accum.rumstate = &buildstate.rumstate; rumInitBA(&buildstate.accum); @@ -813,11 +807,8 @@ ruminsert(Relation index, Datum *values, bool *isnull, Datum outerAddInfo = (Datum) 0; bool outerAddInfoIsNull = true; - insertCtx = AllocSetContextCreate(CurrentMemoryContext, - "Rum insert temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + insertCtx = RumContextCreate(CurrentMemoryContext, + "Rum insert temporary context"); oldCtx = MemoryContextSwitchTo(insertCtx); diff --git a/src/rumscan.c b/src/rumscan.c index a730772a14..27d7f05c2d 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -35,16 +35,10 @@ rumbeginscan(Relation rel, int nkeys, int norderbys) so->firstCall = true; so->totalentries = 0; so->sortedEntries = NULL; - so->tempCtx = AllocSetContextCreate(CurrentMemoryContext, - "Rum scan temporary context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); - so->keyCtx = AllocSetContextCreate(CurrentMemoryContext, - "Rum scan key context", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + so->tempCtx = RumContextCreate(CurrentMemoryContext, + "Rum scan temporary context"); + so->keyCtx = RumContextCreate(CurrentMemoryContext, + "Rum scan key context"); initRumState(&so->rumstate, scan->indexRelation); diff --git a/src/rumsort.c b/src/rumsort.c index 748c1191d4..3b330476df 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -877,11 +877,7 @@ rum_tuplesort_begin_common(int workMem, bool randomAccess) * Create a working memory context for this sort operation. All data * needed by the sort will live inside this context. 
*/ - sortcontext = AllocSetContextCreate(CurrentMemoryContext, - "TupleSort", - ALLOCSET_DEFAULT_MINSIZE, - ALLOCSET_DEFAULT_INITSIZE, - ALLOCSET_DEFAULT_MAXSIZE); + sortcontext = RumContextCreate(CurrentMemoryContext, "TupleSort"); /* * Make the Tuplesortstate within the per-sort context. This way, we From 4673f9741a225d9308a7b506315d7dffc564515d Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Jan 2018 11:15:01 +0300 Subject: [PATCH 014/182] Fix tests for 32-bit platforms --- expected/array_1.out | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/expected/array_1.out b/expected/array_1.out index 7424724b52..03bcd6c71e 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -525,7 +525,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info (4 rows) SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; -ERROR: doesn't support order by over pass-by-reference column + i | add_info +-----------+-------------------------- + {1} | Thu May 19 14:21:25 2016 + {1,2} | Fri May 20 14:21:25 2016 + {1,2,3} | Sat May 21 14:21:25 2016 + {1,2,3,4} | Sun May 22 14:21:25 2016 +(4 rows) + DROP INDEX idx_array; /* * Sanity checks for popular array types. From 01ca6f5cf1f470f11ba81f65140695a018ea2b24 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Jan 2018 12:35:03 +0300 Subject: [PATCH 015/182] Revert "Fix tests for 32-bit platforms" This reverts commit 4673f9741a225d9308a7b506315d7dffc564515d. 
--- expected/array_1.out | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/expected/array_1.out b/expected/array_1.out index 03bcd6c71e..7424724b52 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -525,14 +525,7 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info (4 rows) SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; - i | add_info ------------+-------------------------- - {1} | Thu May 19 14:21:25 2016 - {1,2} | Fri May 20 14:21:25 2016 - {1,2,3} | Sat May 21 14:21:25 2016 - {1,2,3,4} | Sun May 22 14:21:25 2016 -(4 rows) - +ERROR: doesn't support order by over pass-by-reference column DROP INDEX idx_array; /* * Sanity checks for popular array types. From 68e5a8b0b685728e1bf65778629b1cdb3df4f0b2 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Jan 2018 13:13:58 +0300 Subject: [PATCH 016/182] Add missed array test results for 32-bit platforms --- expected/array_1.out | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/expected/array_1.out b/expected/array_1.out index 7424724b52..e88ae50589 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -834,3 +834,37 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; (2 rows) DROP INDEX idx_array; +/* + * Check ordering using distance operator + */ +CREATE TABLE test_array_order ( + i int2[] +); +\copy test_array_order(i) from 'data/rum_array.data'; +CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; + QUERY PLAN +------------------------------------------------------ + Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(3 rows) + +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; 
+ i | ?column? +---------------------+------------------ + {20,23,51} | 1.73205080756888 + {33,51,20,77,23,65} | 2.44948974278318 + {23,76,34,23,2,20} | Infinity + {20,60,45,23,29} | Infinity + {23,89,38,20,40,95} | Infinity + {23,20,72} | Infinity + {73,23,20} | Infinity + {6,97,20,89,23} | Infinity + {20,98,30,23,1,66} | Infinity + {57,23,39,46,50,20} | Infinity + {81,20,26,22,23} | Infinity + {18,23,10,90,15,20} | Infinity +(12 rows) + From 845db08c03dd6bf5ffb0ec975d48dde967eb3a2b Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Sat, 17 Feb 2018 03:50:33 +0300 Subject: [PATCH 017/182] Adopt RUM to current master. --- src/rum_arr_utils.c | 8 ++++- src/ruminsert.c | 5 ++++ src/rumsort.c | 73 ++++++++++++++++++++++++++------------------- 3 files changed, 55 insertions(+), 31 deletions(-) diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 9f901000bf..46233e020d 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -102,6 +102,12 @@ typedef struct SimpleArray } SimpleArray; +#if PG_VERSION_NUM < 110000 +#define SearchSysCacheList(A, B, C, D, E) \ +SearchSysCacheList(A, B, C, D, E, 0) +#endif + + float8 RumArraySimilarityThreshold = RUM_SIMILARITY_THRESHOLD_DEFAULT; int RumArraySimilarityFunction = RUM_SIMILARITY_FUNCTION_DEFAULT; @@ -563,7 +569,7 @@ getAMProc(Oid amOid, Oid typid) */ catlist = SearchSysCacheList(CASTSOURCETARGET, 1, ObjectIdGetDatum(typid), - 0, 0, 0); + 0, 0); for (i = 0; i < catlist->n_members; i++) { HeapTuple tuple = &catlist->members[i]->tuple; diff --git a/src/ruminsert.c b/src/ruminsert.c index 00f5154edf..566d84d592 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -32,6 +32,11 @@ typedef struct BuildAccumulator accum; } RumBuildState; +#if PG_VERSION_NUM >= 110000 +#define IndexBuildHeapScan(A, B, C, D, E, F) \ +IndexBuildHeapScan(A, B, C, D, E, F, NULL) +#endif + /* * Creates new posting tree with one page, containing the given TIDs. * Returns the page number (which will be the root of this posting tree). 
diff --git a/src/rumsort.c b/src/rumsort.c index 3b330476df..70d1cb708a 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -162,6 +162,19 @@ bool optimize_bounded_sort = true; #define LogicalTapeRewindForWrite(x, y) LogicalTapeRewind((x), (y), true) #endif +#if PG_VERSION_NUM >= 110000 +#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ +TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5, false) +#else +#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ +TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) +#endif + +#if PG_VERSION_NUM >= 110000 +#define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, NULL, NULL, 1) +#define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) +#endif + /* * The objects we actually sort are SortTuple structs. These contain * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), @@ -963,11 +976,11 @@ rum_tuplesort_begin_heap(TupleDesc tupDesc, state->nKeys = nkeys; - TRACE_POSTGRESQL_SORT_START(HEAP_SORT, - false, /* no unique check */ - nkeys, - workMem, - randomAccess); + RUM_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess); state->comparetup = comparetup_heap; state->copytup = copytup_heap; @@ -1025,11 +1038,11 @@ rum_tuplesort_begin_cluster(TupleDesc tupDesc, state->nKeys = RelationGetNumberOfAttributes(indexRel); - TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, - false, /* no unique check */ - state->nKeys, - workMem, - randomAccess); + RUM_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess); state->comparetup = comparetup_cluster; state->copytup = copytup_cluster; @@ -1085,11 +1098,11 @@ rum_tuplesort_begin_index_btree(Relation heapRel, state->nKeys = RelationGetNumberOfAttributes(indexRel); - TRACE_POSTGRESQL_SORT_START(INDEX_SORT, - enforceUnique, - state->nKeys, - workMem, - randomAccess); + RUM_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess); state->comparetup = 
comparetup_index_btree; state->copytup = copytup_index; @@ -1162,11 +1175,11 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, state->nKeys = nKeys; - TRACE_POSTGRESQL_SORT_START(INDEX_SORT, - false, /* no unique check */ - state->nKeys, - workMem, - randomAccess); + RUM_SORT_START(INDEX_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess); state->comparetup = comparetup_rum; state->copytup = copytup_rum; @@ -1195,11 +1208,11 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) "begin rumitem sort: workMem = %d", workMem); #endif - TRACE_POSTGRESQL_SORT_START(INDEX_SORT, - false, /* no unique check */ - 2, - workMem, - false); + RUM_SORT_START(INDEX_SORT, + false, /* no unique check */ + 2, + workMem, + false); state->cmp = cmp; state->comparetup = comparetup_rumitem; @@ -1236,11 +1249,11 @@ rum_tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, state->nKeys = 1; /* always a one-column sort */ - TRACE_POSTGRESQL_SORT_START(DATUM_SORT, - false, /* no unique check */ - 1, - workMem, - randomAccess); + RUM_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess); state->comparetup = comparetup_datum; state->copytup = copytup_datum; From bdecedb112fa655841ab3171f5638c71fe1279c0 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 26 Feb 2018 17:46:40 +0300 Subject: [PATCH 018/182] Fix TAP test for PostgreSQL 9.6 and lower --- t/001_wal.pl | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/t/001_wal.pl b/t/001_wal.pl index 182dc22073..6cd507da86 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -13,10 +13,23 @@ sub test_index_replay { my ($test_name) = @_; + # Check server version + my $server_version = $node_master->safe_psql("postgres", "SELECT current_setting('server_version_num');") + 0; + # Wait for standby to catch up my $applname = $node_standby->name; - my $caughtup_query = - "SELECT pg_current_wal_lsn() <= write_lsn FROM 
pg_stat_replication WHERE application_name = '$applname';"; + my $caughtup_query; + + if ($server_version < 100000) + { + $caughtup_query = + "SELECT pg_current_xlog_location() <= write_location FROM pg_stat_replication WHERE application_name = '$applname';"; + } + else + { + $caughtup_query = + "SELECT pg_current_wal_lsn() <= write_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; + } $node_master->poll_query_until('postgres', $caughtup_query) or die "Timed out while waiting for standby 1 to catch up"; From 1b80a143b0f3cc898674ddbf75753fc9c8a2c3f7 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 3 Apr 2018 19:03:31 +0300 Subject: [PATCH 019/182] Port RUM to current master --- src/rum.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/rum.h b/src/rum.h index ec387c101f..ffce9b2aaa 100644 --- a/src/rum.h +++ b/src/rum.h @@ -1061,7 +1061,6 @@ extern Datum FunctionCall10Coll(FmgrInfo *flinfo, Oid collation, #if PG_VERSION_NUM >= 110000 #define RumContextCreate(parent, name) \ AllocSetContextCreateExtended(parent, name, \ - MEMCONTEXT_COPY_NAME, \ ALLOCSET_DEFAULT_MINSIZE, \ ALLOCSET_DEFAULT_INITSIZE, \ ALLOCSET_DEFAULT_MAXSIZE) From d874e224f655198331a359230e08642c8afeae12 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 12 Apr 2018 18:14:44 +0300 Subject: [PATCH 020/182] Port RUM to current master --- src/rumsort.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 70d1cb708a..b975f4b3e3 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -3443,7 +3443,11 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, int32 compare; /* Compare the leading sort key, if it's simple */ +#if PG_VERSION_NUM >= 110000 + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) +#else if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) +#endif { compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags, scanKey->sk_collation, @@ -3472,7 +3476,11 @@ 
comparetup_cluster(const SortTuple *a, const SortTuple *b, for (; nkey < state->nKeys; nkey++, scanKey++) { +#if PG_VERSION_NUM >= 110000 + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; +#else AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[nkey]; +#endif Datum datum1, datum2; bool isnull1, @@ -3538,15 +3546,20 @@ static void copytup_cluster(RumTuplesortstate *state, SortTuple *stup, void *tup) { HeapTuple tuple = (HeapTuple) tup; +#if PG_VERSION_NUM >= 110000 + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[0]; +#else + AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[0]; +#endif /* copy the tuple into sort storage */ tuple = heap_copytuple(tuple); stup->tuple = (void *) tuple; USEMEM(state, GetMemoryChunkSpace(tuple)); /* set up first-column key value, if it's a simple column */ - if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) + if (attno != 0) stup->datum1 = heap_getattr(tuple, - state->indexInfo->ii_KeyAttrNumbers[0], + attno, state->tupDesc, &stup->isnull1); } @@ -3578,6 +3591,11 @@ readtup_cluster(RumTuplesortstate *state, SortTuple *stup, { unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); HeapTuple tuple = (HeapTuple) palloc(t_len + HEAPTUPLESIZE); +#if PG_VERSION_NUM >= 110000 + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[0]; +#else + AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[0]; +#endif USEMEM(state, GetMemoryChunkSpace(tuple)); /* Reconstruct the HeapTupleData header */ @@ -3595,9 +3613,9 @@ readtup_cluster(RumTuplesortstate *state, SortTuple *stup, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value, if it's a simple column */ - if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) + if (attno != 0) stup->datum1 = heap_getattr(tuple, - state->indexInfo->ii_KeyAttrNumbers[0], + attno, state->tupDesc, &stup->isnull1); } From 97441e1a20eda49243f042e0c0085aa05e724f07 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 13 Apr 2018 
14:46:21 +0300 Subject: [PATCH 021/182] PGPRO-1572: Fix crash on empty table using index scan --- expected/rum.out | 12 ++++++++++++ sql/rum.sql | 9 +++++++++ src/rumget.c | 19 +++++++++++++++++-- 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/expected/rum.out b/expected/rum.out index e0bc9f95dc..11d4cafcae 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -4,6 +4,18 @@ CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); +-- Check empty table using index scan +SELECT + a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * + FROM test_rum + ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; + ?column? | rum_ts_distance | t | a +----------+-----------------+---+--- +(0 rows) + +-- Fill the table with data \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); ERROR: additional information attribute "a" is not found in index diff --git a/sql/rum.sql b/sql/rum.sql index 44648ca1b4..b2e03f3c5f 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -7,6 +7,15 @@ BEFORE UPDATE OR INSERT ON test_rum FOR EACH ROW EXECUTE PROCEDURE tsvector_update_trigger('a', 'pg_catalog.english', 't'); CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); +-- Check empty table using index scan +SELECT + a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * + FROM test_rum + ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; + +-- Fill the table with data \copy test_rum(t) from 'data/rum.data'; CREATE INDEX failed_rumidx ON test_rum USING rum (a rum_tsvector_addon_ops); diff --git a/src/rumget.c b/src/rumget.c index 
42058eaf5d..9510237bf4 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -609,7 +609,17 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) (entry->queryCategory == RUM_CAT_EMPTY_QUERY && entry->scanWithAddInfo)) { - IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); + IndexTuple itup; + ItemId itemid = PageGetItemId(page, stackEntry->off); + + /* + * We don't want to crash if line pointer is not used. + */ + if (entry->queryCategory == RUM_CAT_EMPTY_QUERY && + !ItemIdHasStorage(itemid)) + goto endScanEntry; + + itup = (IndexTuple) PageGetItem(page, itemid); if (RumIsPostingTree(itup)) { @@ -689,6 +699,7 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) SCAN_ENTRY_GET_KEY(entry, rumstate, itup); } +endScanEntry: if (needUnlock) LockBuffer(stackEntry->buffer, RUM_UNLOCK); if (entry->stack == NULL) @@ -2043,8 +2054,12 @@ scanGetItemFull(IndexScanDesc scan, RumItem *advancePast, */ entry = so->entries[0]; + if (entry->isFinished) + return false; + entryGetItem(&so->rumstate, entry, &nextEntryList, scan->xs_snapshot); - if (entry->isFinished == true) + + if (entry->isFinished) return false; /* Fill outerAddInfo */ From 8322d6c44e528c25796d13c4eb4d0323c5541b75 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 16 Apr 2018 12:34:54 +0300 Subject: [PATCH 022/182] Do not form a tuple with unitialized area --- src/ruminsert.c | 2 ++ src/rumvacuum.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/ruminsert.c b/src/ruminsert.c index 566d84d592..f9ce47a30d 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -201,6 +201,8 @@ RumFormTuple(RumState * rumstate, { itup = repalloc(itup, newsize); + memset((char *) itup + IndexTupleSize(itup), + 0, newsize - IndexTupleSize(itup)); /* set new size in tuple header */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 75ed121758..32662c65f6 100644 --- a/src/rumvacuum.c +++ 
b/src/rumvacuum.c @@ -182,6 +182,8 @@ RumFormTuple(RumState * rumstate, { itup = repalloc(itup, newsize); + memset((char *) itup + IndexTupleSize(itup), + 0, newsize - IndexTupleSize(itup)); /* set new size in tuple header */ itup->t_info &= ~INDEX_SIZE_MASK; itup->t_info |= newsize; From 21fee6611efba7ce1c38ca07237a67bc7d20962b Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 3 Aug 2018 14:21:32 +0300 Subject: [PATCH 023/182] Issue #31: Port RUM to current master --- src/btree_rum.c | 3 +++ src/rum_arr_utils.c | 3 +++ src/rum_ts_utils.c | 3 +++ src/rumget.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/src/btree_rum.c b/src/btree_rum.c index cc85a3ca7c..170ace6aba 100644 --- a/src/btree_rum.c +++ b/src/btree_rum.c @@ -7,6 +7,9 @@ #include "utils/bytea.h" #include "utils/cash.h" #include "utils/date.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/float.h" +#endif #include "utils/inet.h" #include "utils/numeric.h" #include "utils/timestamp.h" diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 46233e020d..78b788eb8b 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -24,6 +24,9 @@ #include "utils/array.h" #include "utils/builtins.h" #include "utils/catcache.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/float.h" +#endif #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 38d5075c7e..b6eff4524b 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -20,6 +20,9 @@ #include "tsearch/ts_utils.h" #include "utils/array.h" #include "utils/builtins.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/float.h" +#endif #include "utils/typcache.h" #include "rum.h" diff --git a/src/rumget.c b/src/rumget.c index 9510237bf4..2e5dd2593b 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -19,6 +19,9 @@ #include "miscadmin.h" #include "utils/builtins.h" #include "utils/datum.h" +#if PG_VERSION_NUM >= 120000 +#include "utils/float.h" +#endif #include 
"utils/memutils.h" #include "rum.h" From 78e61f8017322fc51b45d75e78c8ac072d98ef72 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 7 Aug 2018 18:36:27 +0300 Subject: [PATCH 024/182] Add dir into .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 358883292d..dfc31f487a 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ results __pycache__ *.pyc tmp_install +log # virtualenv bin From 0f20de45ac420e49830b2b385419abb3a0935cdd Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 13 Aug 2018 13:38:28 +0300 Subject: [PATCH 025/182] README: Use generic WAL records --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 32e8196929..f08c76e5f6 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ idea of `RUM` by the following picture: Drawback of `RUM` is that it has slower build and insert time than `GIN`. It is because we need to store additional information besides keys and because -`RUM` uses generic WAL. +`RUM` uses generic WAL records. ## License @@ -150,7 +150,7 @@ numeric, timestamp, timestamptz Supported operations: `<`, `<=`, `=`, `>=`, `>` for all types and `<=>`, `<=|` and `|=>` for int2, int4, int8, float4, float8, money, oid, -timestamp and timestamptz types. +timestamp and timestamptz types. Supports ordering by `<=>`, `<=|` and `|=>` operators. Can be used with `rum_tsvector_addon_ops`, `rum_tsvector_hash_addon_ops' and `rum_anyarray_addon_ops` operator classes. 
@@ -176,7 +176,7 @@ Now we can execute the following queries: ```sql EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------- Limit -> Index Scan using tsts_idx on tsts @@ -185,7 +185,7 @@ EXPLAIN (costs off) (4 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? + id | d | ?column? -----+---------------------------------+--------------- 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 @@ -225,7 +225,7 @@ Now we can execute the following fast query: ```sql SELECT * FROM query WHERE to_tsvector('black holes never exists before we think about them') @@ q; - q | tag + q | tag ------------------+------- 'black' | color 'black' & 'hole' | color From 7de75e1b128bc05eede1c5b227a574387a992e3a Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 8 Oct 2018 17:05:43 +0300 Subject: [PATCH 026/182] Add rum_ts_score() - inverted to rum_ts_distance function. Bump extension version, now 1.3. 
--- Makefile | 8 +- expected/rum.out | 61 +- expected/rum_hash.out | 65 +- rum--1.1.sql | 1513 ++++++++++++++++++++++++++++++++++++ rum--1.2--1.3.sql | 19 + rum--1.3.sql | 1726 +++++++++++++++++++++++++++++++++++++++++ rum.control | 2 +- sql/rum.sql | 17 +- sql/rum_hash.sql | 17 +- src/rum_ts_utils.c | 86 +- 10 files changed, 3438 insertions(+), 76 deletions(-) create mode 100644 rum--1.1.sql create mode 100644 rum--1.2--1.3.sql create mode 100644 rum--1.3.sql diff --git a/Makefile b/Makefile index d7d540287c..19476e53af 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ MODULE_big = rum EXTENSION = rum -EXTVERSION = 1.2 +EXTVERSION = 1.3 PGFILEDESC = "RUM index access method" OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ @@ -12,7 +12,9 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ src/btree_rum.o src/rum_arr_utils.o $(WIN32RES) DATA_first = rum--1.0.sql -DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql +DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql \ + rum--1.2--1.3.sql + DATA = $(DATA_first) rum--$(EXTVERSION).sql $(DATA_updates) # Do not use DATA_built. It removes built files if clean target was used @@ -52,7 +54,7 @@ wal-check: temp-install all: $(SQL_built) -#9.6 requires 1.2 file but 10.0 could live with update files +#9.6 requires 1.3 file but 10.0 could live with update files rum--$(EXTVERSION).sql: $(DATA_first) $(DATA_updates) cat $(DATA_first) $(DATA_updates) > rum--$(EXTVERSION).sql diff --git a/expected/rum.out b/expected/rum.out index 11d4cafcae..ad960650d2 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -8,11 +8,12 @@ CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); SELECT a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; - ?column? 
| rum_ts_distance | t | a -----------+-----------------+---+--- + ?column? | rum_ts_distance | rum_ts_score | t | a +----------+-----------------+--------------+---+--- (0 rows) -- Fill the table with data @@ -131,26 +132,30 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 1 (1 row) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); - rum_ts_distance | t | a ------------------+--------------------------------------------------------------------------+--------------------------------------------------------------- - 16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 - 16.4493 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 - 16.4493 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+--------------------------------------------------------------------------+--------------------------------------------------------------- + 16.4493 | 0.0607927 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 + 16.4493 | 0.0607927 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 16.4493 | 0.0607927 | so well that only a fragment, as it were, gave way. 
It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 + 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); - rum_ts_distance | t | a ------------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- + 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) SELECT @@ -166,26 +171,30 @@ SELECT (2 rows) -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); - rum_ts_distance | t | a ------------------+--------------------------------------------------------------------------+--------------------------------------------------------------- - 16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 - 16.4493 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 - 16.4493 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+--------------------------------------------------------------------------+--------------------------------------------------------------- + 16.4493 | 0.0607927 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 + 16.4493 | 0.0607927 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 16.4493 | 0.0607927 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 + 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), * +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); - rum_ts_distance | t | a ------------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- + 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar'); diff --git a/expected/rum_hash.out b/expected/rum_hash.out index 3f255efc8a..4838be4e93 100644 --- a/expected/rum_hash.out +++ b/expected/rum_hash.out @@ -118,61 +118,70 @@ SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 1 (1 row) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); - rum_ts_distance | t | a ------------------+--------------------------------------------------------------------------+--------------------------------------------------------------- - 16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 - 16.4493 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 - 16.4493 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+--------------------------------------------------------------------------+--------------------------------------------------------------- + 16.4493 | 0.0607927 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 + 16.4493 | 0.0607927 | itself. 
Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 16.4493 | 0.0607927 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 + 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); - rum_ts_distance | t | a ------------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- + 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) SELECT a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; - ?column? | rum_ts_distance | t | a -----------+-----------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + ?column? | rum_ts_distance | rum_ts_score | t | a +----------+-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- + 8.22467 | 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); - rum_ts_distance | t | a ------------------+--------------------------------------------------------------------------+--------------------------------------------------------------- - 16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 - 16.4493 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 16.4493 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 - 16.4493 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+--------------------------------------------------------------------------+--------------------------------------------------------------- + 16.4493 | 0.0607927 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 + 16.4493 | 0.0607927 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 16.4493 | 0.0607927 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 + 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), * +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); - rum_ts_distance | t | a ------------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + rum_ts_distance | rum_ts_score | t | a +-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- + 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) INSERT INTO test_rum_hash (t) VALUES ('foo bar foo the over foo qq bar'); diff --git a/rum--1.1.sql b/rum--1.1.sql new file mode 100644 index 0000000000..15b8ebae1c --- /dev/null +++ b/rum--1.1.sql @@ -0,0 +1,1513 @@ +CREATE OR REPLACE FUNCTION rumhandler(internal) +RETURNS index_am_handler +AS 'MODULE_PATHNAME' +LANGUAGE C; + +/* + * RUM access method + */ + +CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler; + +/* + * RUM built-in types, operators and functions + */ + +-- Type used in distance calculations with normalization argument +CREATE TYPE rum_distance_query AS (query tsquery, method int); + +CREATE FUNCTION tsquery_to_distance_query(tsquery) +RETURNS rum_distance_query +AS 'MODULE_PATHNAME', 'tsquery_to_distance_query' +LANGUAGE C IMMUTABLE STRICT; + +CREATE CAST (tsquery AS rum_distance_query) + WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT; + +CREATE FUNCTION rum_ts_distance(tsvector,tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_tt' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_td' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + LEFTARG = tsvector, + RIGHTARG = tsquery, + PROCEDURE = rum_ts_distance +); + +CREATE OPERATOR <=> ( + LEFTARG = tsvector, + RIGHTARG = rum_distance_query, + PROCEDURE = rum_ts_distance +); + +CREATE FUNCTION rum_timestamp_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_timestamp_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_timestamp_left_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C 
IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_timestamp_left_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_timestamp_right_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_timestamp_right_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = <=| +); + +/* + * rum_tsvector_ops operator class + */ + +CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsvector_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +-- To prevent calling from SQL +CREATE FUNCTION rum_ts_join_pos(internal, internal) +RETURNS bytea +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsvector_ops +DEFAULT FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 
rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE text; + +/* + * rum_tsvector_hash_ops operator class. + * + * Stores hash of entries as keys in index. + */ + +CREATE FUNCTION rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsvector_hash_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE integer; + +/* + * rum_timestamp_ops operator class + */ + +-- timestamp operator class + +CREATE FUNCTION 
rum_timestamp_extract_value(timestamp,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_outer_distance(timestamp, timestamp, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OPERATOR CLASS rum_timestamp_ops +DEFAULT FOR TYPE timestamp USING rum +AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + --support + FUNCTION 1 timestamp_cmp(timestamp,timestamp), + FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), + FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), + FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), + FUNCTION 6 rum_timestamp_config(internal), + -- support to timestamp disttance in rum_tsvector_timestamp_ops + FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), + OPERATOR 20 <=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, +STORAGE timestamp; + +/* + * 
rum_tsvector_timestamp_ops operator class. + * + * Stores timestamp with tsvector. + */ + +CREATE FUNCTION rum_tsquery_timestamp_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +/* + * !!!deprecated, use rum_tsvector_hash_addon_ops!!! + */ +CREATE OPERATOR CLASS rum_tsvector_timestamp_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +/* + * rum_tsvector_hash_timestamp_ops operator class + * !!!deprecated, use rum_tsvector_hash_addon_ops!!! 
+ */ + +CREATE OPERATOR CLASS rum_tsvector_hash_timestamp_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/* + * rum_timestamptz_ops operator class + */ + +CREATE FUNCTION rum_timestamptz_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_timestamptz_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_timestamptz_left_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_left_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_timestamptz_left_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_timestamptz_right_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_right_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_timestamptz_right_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = <=| +); + +CREATE OPERATOR CLASS rum_timestamptz_ops +DEFAULT FOR TYPE timestamptz USING rum +AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + --support + FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz), + FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), + FUNCTION 3 
rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), + FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), + FUNCTION 6 rum_timestamp_config(internal), + -- support to timestamptz distance in rum_tsvector_timestamptz_ops + FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), + OPERATOR 20 <=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, +STORAGE timestamptz; + +/* + * rum_tsvector_timestamptz_ops operator class. + * + * Stores tsvector with timestamptz. + */ + +CREATE OPERATOR CLASS rum_tsvector_timestamptz_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +/* + * rum_tsvector_hash_timestamptz_ops operator class + */ + +CREATE OPERATOR CLASS rum_tsvector_hash_timestamptz_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 
rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/* + * rum_tsquery_ops operator class. + * + * Used for inversed text search. + */ + +CREATE FUNCTION ruminv_extract_tsquery(tsquery,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_tsvector_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_tsquery_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsquery_ops +DEFAULT FOR TYPE tsquery USING rum +AS + OPERATOR 1 @@ (tsquery, tsvector), + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 ruminv_extract_tsquery(tsquery,internal,internal,internal,internal), + FUNCTION 3 ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 ruminv_tsvector_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 6 ruminv_tsquery_config(internal), + STORAGE text; +CREATE FUNCTION rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +ALTER FUNCTION + rum_tsquery_timestamp_consistent (internal,smallint,tsvector,int,internal,internal,internal,internal) + RENAME TO rum_tsquery_addon_consistent; + +CREATE FUNCTION rum_numeric_cmp(numeric, numeric) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OPERATOR CLASS rum_tsvector_addon_ops +FOR TYPE tsvector 
USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +CREATE OPERATOR CLASS rum_tsvector_hash_addon_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/*--------------------int2-----------------------*/ + +CREATE FUNCTION rum_int2_extract_value(int2, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_compare_prefix(int2, int2, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_extract_query(int2, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_int2_distance(int2, int2) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int2_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_int2_left_distance(int2, int2) +RETURNS float8 +AS 
'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int2_left_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int2_right_distance(int2, int2) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int2_right_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int2_outer_distance(int2, int2, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int2_ops +DEFAULT FOR TYPE int2 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int2,int2) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint2cmp(int2,int2), + FUNCTION 2 rum_int2_extract_value(int2, internal), + FUNCTION 3 rum_int2_extract_query(int2, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int2_compare_prefix(int2,int2,int2, internal), + -- support to int2 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int2_config(internal), + FUNCTION 9 rum_int2_outer_distance(int2, int2, smallint), +STORAGE int2; + +/*--------------------int4-----------------------*/ + +CREATE FUNCTION rum_int4_extract_value(int4, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_compare_prefix(int4, int4, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_extract_query(int4, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + + + +CREATE FUNCTION rum_int4_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int4_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_int4_left_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int4_left_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int4_right_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int4_right_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int4_outer_distance(int4, int4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int4_ops +DEFAULT FOR TYPE int4 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int4,int4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint4cmp(int4,int4), + FUNCTION 2 rum_int4_extract_value(int4, internal), + FUNCTION 3 rum_int4_extract_query(int4, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int4_compare_prefix(int4,int4,int2, internal), + -- support to int4 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int4_config(internal), + FUNCTION 9 rum_int4_outer_distance(int4, int4, smallint), +STORAGE int4; + +/*--------------------int8-----------------------*/ + +CREATE FUNCTION rum_int8_extract_value(int8, internal) +RETURNS internal +AS 
'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_compare_prefix(int8, int8, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_extract_query(int8, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_int8_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int8_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_int8_left_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int8_left_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int8_right_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int8_right_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int8_outer_distance(int8, int8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int8_ops +DEFAULT FOR TYPE int8 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int8,int8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint8cmp(int8,int8), + FUNCTION 2 rum_int8_extract_value(int8, internal), + FUNCTION 3 rum_int8_extract_query(int8, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int8_compare_prefix(int8,int8,int2, 
internal), + -- support to int8 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int8_config(internal), + FUNCTION 9 rum_int8_outer_distance(int8, int8, smallint), +STORAGE int8; + +/*--------------------float4-----------------------*/ + +CREATE FUNCTION rum_float4_extract_value(float4, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_compare_prefix(float4, float4, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_extract_query(float4, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_float4_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_float4_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_float4_left_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_float4_left_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_float4_right_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_float4_right_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_float4_outer_distance(float4, float4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_float4_ops +DEFAULT FOR TYPE float4 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (float4,float4) FOR ORDER BY pg_catalog.float_ops, 
+ OPERATOR 22 |=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btfloat4cmp(float4,float4), + FUNCTION 2 rum_float4_extract_value(float4, internal), + FUNCTION 3 rum_float4_extract_query(float4, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_float4_compare_prefix(float4,float4,int2, internal), + -- support to float4 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_float4_config(internal), + FUNCTION 9 rum_float4_outer_distance(float4, float4, smallint), +STORAGE float4; + +/*--------------------float8-----------------------*/ + +CREATE FUNCTION rum_float8_extract_value(float8, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float8_compare_prefix(float8, float8, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float8_extract_query(float8, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_float8_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_float8_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_float8_left_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_float8_left_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_float8_right_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_float8_right_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_float8_outer_distance(float8, float8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + +CREATE FUNCTION rum_float8_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_float8_ops +DEFAULT FOR TYPE float8 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (float8,float8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btfloat8cmp(float8,float8), + FUNCTION 2 rum_float8_extract_value(float8, internal), + FUNCTION 3 rum_float8_extract_query(float8, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_float8_compare_prefix(float8,float8,int2, internal), + -- support to float8 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_float8_config(internal), + FUNCTION 9 rum_float8_outer_distance(float8, float8, smallint), +STORAGE float8; + +/*--------------------money-----------------------*/ + +CREATE FUNCTION rum_money_extract_value(money, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_compare_prefix(money, money, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_extract_query(money, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_money_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_money_distance, + LEFTARG = money, + RIGHTARG = money, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_money_left_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_money_left_distance, + LEFTARG = money, + RIGHTARG = money, + 
COMMUTATOR = |=> +); + +CREATE FUNCTION rum_money_right_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_money_right_distance, + LEFTARG = money, + RIGHTARG = money, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_money_outer_distance(money, money, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_money_ops +DEFAULT FOR TYPE money USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (money,money) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (money,money) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (money,money) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 cash_cmp(money,money), + FUNCTION 2 rum_money_extract_value(money, internal), + FUNCTION 3 rum_money_extract_query(money, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_money_compare_prefix(money,money,int2, internal), + -- support to money distance in rum_tsvector_addon_ops + FUNCTION 6 rum_money_config(internal), + FUNCTION 9 rum_money_outer_distance(money, money, smallint), +STORAGE money; + +/*--------------------oid-----------------------*/ + +CREATE FUNCTION rum_oid_extract_value(oid, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_compare_prefix(oid, oid, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_extract_query(oid, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_oid_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + 
+CREATE OPERATOR <=> ( + PROCEDURE = rum_oid_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_oid_left_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_oid_left_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_oid_right_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_oid_right_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_oid_outer_distance(oid, oid, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_oid_ops +DEFAULT FOR TYPE oid USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (oid,oid) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btoidcmp(oid,oid), + FUNCTION 2 rum_oid_extract_value(oid, internal), + FUNCTION 3 rum_oid_extract_query(oid, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_oid_compare_prefix(oid,oid,int2, internal), + -- support to oid distance in rum_tsvector_addon_ops + FUNCTION 6 rum_oid_config(internal), + FUNCTION 9 rum_oid_outer_distance(oid, oid, smallint), +STORAGE oid; + +/*--------------------time-----------------------*/ + +CREATE FUNCTION rum_time_extract_value(time, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_time_compare_prefix(time, time, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + 
+CREATE FUNCTION rum_time_extract_query(time, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_time_ops +DEFAULT FOR TYPE time USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 time_cmp(time,time), + FUNCTION 2 rum_time_extract_value(time, internal), + FUNCTION 3 rum_time_extract_query(time, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_time_compare_prefix(time,time,int2, internal), +STORAGE time; + +/*--------------------timetz-----------------------*/ + +CREATE FUNCTION rum_timetz_extract_value(timetz, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timetz_compare_prefix(timetz, timetz, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timetz_extract_query(timetz, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_timetz_ops +DEFAULT FOR TYPE timetz USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 timetz_cmp(timetz,timetz), + FUNCTION 2 rum_timetz_extract_value(timetz, internal), + FUNCTION 3 rum_timetz_extract_query(timetz, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_timetz_compare_prefix(timetz,timetz,int2, internal), +STORAGE timetz; + +/*--------------------date-----------------------*/ + +CREATE FUNCTION rum_date_extract_value(date, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_date_compare_prefix(date, date, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C 
STRICT IMMUTABLE; + +CREATE FUNCTION rum_date_extract_query(date, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_date_ops +DEFAULT FOR TYPE date USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 date_cmp(date,date), + FUNCTION 2 rum_date_extract_value(date, internal), + FUNCTION 3 rum_date_extract_query(date, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_date_compare_prefix(date,date,int2, internal), +STORAGE date; + +/*--------------------interval-----------------------*/ + +CREATE FUNCTION rum_interval_extract_value(interval, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_interval_compare_prefix(interval, interval, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_interval_extract_query(interval, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_interval_ops +DEFAULT FOR TYPE interval USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 interval_cmp(interval,interval), + FUNCTION 2 rum_interval_extract_value(interval, internal), + FUNCTION 3 rum_interval_extract_query(interval, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_interval_compare_prefix(interval,interval,int2, internal), +STORAGE interval; + +/*--------------------macaddr-----------------------*/ + +CREATE FUNCTION rum_macaddr_extract_value(macaddr, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION 
rum_macaddr_compare_prefix(macaddr, macaddr, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_macaddr_extract_query(macaddr, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_macaddr_ops +DEFAULT FOR TYPE macaddr USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 macaddr_cmp(macaddr,macaddr), + FUNCTION 2 rum_macaddr_extract_value(macaddr, internal), + FUNCTION 3 rum_macaddr_extract_query(macaddr, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_macaddr_compare_prefix(macaddr,macaddr,int2, internal), +STORAGE macaddr; + +/*--------------------inet-----------------------*/ + +CREATE FUNCTION rum_inet_extract_value(inet, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_inet_compare_prefix(inet, inet, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_inet_extract_query(inet, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_inet_ops +DEFAULT FOR TYPE inet USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 network_cmp(inet,inet), + FUNCTION 2 rum_inet_extract_value(inet, internal), + FUNCTION 3 rum_inet_extract_query(inet, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_inet_compare_prefix(inet,inet,int2, internal), +STORAGE inet; + +/*--------------------cidr-----------------------*/ + +CREATE FUNCTION rum_cidr_extract_value(cidr, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + +CREATE FUNCTION rum_cidr_compare_prefix(cidr, cidr, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_cidr_extract_query(cidr, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_cidr_ops +DEFAULT FOR TYPE cidr USING rum +AS + OPERATOR 1 < (inet, inet), + OPERATOR 2 <= (inet, inet), + OPERATOR 3 = (inet, inet), + OPERATOR 4 >= (inet, inet), + OPERATOR 5 > (inet, inet), + FUNCTION 1 network_cmp(inet,inet), + FUNCTION 2 rum_cidr_extract_value(cidr, internal), + FUNCTION 3 rum_cidr_extract_query(cidr, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_cidr_compare_prefix(cidr,cidr,int2, internal), +STORAGE cidr; + +/*--------------------text-----------------------*/ + +CREATE FUNCTION rum_text_extract_value(text, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_text_compare_prefix(text, text, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_text_extract_query(text, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_text_ops +DEFAULT FOR TYPE text USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 bttextcmp(text,text), + FUNCTION 2 rum_text_extract_value(text, internal), + FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), +STORAGE text; + +/*--------------------varchar-----------------------*/ + + +CREATE OPERATOR CLASS rum_varchar_ops +DEFAULT FOR TYPE varchar USING 
rum +AS + OPERATOR 1 < (text, text), + OPERATOR 2 <= (text, text), + OPERATOR 3 = (text, text), + OPERATOR 4 >= (text, text), + OPERATOR 5 > (text, text), + FUNCTION 1 bttextcmp(text,text), + FUNCTION 2 rum_text_extract_value(text, internal), + FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), +STORAGE varchar; + +/*--------------------"char"-----------------------*/ + +CREATE FUNCTION rum_char_extract_value("char", internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_char_compare_prefix("char", "char", int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_char_extract_query("char", internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_char_ops +DEFAULT FOR TYPE "char" USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 btcharcmp("char","char"), + FUNCTION 2 rum_char_extract_value("char", internal), + FUNCTION 3 rum_char_extract_query("char", internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_char_compare_prefix("char","char",int2, internal), +STORAGE "char"; + +/*--------------------bytea-----------------------*/ + +CREATE FUNCTION rum_bytea_extract_value(bytea, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bytea_compare_prefix(bytea, bytea, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bytea_extract_query(bytea, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + + +CREATE OPERATOR CLASS rum_bytea_ops +DEFAULT FOR TYPE bytea USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 byteacmp(bytea,bytea), + FUNCTION 2 rum_bytea_extract_value(bytea, internal), + FUNCTION 3 rum_bytea_extract_query(bytea, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_bytea_compare_prefix(bytea,bytea,int2, internal), +STORAGE bytea; + +/*--------------------bit-----------------------*/ + +CREATE FUNCTION rum_bit_extract_value(bit, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bit_compare_prefix(bit, bit, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bit_extract_query(bit, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_bit_ops +DEFAULT FOR TYPE bit USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 bitcmp(bit,bit), + FUNCTION 2 rum_bit_extract_value(bit, internal), + FUNCTION 3 rum_bit_extract_query(bit, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_bit_compare_prefix(bit,bit,int2, internal), +STORAGE bit; + +/*--------------------varbit-----------------------*/ + +CREATE FUNCTION rum_varbit_extract_value(varbit, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_varbit_compare_prefix(varbit, varbit, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_varbit_extract_query(varbit, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + 
+CREATE OPERATOR CLASS rum_varbit_ops +DEFAULT FOR TYPE varbit USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 varbitcmp(varbit,varbit), + FUNCTION 2 rum_varbit_extract_value(varbit, internal), + FUNCTION 3 rum_varbit_extract_query(varbit, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_varbit_compare_prefix(varbit,varbit,int2, internal), +STORAGE varbit; + +/*--------------------numeric-----------------------*/ + +CREATE FUNCTION rum_numeric_extract_value(numeric, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_numeric_compare_prefix(numeric, numeric, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_numeric_extract_query(numeric, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_numeric_ops +DEFAULT FOR TYPE numeric USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 rum_numeric_cmp(numeric,numeric), + FUNCTION 2 rum_numeric_extract_value(numeric, internal), + FUNCTION 3 rum_numeric_extract_query(numeric, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_numeric_compare_prefix(numeric,numeric,int2, internal), +STORAGE numeric; + diff --git a/rum--1.2--1.3.sql b/rum--1.2--1.3.sql new file mode 100644 index 0000000000..649b3524db --- /dev/null +++ b/rum--1.2--1.3.sql @@ -0,0 +1,19 @@ +/* + * RUM version 1.3 + */ + +CREATE FUNCTION rum_ts_score(tsvector,tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_score_tt' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_score(tsvector,tsquery,int) +RETURNS float4 +AS 
'MODULE_PATHNAME', 'rum_ts_score_ttf' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_score(tsvector,rum_distance_query) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_score_td' +LANGUAGE C IMMUTABLE STRICT; + diff --git a/rum--1.3.sql b/rum--1.3.sql new file mode 100644 index 0000000000..40d9418c68 --- /dev/null +++ b/rum--1.3.sql @@ -0,0 +1,1726 @@ +CREATE OR REPLACE FUNCTION rumhandler(internal) +RETURNS index_am_handler +AS 'MODULE_PATHNAME' +LANGUAGE C; + +/* + * RUM access method + */ + +CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler; + +/* + * RUM built-in types, operators and functions + */ + +-- Type used in distance calculations with normalization argument +CREATE TYPE rum_distance_query AS (query tsquery, method int); + +CREATE FUNCTION tsquery_to_distance_query(tsquery) +RETURNS rum_distance_query +AS 'MODULE_PATHNAME', 'tsquery_to_distance_query' +LANGUAGE C IMMUTABLE STRICT; + +CREATE CAST (tsquery AS rum_distance_query) + WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT; + +CREATE FUNCTION rum_ts_distance(tsvector,tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_tt' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_distance_td' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + LEFTARG = tsvector, + RIGHTARG = tsquery, + PROCEDURE = rum_ts_distance +); + +CREATE OPERATOR <=> ( + LEFTARG = tsvector, + RIGHTARG = rum_distance_query, + PROCEDURE = rum_ts_distance +); + +CREATE FUNCTION rum_timestamp_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_timestamp_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = <=> +); + +CREATE FUNCTION 
rum_timestamp_left_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_timestamp_left_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_timestamp_right_distance(timestamp, timestamp) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_timestamp_right_distance, + LEFTARG = timestamp, + RIGHTARG = timestamp, + COMMUTATOR = <=| +); + +/* + * rum_tsvector_ops operator class + */ + +CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsvector_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +-- To prevent calling from SQL +CREATE FUNCTION rum_ts_join_pos(internal, internal) +RETURNS bytea +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsvector_ops +DEFAULT FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 
rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE text; + +/* + * rum_tsvector_hash_ops operator class. + * + * Stores hash of entries as keys in index. + */ + +CREATE FUNCTION rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsvector_hash_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 6 rum_tsvector_config(internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), + FUNCTION 10 rum_ts_join_pos(internal, internal), + STORAGE integer; + +/* + * rum_timestamp_ops operator 
class + */ + +-- timestamp operator class + +CREATE FUNCTION rum_timestamp_extract_value(timestamp,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timestamp_outer_distance(timestamp, timestamp, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OPERATOR CLASS rum_timestamp_ops +DEFAULT FOR TYPE timestamp USING rum +AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + --support + FUNCTION 1 timestamp_cmp(timestamp,timestamp), + FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), + FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), + FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), + FUNCTION 6 rum_timestamp_config(internal), + -- support to timestamp distance in rum_tsvector_timestamp_ops + FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), + OPERATOR 20 <=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (timestamp,timestamp) FOR ORDER BY 
pg_catalog.float_ops, +STORAGE timestamp; + +/* + * rum_tsvector_timestamp_ops operator class. + * + * Stores timestamp with tsvector. + */ + +CREATE FUNCTION rum_tsquery_timestamp_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +/* + * !!!deprecated, use rum_tsvector_addon_ops!!! + */ +CREATE OPERATOR CLASS rum_tsvector_timestamp_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +/* + * rum_tsvector_hash_timestamp_ops operator class + * !!!deprecated, use rum_tsvector_hash_addon_ops!!! 
+ */ + +CREATE OPERATOR CLASS rum_tsvector_hash_timestamp_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/* + * rum_timestamptz_ops operator class + */ + +CREATE FUNCTION rum_timestamptz_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_timestamptz_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_timestamptz_left_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_left_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_timestamptz_left_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_timestamptz_right_distance(timestamptz, timestamptz) +RETURNS float8 +AS 'MODULE_PATHNAME', 'rum_timestamp_right_distance' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_timestamptz_right_distance, + LEFTARG = timestamptz, + RIGHTARG = timestamptz, + COMMUTATOR = <=| +); + +CREATE OPERATOR CLASS rum_timestamptz_ops +DEFAULT FOR TYPE timestamptz USING rum +AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + --support + FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz), + FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), + FUNCTION 3 
rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), + FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), + FUNCTION 6 rum_timestamp_config(internal), + -- support to timestamptz distance in rum_tsvector_timestamptz_ops + FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), + OPERATOR 20 <=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, +STORAGE timestamptz; + +/* + * rum_tsvector_timestamptz_ops operator class. + * + * Stores tsvector with timestamptz. + */ + +CREATE OPERATOR CLASS rum_tsvector_timestamptz_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +/* + * rum_tsvector_hash_timestamptz_ops operator class + */ + +CREATE OPERATOR CLASS rum_tsvector_hash_timestamptz_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 
rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/* + * rum_tsquery_ops operator class. + * + * Used for inversed text search. + */ + +CREATE FUNCTION ruminv_extract_tsquery(tsquery,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_tsvector_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION ruminv_tsquery_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR CLASS rum_tsquery_ops +DEFAULT FOR TYPE tsquery USING rum +AS + OPERATOR 1 @@ (tsquery, tsvector), + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 ruminv_extract_tsquery(tsquery,internal,internal,internal,internal), + FUNCTION 3 ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 ruminv_tsvector_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 6 ruminv_tsquery_config(internal), + STORAGE text; +/* + * RUM version 1.1 + */ + +CREATE FUNCTION rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +ALTER FUNCTION + rum_tsquery_timestamp_consistent (internal,smallint,tsvector,int,internal,internal,internal,internal) + RENAME TO rum_tsquery_addon_consistent; + +CREATE FUNCTION rum_numeric_cmp(numeric, numeric) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE OPERATOR CLASS 
rum_tsvector_addon_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 gin_cmp_tslexeme(text, text), + FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE text; + +CREATE OPERATOR CLASS rum_tsvector_hash_addon_ops +FOR TYPE tsvector USING rum +AS + OPERATOR 1 @@ (tsvector, tsquery), + --support function + FUNCTION 1 btint4cmp(integer, integer), + FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), + FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), + STORAGE integer; + +/*--------------------int2-----------------------*/ + +CREATE FUNCTION rum_int2_extract_value(int2, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_compare_prefix(int2, int2, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_extract_query(int2, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_int2_distance(int2, int2) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int2_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = <=> +); + +CREATE FUNCTION 
rum_int2_left_distance(int2, int2) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int2_left_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int2_right_distance(int2, int2) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int2_right_distance, + LEFTARG = int2, + RIGHTARG = int2, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int2_outer_distance(int2, int2, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int2_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int2_ops +DEFAULT FOR TYPE int2 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int2,int2) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint2cmp(int2,int2), + FUNCTION 2 rum_int2_extract_value(int2, internal), + FUNCTION 3 rum_int2_extract_query(int2, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int2_compare_prefix(int2,int2,int2, internal), + -- support to int2 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int2_config(internal), + FUNCTION 9 rum_int2_outer_distance(int2, int2, smallint), +STORAGE int2; + +/*--------------------int4-----------------------*/ + +CREATE FUNCTION rum_int4_extract_value(int4, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_compare_prefix(int4, int4, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_extract_query(int4, internal, int2, internal, internal) +RETURNS 
internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_int4_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int4_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_int4_left_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int4_left_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int4_right_distance(int4, int4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int4_right_distance, + LEFTARG = int4, + RIGHTARG = int4, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int4_outer_distance(int4, int4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int4_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int4_ops +DEFAULT FOR TYPE int4 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int4,int4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint4cmp(int4,int4), + FUNCTION 2 rum_int4_extract_value(int4, internal), + FUNCTION 3 rum_int4_extract_query(int4, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int4_compare_prefix(int4,int4,int2, internal), + -- support to int4 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int4_config(internal), + FUNCTION 9 rum_int4_outer_distance(int4, int4, smallint), +STORAGE int4; + +/*--------------------int8-----------------------*/ + +CREATE FUNCTION 
rum_int8_extract_value(int8, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_compare_prefix(int8, int8, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_extract_query(int8, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_int8_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_int8_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_int8_left_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_int8_left_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_int8_right_distance(int8, int8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_int8_right_distance, + LEFTARG = int8, + RIGHTARG = int8, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_int8_outer_distance(int8, int8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_int8_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_int8_ops +DEFAULT FOR TYPE int8 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (int8,int8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btint8cmp(int8,int8), + FUNCTION 2 rum_int8_extract_value(int8, internal), + FUNCTION 3 rum_int8_extract_query(int8, internal, int2, internal, internal), + FUNCTION 4 
rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_int8_compare_prefix(int8,int8,int2, internal), + -- support to int8 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_int8_config(internal), + FUNCTION 9 rum_int8_outer_distance(int8, int8, smallint), +STORAGE int8; + +/*--------------------float4-----------------------*/ + +CREATE FUNCTION rum_float4_extract_value(float4, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_compare_prefix(float4, float4, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_extract_query(float4, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_float4_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_float4_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_float4_left_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_float4_left_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_float4_right_distance(float4, float4) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_float4_right_distance, + LEFTARG = float4, + RIGHTARG = float4, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_float4_outer_distance(float4, float4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float4_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_float4_ops +DEFAULT FOR TYPE float4 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 
> , + OPERATOR 20 <=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (float4,float4) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btfloat4cmp(float4,float4), + FUNCTION 2 rum_float4_extract_value(float4, internal), + FUNCTION 3 rum_float4_extract_query(float4, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_float4_compare_prefix(float4,float4,int2, internal), + -- support to float4 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_float4_config(internal), + FUNCTION 9 rum_float4_outer_distance(float4, float4, smallint), +STORAGE float4; + +/*--------------------float8-----------------------*/ + +CREATE FUNCTION rum_float8_extract_value(float8, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float8_compare_prefix(float8, float8, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float8_extract_query(float8, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_float8_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_float8_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_float8_left_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_float8_left_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_float8_right_distance(float8, float8) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_float8_right_distance, + LEFTARG = float8, + RIGHTARG = float8, + COMMUTATOR = 
<=| +); + +CREATE FUNCTION rum_float8_outer_distance(float8, float8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_float8_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_float8_ops +DEFAULT FOR TYPE float8 USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (float8,float8) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btfloat8cmp(float8,float8), + FUNCTION 2 rum_float8_extract_value(float8, internal), + FUNCTION 3 rum_float8_extract_query(float8, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_float8_compare_prefix(float8,float8,int2, internal), + -- support to float8 distance in rum_tsvector_addon_ops + FUNCTION 6 rum_float8_config(internal), + FUNCTION 9 rum_float8_outer_distance(float8, float8, smallint), +STORAGE float8; + +/*--------------------money-----------------------*/ + +CREATE FUNCTION rum_money_extract_value(money, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_compare_prefix(money, money, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_extract_query(money, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + + +CREATE FUNCTION rum_money_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_money_distance, + LEFTARG = money, + RIGHTARG = money, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_money_left_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' 
+LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_money_left_distance, + LEFTARG = money, + RIGHTARG = money, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_money_right_distance(money, money) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_money_right_distance, + LEFTARG = money, + RIGHTARG = money, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_money_outer_distance(money, money, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_money_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_money_ops +DEFAULT FOR TYPE money USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (money,money) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (money,money) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (money,money) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 cash_cmp(money,money), + FUNCTION 2 rum_money_extract_value(money, internal), + FUNCTION 3 rum_money_extract_query(money, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_money_compare_prefix(money,money,int2, internal), + -- support to money distance in rum_tsvector_addon_ops + FUNCTION 6 rum_money_config(internal), + FUNCTION 9 rum_money_outer_distance(money, money, smallint), +STORAGE money; + +/*--------------------oid-----------------------*/ + +CREATE FUNCTION rum_oid_extract_value(oid, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_compare_prefix(oid, oid, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_extract_query(oid, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + + + +CREATE FUNCTION rum_oid_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_oid_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = <=> +); + +CREATE FUNCTION rum_oid_left_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR <=| ( + PROCEDURE = rum_oid_left_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = |=> +); + +CREATE FUNCTION rum_oid_right_distance(oid, oid) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE OPERATOR |=> ( + PROCEDURE = rum_oid_right_distance, + LEFTARG = oid, + RIGHTARG = oid, + COMMUTATOR = <=| +); + +CREATE FUNCTION rum_oid_outer_distance(oid, oid, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_oid_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + + +CREATE OPERATOR CLASS rum_oid_ops +DEFAULT FOR TYPE oid USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + OPERATOR 20 <=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 21 <=| (oid,oid) FOR ORDER BY pg_catalog.float_ops, + OPERATOR 22 |=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, + FUNCTION 1 btoidcmp(oid,oid), + FUNCTION 2 rum_oid_extract_value(oid, internal), + FUNCTION 3 rum_oid_extract_query(oid, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_oid_compare_prefix(oid,oid,int2, internal), + -- support to oid distance in rum_tsvector_addon_ops + FUNCTION 6 rum_oid_config(internal), + FUNCTION 9 rum_oid_outer_distance(oid, oid, smallint), +STORAGE oid; + +/*--------------------time-----------------------*/ + +CREATE FUNCTION rum_time_extract_value(time, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE 
FUNCTION rum_time_compare_prefix(time, time, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_time_extract_query(time, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_time_ops +DEFAULT FOR TYPE time USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 time_cmp(time,time), + FUNCTION 2 rum_time_extract_value(time, internal), + FUNCTION 3 rum_time_extract_query(time, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_time_compare_prefix(time,time,int2, internal), +STORAGE time; + +/*--------------------timetz-----------------------*/ + +CREATE FUNCTION rum_timetz_extract_value(timetz, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timetz_compare_prefix(timetz, timetz, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_timetz_extract_query(timetz, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_timetz_ops +DEFAULT FOR TYPE timetz USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 timetz_cmp(timetz,timetz), + FUNCTION 2 rum_timetz_extract_value(timetz, internal), + FUNCTION 3 rum_timetz_extract_query(timetz, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_timetz_compare_prefix(timetz,timetz,int2, internal), +STORAGE timetz; + +/*--------------------date-----------------------*/ + +CREATE FUNCTION rum_date_extract_value(date, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT 
IMMUTABLE; + +CREATE FUNCTION rum_date_compare_prefix(date, date, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_date_extract_query(date, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_date_ops +DEFAULT FOR TYPE date USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 date_cmp(date,date), + FUNCTION 2 rum_date_extract_value(date, internal), + FUNCTION 3 rum_date_extract_query(date, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_date_compare_prefix(date,date,int2, internal), +STORAGE date; + +/*--------------------interval-----------------------*/ + +CREATE FUNCTION rum_interval_extract_value(interval, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_interval_compare_prefix(interval, interval, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_interval_extract_query(interval, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_interval_ops +DEFAULT FOR TYPE interval USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 interval_cmp(interval,interval), + FUNCTION 2 rum_interval_extract_value(interval, internal), + FUNCTION 3 rum_interval_extract_query(interval, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_interval_compare_prefix(interval,interval,int2, internal), +STORAGE interval; + +/*--------------------macaddr-----------------------*/ + +CREATE FUNCTION rum_macaddr_extract_value(macaddr, 
internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_macaddr_compare_prefix(macaddr, macaddr, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_macaddr_extract_query(macaddr, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_macaddr_ops +DEFAULT FOR TYPE macaddr USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 macaddr_cmp(macaddr,macaddr), + FUNCTION 2 rum_macaddr_extract_value(macaddr, internal), + FUNCTION 3 rum_macaddr_extract_query(macaddr, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_macaddr_compare_prefix(macaddr,macaddr,int2, internal), +STORAGE macaddr; + +/*--------------------inet-----------------------*/ + +CREATE FUNCTION rum_inet_extract_value(inet, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_inet_compare_prefix(inet, inet, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_inet_extract_query(inet, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_inet_ops +DEFAULT FOR TYPE inet USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 network_cmp(inet,inet), + FUNCTION 2 rum_inet_extract_value(inet, internal), + FUNCTION 3 rum_inet_extract_query(inet, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_inet_compare_prefix(inet,inet,int2, internal), +STORAGE inet; + +/*--------------------cidr-----------------------*/ + +CREATE FUNCTION 
rum_cidr_extract_value(cidr, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_cidr_compare_prefix(cidr, cidr, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_cidr_extract_query(cidr, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_cidr_ops +DEFAULT FOR TYPE cidr USING rum +AS + OPERATOR 1 < (inet, inet), + OPERATOR 2 <= (inet, inet), + OPERATOR 3 = (inet, inet), + OPERATOR 4 >= (inet, inet), + OPERATOR 5 > (inet, inet), + FUNCTION 1 network_cmp(inet,inet), + FUNCTION 2 rum_cidr_extract_value(cidr, internal), + FUNCTION 3 rum_cidr_extract_query(cidr, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_cidr_compare_prefix(cidr,cidr,int2, internal), +STORAGE cidr; + +/*--------------------text-----------------------*/ + +CREATE FUNCTION rum_text_extract_value(text, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_text_compare_prefix(text, text, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_text_extract_query(text, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_text_ops +DEFAULT FOR TYPE text USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 bttextcmp(text,text), + FUNCTION 2 rum_text_extract_value(text, internal), + FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), +STORAGE text; + 
+/*--------------------varchar-----------------------*/ + + +CREATE OPERATOR CLASS rum_varchar_ops +DEFAULT FOR TYPE varchar USING rum +AS + OPERATOR 1 < (text, text), + OPERATOR 2 <= (text, text), + OPERATOR 3 = (text, text), + OPERATOR 4 >= (text, text), + OPERATOR 5 > (text, text), + FUNCTION 1 bttextcmp(text,text), + FUNCTION 2 rum_text_extract_value(text, internal), + FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), +STORAGE varchar; + +/*--------------------"char"-----------------------*/ + +CREATE FUNCTION rum_char_extract_value("char", internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_char_compare_prefix("char", "char", int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_char_extract_query("char", internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_char_ops +DEFAULT FOR TYPE "char" USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 btcharcmp("char","char"), + FUNCTION 2 rum_char_extract_value("char", internal), + FUNCTION 3 rum_char_extract_query("char", internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_char_compare_prefix("char","char",int2, internal), +STORAGE "char"; + +/*--------------------bytea-----------------------*/ + +CREATE FUNCTION rum_bytea_extract_value(bytea, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bytea_compare_prefix(bytea, bytea, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION 
rum_bytea_extract_query(bytea, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_bytea_ops +DEFAULT FOR TYPE bytea USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 byteacmp(bytea,bytea), + FUNCTION 2 rum_bytea_extract_value(bytea, internal), + FUNCTION 3 rum_bytea_extract_query(bytea, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_bytea_compare_prefix(bytea,bytea,int2, internal), +STORAGE bytea; + +/*--------------------bit-----------------------*/ + +CREATE FUNCTION rum_bit_extract_value(bit, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bit_compare_prefix(bit, bit, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_bit_extract_query(bit, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_bit_ops +DEFAULT FOR TYPE bit USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 bitcmp(bit,bit), + FUNCTION 2 rum_bit_extract_value(bit, internal), + FUNCTION 3 rum_bit_extract_query(bit, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_bit_compare_prefix(bit,bit,int2, internal), +STORAGE bit; + +/*--------------------varbit-----------------------*/ + +CREATE FUNCTION rum_varbit_extract_value(varbit, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_varbit_compare_prefix(varbit, varbit, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION 
rum_varbit_extract_query(varbit, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_varbit_ops +DEFAULT FOR TYPE varbit USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 varbitcmp(varbit,varbit), + FUNCTION 2 rum_varbit_extract_value(varbit, internal), + FUNCTION 3 rum_varbit_extract_query(varbit, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_varbit_compare_prefix(varbit,varbit,int2, internal), +STORAGE varbit; + +/*--------------------numeric-----------------------*/ + +CREATE FUNCTION rum_numeric_extract_value(numeric, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_numeric_compare_prefix(numeric, numeric, int2, internal) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + +CREATE FUNCTION rum_numeric_extract_query(numeric, internal, int2, internal, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + + +CREATE OPERATOR CLASS rum_numeric_ops +DEFAULT FOR TYPE numeric USING rum +AS + OPERATOR 1 < , + OPERATOR 2 <= , + OPERATOR 3 = , + OPERATOR 4 >= , + OPERATOR 5 > , + FUNCTION 1 rum_numeric_cmp(numeric,numeric), + FUNCTION 2 rum_numeric_extract_value(numeric, internal), + FUNCTION 3 rum_numeric_extract_query(numeric, internal, int2, internal, internal), + FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), + FUNCTION 5 rum_numeric_compare_prefix(numeric,numeric,int2, internal), +STORAGE numeric; + +/* + * RUM version 1.2 + */ + +/*--------------------anyarray-----------------------*/ + +CREATE FUNCTION rum_anyarray_config(internal) +RETURNS void +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +CREATE OR REPLACE FUNCTION 
rum_anyarray_similar(anyarray,anyarray) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT STABLE; + +CREATE OPERATOR % ( + PROCEDURE = rum_anyarray_similar, + LEFTARG = anyarray, + RIGHTARG = anyarray, + COMMUTATOR = '%', + RESTRICT = contsel, + JOIN = contjoinsel +); + + +CREATE OR REPLACE FUNCTION rum_anyarray_distance(anyarray,anyarray) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT STABLE; + +CREATE OPERATOR <=> ( + PROCEDURE = rum_anyarray_distance, + LEFTARG = anyarray, + RIGHTARG = anyarray, + COMMUTATOR = '<=>' +); + + +CREATE FUNCTION rum_extract_anyarray(anyarray,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_anyarray_consistent(internal, smallint, anyarray, integer, internal, internal, internal, internal) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +CREATE OPERATOR CLASS rum_anyarray_ops +DEFAULT FOR TYPE anyarray USING rum +AS + OPERATOR 1 && (anyarray, anyarray), + OPERATOR 2 @> (anyarray, anyarray), + OPERATOR 3 <@ (anyarray, anyarray), + OPERATOR 4 = (anyarray, anyarray), + OPERATOR 5 % (anyarray, anyarray), + OPERATOR 20 <=> (anyarray, anyarray) FOR ORDER BY pg_catalog.float_ops, + --dispatch function 1 for concrete type + FUNCTION 2 rum_extract_anyarray(anyarray,internal,internal,internal,internal), + FUNCTION 3 rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 rum_anyarray_consistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), + FUNCTION 6 rum_anyarray_config(internal), + FUNCTION 8 
rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal), + STORAGE anyelement; + +CREATE OPERATOR CLASS rum_anyarray_addon_ops +FOR TYPE anyarray USING rum +AS + OPERATOR 1 && (anyarray, anyarray), + OPERATOR 2 @> (anyarray, anyarray), + OPERATOR 3 <@ (anyarray, anyarray), + OPERATOR 4 = (anyarray, anyarray), + --dispatch function 1 for concrete type + FUNCTION 2 ginarrayextract(anyarray,internal,internal), + FUNCTION 3 ginqueryarrayextract(anyarray,internal,smallint,internal,internal,internal,internal), + FUNCTION 4 ginarrayconsistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), + STORAGE anyelement; + +/*--------------------int2-----------------------*/ + +CREATE FUNCTION rum_int2_key_distance(int2, int2, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_int2_ops USING rum ADD + FUNCTION 8 (int2,int2) rum_int2_key_distance(int2, int2, smallint); + +/*--------------------int4-----------------------*/ + +CREATE FUNCTION rum_int4_key_distance(int4, int4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_int4_ops USING rum ADD + FUNCTION 8 (int4,int4) rum_int4_key_distance(int4, int4, smallint); + +/*--------------------int8-----------------------*/ + +CREATE FUNCTION rum_int8_key_distance(int8, int8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_int8_ops USING rum ADD + FUNCTION 8 (int8,int8) rum_int8_key_distance(int8, int8, smallint); + +/*--------------------float4-----------------------*/ + +CREATE FUNCTION rum_float4_key_distance(float4, float4, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_float4_ops USING rum ADD + FUNCTION 8 (float4,float4) rum_float4_key_distance(float4, float4, smallint); + +/*--------------------float8-----------------------*/ + 
+CREATE FUNCTION rum_float8_key_distance(float8, float8, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_float8_ops USING rum ADD + FUNCTION 8 (float8,float8) rum_float8_key_distance(float8, float8, smallint); + +/*--------------------money-----------------------*/ + +CREATE FUNCTION rum_money_key_distance(money, money, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_money_ops USING rum ADD + FUNCTION 8 (money,money) rum_money_key_distance(money, money, smallint); + +/*--------------------oid-----------------------*/ + +CREATE FUNCTION rum_oid_key_distance(oid, oid, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_oid_ops USING rum ADD + FUNCTION 8 (oid,oid) rum_oid_key_distance(oid, oid, smallint); + +/*--------------------timestamp-----------------------*/ + +CREATE FUNCTION rum_timestamp_key_distance(timestamp, timestamp, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_timestamp_ops USING rum ADD + FUNCTION 8 (timestamp,timestamp) rum_timestamp_key_distance(timestamp, timestamp, smallint); + +/*--------------------timestamptz-----------------------*/ + +CREATE FUNCTION rum_timestamptz_key_distance(timestamptz, timestamptz, smallint) +RETURNS float8 +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + + +ALTER OPERATOR FAMILY rum_timestamptz_ops USING rum ADD + FUNCTION 8 (timestamptz,timestamptz) rum_timestamptz_key_distance(timestamptz, timestamptz, smallint); + +/* + * RUM version 1.3 + */ + +CREATE FUNCTION rum_ts_score(tsvector,tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_score_tt' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION rum_ts_score(tsvector,tsquery,int) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_score_ttf' +LANGUAGE C IMMUTABLE STRICT; + +CREATE FUNCTION 
rum_ts_score(tsvector,rum_distance_query) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rum_ts_score_td' +LANGUAGE C IMMUTABLE STRICT; + diff --git a/rum.control b/rum.control index 18b5bd543f..30a00ccf67 100644 --- a/rum.control +++ b/rum.control @@ -1,5 +1,5 @@ # RUM extension comment = 'RUM index access method' -default_version = '1.2' +default_version = '1.3' module_pathname = '$libdir/rum' relocatable = true diff --git a/sql/rum.sql b/sql/rum.sql index b2e03f3c5f..8b8607faa6 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -11,6 +11,7 @@ CREATE INDEX rumidx ON test_rum USING rum (a rum_tsvector_ops); SELECT a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; @@ -46,11 +47,15 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'def <-> fgr'); SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'def <2> fgr'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); @@ -62,11 +67,15 @@ SELECT ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; -- Check ranking normalization -SELECT rum_ts_distance(a, 
to_tsquery('pg_catalog.english', 'way'), 0), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), * +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); diff --git a/sql/rum_hash.sql b/sql/rum_hash.sql index bab5f04c1c..511e772da5 100644 --- a/sql/rum_hash.sql +++ b/sql/rum_hash.sql @@ -35,27 +35,36 @@ SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'def <-> fgr'); SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'def <2> fgr'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); SELECT a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), 
+ rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), * FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), * +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), * +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), + * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index b6eff4524b..b43ac3b0a3 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -46,6 +46,9 @@ PG_FUNCTION_INFO_V1(rum_tsquery_distance); PG_FUNCTION_INFO_V1(rum_ts_distance_tt); PG_FUNCTION_INFO_V1(rum_ts_distance_ttf); PG_FUNCTION_INFO_V1(rum_ts_distance_td); +PG_FUNCTION_INFO_V1(rum_ts_score_tt); +PG_FUNCTION_INFO_V1(rum_ts_score_ttf); +PG_FUNCTION_INFO_V1(rum_ts_score_td); PG_FUNCTION_INFO_V1(rum_ts_join_pos); PG_FUNCTION_INFO_V1(tsquery_to_distance_query); @@ -1473,15 +1476,9 @@ rum_ts_distance_ttf(PG_FUNCTION_ARGS) PG_RETURN_FLOAT4(1.0 / res); } -/* - * Implementation of <=> operator. Uses specified normalization method. 
- */ -Datum -rum_ts_distance_td(PG_FUNCTION_ARGS) +static float4 +calc_score_parse_opt(TSVector txt, HeapTupleHeader d) { - TSVector txt = PG_GETARG_TSVECTOR(0); - HeapTupleHeader d = PG_GETARG_HEAPTUPLEHEADER(1); - Oid tupType = HeapTupleHeaderGetTypeId(d); int32 tupTypmod = HeapTupleHeaderGetTypMod(d); TupleDesc tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod); @@ -1501,8 +1498,6 @@ rum_ts_distance_td(PG_FUNCTION_ARGS) if (isnull) { ReleaseTupleDesc(tupdesc); - PG_FREE_IF_COPY(txt, 0); - PG_FREE_IF_COPY(d, 1); elog(ERROR, "NULL query value is not allowed"); } @@ -1513,6 +1508,22 @@ rum_ts_distance_td(PG_FUNCTION_ARGS) res = calc_score(weights, txt, query, method); ReleaseTupleDesc(tupdesc); + + return res; +} + +/* + * Implementation of <=> operator. Uses specified normalization method. + */ +Datum +rum_ts_distance_td(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + HeapTupleHeader d = PG_GETARG_HEAPTUPLEHEADER(1); + float4 res; + + res = calc_score_parse_opt(txt, d); + PG_FREE_IF_COPY(txt, 0); PG_FREE_IF_COPY(d, 1); @@ -1522,6 +1533,61 @@ rum_ts_distance_td(PG_FUNCTION_ARGS) PG_RETURN_FLOAT4(1.0 / res); } +/* + * Calculate score (inverted distance). Uses default normalization method. + */ +Datum +rum_ts_score_tt(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + float4 res; + + res = calc_score(weights, txt, query, DEF_NORM_METHOD); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + + PG_RETURN_FLOAT4(res); +} + +/* + * Calculate score (inverted distance). Uses specified normalization method. + */ +Datum +rum_ts_score_ttf(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + TSQuery query = PG_GETARG_TSQUERY(1); + int method = PG_GETARG_INT32(2); + float4 res; + + res = calc_score(weights, txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + + PG_RETURN_FLOAT4(res); +} + +/* + * Calculate score (inverted distance). 
Uses specified normalization method. + */ +Datum +rum_ts_score_td(PG_FUNCTION_ARGS) +{ + TSVector txt = PG_GETARG_TSVECTOR(0); + HeapTupleHeader d = PG_GETARG_HEAPTUPLEHEADER(1); + float4 res; + + res = calc_score_parse_opt(txt, d); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(d, 1); + + PG_RETURN_FLOAT4(res); +} + /* * Casts tsquery to rum_distance_query type. */ From f621e8c8761bd836ff60335f3914926f3661cd8e Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 9 Oct 2018 11:35:37 +0300 Subject: [PATCH 027/182] Port RUM to current master --- src/rumsort.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/rumsort.c b/src/rumsort.c index b975f4b3e3..94da17252e 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -3517,11 +3517,19 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; +#if PG_VERSION_NUM >= 120000 + ExecStoreHeapTuple(ltup, ecxt_scantuple, false); +#else ExecStoreTuple(ltup, ecxt_scantuple, InvalidBuffer, false); +#endif FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, l_index_values, l_index_isnull); +#if PG_VERSION_NUM >= 120000 + ExecStoreHeapTuple(rtup, ecxt_scantuple, false); +#else ExecStoreTuple(rtup, ecxt_scantuple, InvalidBuffer, false); +#endif FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, r_index_values, r_index_isnull); From 4ed27db55c3dec0751e8a798b89d3689461c249d Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 2 Nov 2018 15:12:51 +0300 Subject: [PATCH 028/182] Update copyrights --- LICENSE | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE b/LICENSE index d73dbdb454..c786b781e6 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ RUM is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. 
-Copyright (c) 2015-2017, Postgres Professional +Copyright (c) 2015-2018, Postgres Professional Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group Portions Copyright (c) 1994, The Regents of the University of California diff --git a/README.md b/README.md index f08c76e5f6..ab634da768 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ It is because we need to store additional information besides keys and because ## License -This module available under the same license as +This module available under the [license](LICENSE) similar to [PostgreSQL](http://www.postgresql.org/about/licence/). ## Installation From 7f025c9814b118d8648cd986afff1481647e9f3e Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 9 Nov 2018 14:27:52 +0300 Subject: [PATCH 029/182] Issue #48: Renames for PostgreSQL 10 and 11 in rbtree structures and functions --- src/rum.h | 6 +++++- src/rumbulk.c | 40 ++++++++++++++++++++++++---------------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/rum.h b/src/rum.h index ffce9b2aaa..b4e0ac9a6c 100644 --- a/src/rum.h +++ b/src/rum.h @@ -773,9 +773,13 @@ extern IndexBulkDeleteResult *rumvacuumcleanup(IndexVacuumInfo *info, extern bool rumvalidate(Oid opclassoid); /* rumbulk.c */ +#if PG_VERSION_NUM < 100000 +#define RBTNode RBNode +#endif + typedef struct RumEntryAccumulator { - RBNode rbnode; + RBTNode rbnode; Datum key; RumNullCategory category; OffsetNumber attnum; diff --git a/src/rumbulk.c b/src/rumbulk.c index 6e08056c55..dad8ea104f 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -21,10 +21,18 @@ #define DEF_NENTRY 2048 /* RumEntryAccumulator allocation quantum */ #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ +/* PostgreSQL pre 10 has different names for this functions */ +#if PG_VERSION_NUM < 100000 +#define rbt_create(node_size, comparator, combiner, allocfunc, freefunc, arg) \ + (rb_create(node_size, comparator, combiner, allocfunc, freefunc, arg)) +#define 
rbt_insert(rbt, data, isNew) \ + (rb_insert(rbt, data, isNew)) +#endif + /* Combiner function for rbtree.c */ static void -rumCombineData(RBNode *existing, const RBNode *newdata, void *arg) +rumCombineData(RBTNode *existing, const RBTNode *newdata, void *arg) { RumEntryAccumulator *eo = (RumEntryAccumulator *) existing; const RumEntryAccumulator *en = (const RumEntryAccumulator *) newdata; @@ -65,7 +73,7 @@ rumCombineData(RBNode *existing, const RBNode *newdata, void *arg) /* Comparator function for rbtree.c */ static int -cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg) +cmpEntryAccumulator(const RBTNode *a, const RBTNode *b, void *arg) { const RumEntryAccumulator *ea = (const RumEntryAccumulator *) a; const RumEntryAccumulator *eb = (const RumEntryAccumulator *) b; @@ -77,7 +85,7 @@ cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg) } /* Allocator function for rbtree.c */ -static RBNode * +static RBTNode * rumAllocEntryAccumulator(void *arg) { BuildAccumulator *accum = (BuildAccumulator *) arg; @@ -85,7 +93,7 @@ rumAllocEntryAccumulator(void *arg) /* * Allocate memory by rather big chunks to decrease overhead. We have no - * need to reclaim RBNodes individually, so this costs nothing. + * need to reclaim RBTNodes individually, so this costs nothing. 
*/ if (accum->entryallocator == NULL || accum->eas_used >= DEF_NENTRY) { @@ -94,11 +102,11 @@ rumAllocEntryAccumulator(void *arg) accum->eas_used = 0; } - /* Allocate new RBNode from current chunk */ + /* Allocate new RBTNode from current chunk */ ea = accum->entryallocator + accum->eas_used; accum->eas_used++; - return (RBNode *) ea; + return (RBTNode *) ea; } void @@ -108,12 +116,12 @@ rumInitBA(BuildAccumulator *accum) accum->allocatedMemory = 0; accum->entryallocator = NULL; accum->eas_used = 0; - accum->tree = rb_create(sizeof(RumEntryAccumulator), - cmpEntryAccumulator, - rumCombineData, - rumAllocEntryAccumulator, - NULL, /* no freefunc needed */ - (void *) accum); + accum->tree = rbt_create(sizeof(RumEntryAccumulator), + cmpEntryAccumulator, + rumCombineData, + rumAllocEntryAccumulator, + NULL, /* no freefunc needed */ + (void *) accum); } /* @@ -163,8 +171,8 @@ rumInsertBAEntry(BuildAccumulator *accum, item.addInfo = addInfo; item.addInfoIsNull = addInfoIsNull; - ea = (RumEntryAccumulator *) rb_insert(accum->tree, (RBNode *) &eatmp, - &isNew); + ea = (RumEntryAccumulator *) rbt_insert(accum->tree, (RBTNode *) &eatmp, + &isNew); if (isNew) { @@ -273,7 +281,7 @@ void rumBeginBAScan(BuildAccumulator *accum) { #if PG_VERSION_NUM >= 100000 - rb_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); + rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); #else rb_begin_iterate(accum->tree, LeftRightWalk); #endif @@ -293,7 +301,7 @@ rumGetBAEntry(BuildAccumulator *accum, RumItem *list; #if PG_VERSION_NUM >= 100000 - entry = (RumEntryAccumulator *) rb_iterate(&accum->tree_walk); + entry = (RumEntryAccumulator *) rbt_iterate(&accum->tree_walk); #else entry = (RumEntryAccumulator *) rb_iterate(accum->tree); #endif From 74769d82a84d5da921a9c7cc9b5e1524c055f8ef Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 9 Nov 2018 15:34:52 +0300 Subject: [PATCH 030/182] Issue #48: PostgreSQL 10.6 release didn't get rbtree renames Handle it differently 
for PostgreSQL 10.6 and 10.7. --- src/rum.h | 2 +- src/rumbulk.c | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/rum.h b/src/rum.h index b4e0ac9a6c..612112ef32 100644 --- a/src/rum.h +++ b/src/rum.h @@ -773,7 +773,7 @@ extern IndexBulkDeleteResult *rumvacuumcleanup(IndexVacuumInfo *info, extern bool rumvalidate(Oid opclassoid); /* rumbulk.c */ -#if PG_VERSION_NUM < 100000 +#if PG_VERSION_NUM <= 100006 #define RBTNode RBNode #endif diff --git a/src/rumbulk.c b/src/rumbulk.c index dad8ea104f..9bbe88da6a 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -22,7 +22,7 @@ #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ /* PostgreSQL pre 10 has different names for this functions */ -#if PG_VERSION_NUM < 100000 +#if PG_VERSION_NUM <= 100006 #define rbt_create(node_size, comparator, combiner, allocfunc, freefunc, arg) \ (rb_create(node_size, comparator, combiner, allocfunc, freefunc, arg)) #define rbt_insert(rbt, data, isNew) \ @@ -280,8 +280,10 @@ qsortCompareRumItem(const void *a, const void *b, void *arg) void rumBeginBAScan(BuildAccumulator *accum) { -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM > 100006 rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); +#elif PG_VERSION_NUM >= 100000 + rb_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); #else rb_begin_iterate(accum->tree, LeftRightWalk); #endif @@ -300,8 +302,10 @@ rumGetBAEntry(BuildAccumulator *accum, RumEntryAccumulator *entry; RumItem *list; -#if PG_VERSION_NUM >= 100000 +#if PG_VERSION_NUM > 100006 entry = (RumEntryAccumulator *) rbt_iterate(&accum->tree_walk); +#elif PG_VERSION_NUM >= 100000 + entry = (RumEntryAccumulator *) rb_iterate(&accum->tree_walk); #else entry = (RumEntryAccumulator *) rb_iterate(accum->tree); #endif From 691c9c66a0e47654bf5a546fe63908cfbac926f8 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 12 Nov 2018 11:42:40 +0300 Subject: [PATCH 031/182] Use typedef for RBTNode instead of define --- src/rum.h | 
2 +- src/rumbtree.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rum.h b/src/rum.h index 612112ef32..ad98137f26 100644 --- a/src/rum.h +++ b/src/rum.h @@ -774,7 +774,7 @@ extern bool rumvalidate(Oid opclassoid); /* rumbulk.c */ #if PG_VERSION_NUM <= 100006 -#define RBTNode RBNode +typedef RBNode RBTNode; #endif typedef struct RumEntryAccumulator diff --git a/src/rumbtree.c b/src/rumbtree.c index f6244f5308..c2106eeab2 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -527,7 +527,7 @@ rumInsertValue(Relation index, RumBtree btree, RumBtreeStack * stack, else { BlockNumber rightrightBlkno = InvalidBlockNumber; - Buffer rightrightBuffer; + Buffer rightrightBuffer = InvalidBuffer; /* split non-root page */ if (btree->rumstate->isBuild) From 54c5110b0d458180afac554c79a888c44043f512 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 16 Nov 2018 12:46:59 +0300 Subject: [PATCH 032/182] Improve support of current master --- src/rum.h | 5 ++++- src/rumsort.c | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rum.h b/src/rum.h index ad98137f26..de47ef5254 100644 --- a/src/rum.h +++ b/src/rum.h @@ -1062,7 +1062,10 @@ extern Datum FunctionCall10Coll(FmgrInfo *flinfo, Oid collation, Datum arg9, Datum arg10); /* PostgreSQL version-agnostic creation of memory context */ -#if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 120000 +#define RumContextCreate(parent, name) \ + AllocSetContextCreate(parent, name, ALLOCSET_DEFAULT_SIZES) +#elif PG_VERSION_NUM >= 110000 #define RumContextCreate(parent, name) \ AllocSetContextCreateExtended(parent, name, \ ALLOCSET_DEFAULT_MINSIZE, \ diff --git a/src/rumsort.c b/src/rumsort.c index 94da17252e..a47801c6b7 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -1067,7 +1067,11 @@ rum_tuplesort_begin_cluster(TupleDesc tupDesc, * scantuple has to point to that slot, too. 
*/ state->estate = CreateExecutorState(); +#if PG_VERSION_NUM >= 120000 + slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsVirtual); +#else slot = MakeSingleTupleTableSlot(tupDesc); +#endif econtext = GetPerTupleExprContext(state->estate); econtext->ecxt_scantuple = slot; } From 6e9f3849f73f8deb35e5239f821d071d15399933 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 11 Jan 2019 12:22:06 +0300 Subject: [PATCH 033/182] PGPRO-2202: Page get uninitialized because of PostingItem aligning --- src/rumdatapage.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index bd112930dd..18b537a2fc 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -1280,8 +1280,8 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, PostingItemSetBlockNumber(&(btree->pitem), BufferGetBlockNumber(lbuf)); if (RumPageIsLeaf(newlPage)) - btree->pitem.item.iptr = *(ItemPointerData *) RumDataPageGetItem(newlPage, - RumPageGetOpaque(newlPage)->maxoff); + btree->pitem.item.iptr = ((PostingItem *) RumDataPageGetItem(newlPage, + RumPageGetOpaque(newlPage)->maxoff))->item.iptr; else btree->pitem.item = ((PostingItem *) RumDataPageGetItem(newlPage, RumPageGetOpaque(newlPage)->maxoff))->item; @@ -1433,10 +1433,12 @@ rumDataFillRoot(RumBtree btree, Buffer root, Buffer lbuf, Buffer rbuf, PostingItem li, ri; + memset(&li, 0, sizeof(PostingItem)); li.item = *RumDataPageGetRightBound(lpage); PostingItemSetBlockNumber(&li, BufferGetBlockNumber(lbuf)); RumDataPageAddItem(page, &li, InvalidOffsetNumber); + memset(&ri, 0, sizeof(PostingItem)); ri.item = *RumDataPageGetRightBound(rpage); PostingItemSetBlockNumber(&ri, BufferGetBlockNumber(rbuf)); RumDataPageAddItem(page, &ri, InvalidOffsetNumber); From cd890988cdd49cd712910515d712f37349a71030 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 11 Jan 2019 12:22:40 +0300 Subject: [PATCH 034/182] Improve support of current master --- src/rumutil.c | 8 ++++++++ 1 file changed, 8 
insertions(+) diff --git a/src/rumutil.c b/src/rumutil.c index 838674882f..ea7fcb3ee2 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -245,7 +245,11 @@ initRumState(RumState * state, Relation index) if (state->oneCol) { state->tupdesc[i] = CreateTemplateTupleDesc( +#if PG_VERSION_NUM >= 120000 + OidIsValid(rumConfig->addInfoTypeOid) ? 2 : 1); +#else OidIsValid(rumConfig->addInfoTypeOid) ? 2 : 1, false); +#endif TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL, origAttr->atttypid, origAttr->atttypmod, @@ -266,7 +270,11 @@ initRumState(RumState * state, Relation index) else { state->tupdesc[i] = CreateTemplateTupleDesc( +#if PG_VERSION_NUM >= 120000 + OidIsValid(rumConfig->addInfoTypeOid) ? 3 : 2); +#else OidIsValid(rumConfig->addInfoTypeOid) ? 3 : 2, false); +#endif TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 1, NULL, INT2OID, -1, 0); TupleDescInitEntry(state->tupdesc[i], (AttrNumber) 2, NULL, From 5a1b5f59b512634a4aca60ff85cf885b025070ff Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 17 Jan 2019 13:28:47 +0300 Subject: [PATCH 035/182] Disallow to create index using ordering over pass-by-reference AddInfo Currently RUM has bogus behaviour when one creates an index using ordering over pass-by-reference AddInfo. This is due to the fact that posting trees have fixed length right bound and fixed length non-leaf posting items. 
--- expected/text.out | 52 +++++++++++++++++++++++++---------------------- sql/text.sql | 2 ++ src/rum.h | 10 +++++++++ src/rumutil.c | 4 ++++ 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/expected/text.out b/expected/text.out index d8df82168b..9cf9310a77 100644 --- a/expected/text.out +++ b/expected/text.out @@ -125,16 +125,18 @@ SELECT id FROM test_text_o WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; (8 rows) CREATE TABLE test_text_a AS SELECT id::text, t FROM tsts; +-- Should fail, temporarly it isn't allowed to order an index over pass-by-reference column CREATE INDEX test_text_a_idx ON test_text_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column EXPLAIN (costs off) SELECT count(*) FROM test_text_a WHERE id < '400'; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +------------------------------------ Aggregate - -> Index Scan using test_text_a_idx on test_text_a - Index Cond: (id < '400'::text) + -> Seq Scan on test_text_a + Filter: (id < '400'::text) (3 rows) SELECT count(*) FROM test_text_a WHERE id < '400'; @@ -145,12 +147,12 @@ SELECT count(*) FROM test_text_a WHERE id < '400'; EXPLAIN (costs off) SELECT id FROM test_text_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_text_a_idx on test_text_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::text)) + -> Seq Scan on test_text_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::text)) (4 rows) SELECT id FROM test_text_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; @@ -169,12 +171,12 @@ SELECT id FROM test_text_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; EXPLAIN (costs off) 
SELECT id FROM test_text_a WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_text_a_idx on test_text_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::text)) + -> Seq Scan on test_text_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::text)) (4 rows) SELECT id FROM test_text_a WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; @@ -242,16 +244,18 @@ SELECT id FROM test_text_h_o WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; (8 rows) CREATE TABLE test_text_h_a AS SELECT id::text, t FROM tsts; +-- Should fail, temporarly it isn't allowed to order an index over pass-by-reference column CREATE INDEX test_text_h_a_idx ON test_text_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column EXPLAIN (costs off) SELECT count(*) FROM test_text_h_a WHERE id < '400'; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +------------------------------------ Aggregate - -> Index Scan using test_text_h_a_idx on test_text_h_a - Index Cond: (id < '400'::text) + -> Seq Scan on test_text_h_a + Filter: (id < '400'::text) (3 rows) SELECT count(*) FROM test_text_h_a WHERE id < '400'; @@ -262,12 +266,12 @@ SELECT count(*) FROM test_text_h_a WHERE id < '400'; EXPLAIN (costs off) SELECT id FROM test_text_h_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_text_h_a_idx on test_text_h_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::text)) + -> 
Seq Scan on test_text_h_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::text)) (4 rows) SELECT id FROM test_text_h_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; @@ -286,12 +290,12 @@ SELECT id FROM test_text_h_a WHERE t @@ 'wr&qh' AND id <= '400' ORDER BY id; EXPLAIN (costs off) SELECT id FROM test_text_h_a WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; - QUERY PLAN ---------------------------------------------------------------------------------- + QUERY PLAN +----------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_text_h_a_idx on test_text_h_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::text)) + -> Seq Scan on test_text_h_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::text)) (4 rows) SELECT id FROM test_text_h_a WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; diff --git a/sql/text.sql b/sql/text.sql index 5b7fbab485..1f340b7109 100644 --- a/sql/text.sql +++ b/sql/text.sql @@ -37,6 +37,7 @@ SELECT id FROM test_text_o WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; CREATE TABLE test_text_a AS SELECT id::text, t FROM tsts; +-- Should fail, temporarly it isn't allowed to order an index over pass-by-reference column CREATE INDEX test_text_a_idx ON test_text_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); @@ -67,6 +68,7 @@ SELECT id FROM test_text_h_o WHERE t @@ 'wr&qh' AND id >= '400' ORDER BY id; CREATE TABLE test_text_h_a AS SELECT id::text, t FROM tsts; +-- Should fail, temporarly it isn't allowed to order an index over pass-by-reference column CREATE INDEX test_text_h_a_idx ON test_text_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); diff --git a/src/rum.h b/src/rum.h index de47ef5254..9dcb41a730 100644 --- a/src/rum.h +++ b/src/rum.h @@ -262,6 +262,16 @@ typedef signed char RumNullCategory; /* * Data (posting tree) pages */ +/* + * 
FIXME -- Currently RumItem is placed as a pages right bound and PostingItem + * is placed as a non-leaf pages item. Both RumItem and PostingItem stores + * AddInfo as a raw Datum, which is bogus. It is fine for pass-by-value + * attributes, but it isn't for pass-by-reference, which may have variable + * length of data. This AddInfo is used only by order_by_attach indexes, so it + * isn't allowed to create index using ordering over pass-by-reference AddInfo, + * see initRumState(). This can be solved by having non-fixed length right bound + * and non-fixed non-leaf posting tree item. + */ #define RumDataPageGetRightBound(page) ((RumItem*) PageGetContents(page)) #define RumDataPageGetData(page) \ (PageGetContents(page) + MAXALIGN(sizeof(RumItem))) diff --git a/src/rumutil.c b/src/rumutil.c index ea7fcb3ee2..bca37c892a 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -239,6 +239,9 @@ initRumState(RumState * state, Relation index) if (OidIsValid(rumConfig->addInfoTypeOid)) elog(ERROR, "AddTo could should not have AddInfo"); + if (state->useAlternativeOrder && origAddAttr->attbyval == false) + elog(ERROR, "doesn't support order index over pass-by-reference column"); + rumConfig->addInfoTypeOid = origAddAttr->atttypid; } @@ -565,6 +568,7 @@ RumInitPage(Page page, uint32 f, Size pageSize) opaque->flags = f; opaque->leftlink = InvalidBlockNumber; opaque->rightlink = InvalidBlockNumber; + RumItemSetMin(RumDataPageGetRightBound(page)); } void From 27379a8b06b338de022f4f32d19f4ebcadd37ac2 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 17 Jan 2019 13:40:25 +0300 Subject: [PATCH 036/182] Update README due to commit 5a1b5f59 --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index ab634da768..dd575c479c 100644 --- a/README.md +++ b/README.md @@ -195,6 +195,8 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY (5 rows) ``` +> **Warning:** Currently RUM has bogus behaviour when one creates an index 
using ordering over pass-by-reference additional information. This is due to the fact that posting trees have fixed length right bound and fixed length non-leaf posting items. It isn't allowed to create such indexes. + ### rum_tsvector_hash_addon_ops For type: `tsvector` From 0af39929a0f9ab24a183370b190ab34b5b5da6eb Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 21 Jan 2019 17:35:29 +0300 Subject: [PATCH 037/182] Fix tests for 32-bit platforms --- expected/altorder_1.out | 143 ++++++++++++++--------- expected/altorder_hash_1.out | 143 ++++++++++++++--------- expected/int8_1.out | 216 +++++++++++++++++++++++------------ 3 files changed, 317 insertions(+), 185 deletions(-) diff --git a/expected/altorder_1.out b/expected/altorder_1.out index b8c3141b94..ee9396b80c 100644 --- a/expected/altorder_1.out +++ b/expected/altorder_1.out @@ -2,6 +2,7 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' CREATE INDEX atsts_idx ON atsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; @@ -118,14 +119,12 @@ RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atsts - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on atsts_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Seq Scan on atsts + Filter: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; count @@ -165,14 +164,12 @@ SELECT count(*) FROM atsts WHERE t @@ 
'(eq|yt)&(wr|qh)'; EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atsts - Recheck Cond: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) - -> Bitmap Index Scan on atsts_idx - Index Cond: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(5 rows) + -> Seq Scan on atsts + Filter: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; count @@ -182,14 +179,12 @@ SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atsts - Recheck Cond: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) - -> Bitmap Index Scan on atsts_idx - Index Cond: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(5 rows) + -> Seq Scan on atsts + Filter: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; count @@ -199,59 +194,95 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atsts_idx on atsts - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - 
Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atsts_idx on atsts - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atsts_idx on atsts - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atsts_idx on atsts - Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(3 rows) + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts +(4 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 + 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 +(5 rows) + EXPLAIN (costs off) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ Sort Sort Key: d - -> Index Scan using atsts_idx on atsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) (4 rows) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; @@ -270,12 +301,12 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER EXPLAIN (costs off) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ Sort Sort Key: d - -> Index Scan using atsts_idx on atsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atsts + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) (4 rows) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' 
ORDER BY d; diff --git a/expected/altorder_hash_1.out b/expected/altorder_hash_1.out index ce969c44bd..835c4ed88b 100644 --- a/expected/altorder_hash_1.out +++ b/expected/altorder_hash_1.out @@ -2,6 +2,7 @@ CREATE TABLE atstsh (id int, t tsvector, d timestamp); \copy atstsh from 'data/tsts.data' CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; @@ -118,14 +119,12 @@ RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +--------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atstsh - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on atstsh_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Seq Scan on atstsh + Filter: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; count @@ -165,14 +164,12 @@ SELECT count(*) FROM atstsh WHERE t @@ '(eq|yt)&(wr|qh)'; EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE d < '2016-05-16 14:21:25'; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atstsh - Recheck Cond: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) - -> Bitmap Index Scan on atstsh_idx - Index Cond: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(5 rows) + -> Seq Scan on atstsh + Filter: (d < 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) 
SELECT count(*) FROM atstsh WHERE d < '2016-05-16 14:21:25'; count @@ -182,14 +179,12 @@ SELECT count(*) FROM atstsh WHERE d < '2016-05-16 14:21:25'; EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; - QUERY PLAN ------------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on atstsh - Recheck Cond: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) - -> Bitmap Index Scan on atstsh_idx - Index Cond: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(5 rows) + -> Seq Scan on atstsh + Filter: (d > 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; count @@ -199,59 +194,95 @@ SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atstsh_idx on atstsh - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atstsh_idx on atstsh - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atstsh_idx on atstsh - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------------- Limit - -> Index Scan using atstsh_idx on atstsh - Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(3 rows) + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh +(4 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | d | ?column? 
+-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 + 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 +(5 rows) + EXPLAIN (costs off) SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ Sort Sort Key: d - -> Index Scan using atstsh_idx on atstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) (4 rows) SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; @@ -270,12 +301,12 @@ SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDE EXPLAIN (costs off) SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------ Sort Sort Key: d - -> Index Scan using atstsh_idx on atstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Seq Scan on atstsh + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) (4 rows) SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 
14:21:25' ORDER BY d; diff --git a/expected/int8_1.out b/expected/int8_1.out index fe7a3151fb..473eef3c35 100644 --- a/expected/int8_1.out +++ b/expected/int8_1.out @@ -219,14 +219,15 @@ CREATE TABLE test_int8_a AS SELECT id::int8, t FROM tsts; CREATE INDEX test_int8_a_idx ON test_int8_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_a WHERE id < 400::int8; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +-------------------------------------- Aggregate - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: (id < '400'::bigint) + -> Seq Scan on test_int8_a + Filter: (id < '400'::bigint) (3 rows) SELECT count(*) FROM test_int8_a WHERE id < 400::int8; @@ -237,48 +238,75 @@ SELECT count(*) FROM test_int8_a WHERE id < 400::int8; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id <=> '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id <=> '400'::bigint)) + -> Seq Scan on test_int8_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id <=> 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? 
+-----+---------- + 406 | 6 + 415 | 15 + 428 | 28 + 371 | 29 + 355 | 45 +(5 rows) + EXPLAIN (costs off) SELECT id, id <=| 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id <=| 400 LIMIT 5; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id <=| '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id <=| '400'::bigint)) + -> Seq Scan on test_int8_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id <=| 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id <=| 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? +-----+---------- + 371 | 29 + 355 | 45 + 354 | 46 + 252 | 148 + 232 | 168 +(5 rows) + EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; - QUERY PLAN -------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id |=> '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id |=> '400'::bigint)) + -> Seq Scan on test_int8_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id |=> 400 FROM test_int8_a WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? 
+-----+---------- + 406 | 6 + 415 | 15 + 428 | 28 + 457 | 57 + 458 | 58 +(5 rows) + EXPLAIN (costs off) SELECT id FROM test_int8_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) + -> Seq Scan on test_int8_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) (4 rows) SELECT id FROM test_int8_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; @@ -298,12 +326,12 @@ SELECT id FROM test_int8_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; EXPLAIN (costs off) SELECT id FROM test_int8_a WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_int8_a_idx on test_int8_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::bigint)) + -> Seq Scan on test_int8_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::bigint)) (4 rows) SELECT id FROM test_int8_a WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; @@ -474,14 +502,15 @@ CREATE TABLE test_int8_h_a AS SELECT id::int8, t FROM tsts; CREATE INDEX test_int8_h_a_idx ON test_int8_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); +ERROR: doesn't support order index over pass-by-reference column SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +-------------------------------------- Aggregate - -> Index Scan using 
test_int8_h_a_idx on test_int8_h_a - Index Cond: (id < '400'::bigint) + -> Seq Scan on test_int8_h_a + Filter: (id < '400'::bigint) (3 rows) SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; @@ -492,48 +521,75 @@ SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_h_a_idx on test_int8_h_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id <=> '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id <=> '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id <=> 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? +-----+---------- + 406 | 6 + 415 | 15 + 428 | 28 + 371 | 29 + 355 | 45 +(5 rows) + EXPLAIN (costs off) SELECT id, id <=| 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id <=| 400 LIMIT 5; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_h_a_idx on test_int8_h_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id <=| '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id <=| '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id <=| 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id <=| 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? 
+-----+---------- + 371 | 29 + 355 | 45 + 354 | 46 + 252 | 148 + 232 | 168 +(5 rows) + EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; - QUERY PLAN ------------------------------------------------------------ + QUERY PLAN +--------------------------------------------------------- Limit - -> Index Scan using test_int8_h_a_idx on test_int8_h_a - Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) - Order By: (id |=> '400'::bigint) -(4 rows) + -> Sort + Sort Key: ((id |=> '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: (t @@ '''wr'' & ''qh'''::tsquery) +(5 rows) SELECT id, id |=> 400 FROM test_int8_h_a WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; -ERROR: doesn't support order by over pass-by-reference column + id | ?column? +-----+---------- + 406 | 6 + 415 | 15 + 428 | 28 + 457 | 57 + 458 | 58 +(5 rows) + EXPLAIN (costs off) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_int8_h_a_idx on test_int8_h_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) (4 rows) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; @@ -553,12 +609,12 @@ SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id EXPLAIN (costs off) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; - QUERY PLAN ------------------------------------------------------------------------------------ + QUERY PLAN +------------------------------------------------------------------------------- Sort Sort Key: id - -> Index Scan using test_int8_h_a_idx on test_int8_h_a - Index 
Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id >= '400'::bigint)) (4 rows) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; @@ -578,12 +634,26 @@ CREATE INDEX test_int8_id_t_idx ON test_int8_o USING rum (t rum_tsvector_ops, id); EXPLAIN (costs off) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id <=> 400::int8; - QUERY PLAN ------------------------------------------------------------------------------ - Index Scan using test_int8_h_a_idx on test_int8_h_a - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) - Order By: (id <=> '400'::bigint) -(3 rows) + QUERY PLAN +------------------------------------------------------------------------------- + Sort + Sort Key: ((id <=> '400'::bigint)) + -> Seq Scan on test_int8_h_a + Filter: ((t @@ '''wr'' & ''qh'''::tsquery) AND (id <= '400'::bigint)) +(4 rows) SELECT id FROM test_int8_h_a WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id <=> 400::int8; -ERROR: doesn't support order by over pass-by-reference column + id +----- + 371 + 355 + 354 + 252 + 232 + 168 + 135 + 71 + 39 + 16 +(10 rows) + From 039b495c085e0dd9cbf04f9cc60231d6734a1093 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 22 Jan 2019 20:04:36 +0300 Subject: [PATCH 038/182] Issue #52: Handle rbtree renames for PostgreSQL 11.0 --- src/rum.h | 2 +- src/rumbulk.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rum.h b/src/rum.h index 9dcb41a730..30d2f78daa 100644 --- a/src/rum.h +++ b/src/rum.h @@ -783,7 +783,7 @@ extern IndexBulkDeleteResult *rumvacuumcleanup(IndexVacuumInfo *info, extern bool rumvalidate(Oid opclassoid); /* rumbulk.c */ -#if PG_VERSION_NUM <= 100006 +#if PG_VERSION_NUM <= 100006 || PG_VERSION_NUM == 110000 typedef RBNode RBTNode; #endif diff --git a/src/rumbulk.c b/src/rumbulk.c index 9bbe88da6a..f4dbe432ac 100644 --- 
a/src/rumbulk.c +++ b/src/rumbulk.c @@ -22,7 +22,7 @@ #define DEF_NPTR 5 /* ItemPointer initial allocation quantum */ /* PostgreSQL pre 10 has different names for this functions */ -#if PG_VERSION_NUM <= 100006 +#if PG_VERSION_NUM <= 100006 || PG_VERSION_NUM == 110000 #define rbt_create(node_size, comparator, combiner, allocfunc, freefunc, arg) \ (rb_create(node_size, comparator, combiner, allocfunc, freefunc, arg)) #define rbt_insert(rbt, data, isNew) \ @@ -280,7 +280,7 @@ qsortCompareRumItem(const void *a, const void *b, void *arg) void rumBeginBAScan(BuildAccumulator *accum) { -#if PG_VERSION_NUM > 100006 +#if PG_VERSION_NUM > 100006 && PG_VERSION_NUM >= 110001 rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); #elif PG_VERSION_NUM >= 100000 rb_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); @@ -302,7 +302,7 @@ rumGetBAEntry(BuildAccumulator *accum, RumEntryAccumulator *entry; RumItem *list; -#if PG_VERSION_NUM > 100006 +#if PG_VERSION_NUM > 100006 && PG_VERSION_NUM >= 110001 entry = (RumEntryAccumulator *) rbt_iterate(&accum->tree_walk); #elif PG_VERSION_NUM >= 100000 entry = (RumEntryAccumulator *) rb_iterate(&accum->tree_walk); From ddd144bdcb6decf80a71c2e55e67d3eb801b5954 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 8 Feb 2019 18:34:00 +0300 Subject: [PATCH 039/182] PGPRO-2412: Test bitmap index scan and index scan more accurately --- expected/orderby.out | 227 ++++++++++++++++++++++++++---------- expected/orderby_1.out | 219 ++++++++++++++++++++++++---------- expected/orderby_hash.out | 227 ++++++++++++++++++++++++++---------- expected/orderby_hash_1.out | 219 ++++++++++++++++++++++++---------- sql/orderby.sql | 40 +++++-- sql/orderby_hash.sql | 40 +++++-- 6 files changed, 702 insertions(+), 270 deletions(-) diff --git a/expected/orderby.out b/expected/orderby.out index 38cda70f32..9bc421970c 100644 --- a/expected/orderby.out +++ b/expected/orderby.out @@ -2,8 +2,82 @@ CREATE TABLE tsts (id int, t tsvector, d 
timestamp); \copy tsts from 'data/tsts.data' CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +SET enable_indexscan=OFF; +SET enable_indexonlyscan=OFF; +SET enable_bitmapscan=OFF; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 +(9 rows) + +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(8 rows) + +-- Test bitmap index scan +RESET enable_bitmapscan; +SET enable_seqscan = off; +EXPLAIN (costs off) +SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(5 rows) + SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; count ------- @@ -40,9 +114,19 @@ SELECT count(*) FROM tsts WHERE t @@ '(eq|yt)&(wr|qh)'; 39 (1 row) -SET enable_indexscan=OFF; -SET enable_indexonlyscan=OFF; -SET enable_bitmapscan=OFF; +EXPLAIN (costs off) +SELECT id, 
d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -53,6 +137,19 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -63,6 +160,19 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -73,6 +183,37 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +----------------------------------------------------------------------------------- + Limit + -> Index Scan using tsts_idx on tsts + Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) + +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 + 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 +(5 rows) + +EXPLAIN (costs off) +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tsts + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tsts_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -87,6 +228,18 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER 355 | Mon May 16 14:21:22.326724 2016 (9 rows) +EXPLAIN (costs off) +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tsts + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tsts_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -100,20 +253,18 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= 
'2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) +-- Test index scan RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; -SET enable_seqscan = off; +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on tsts - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on tsts_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Index Scan using tsts_idx on tsts + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; count @@ -277,54 +428,6 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -SET enable_bitmapscan=OFF; -EXPLAIN (costs off) -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tsts_idx on tsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 16 | Mon May 02 11:21:22.326724 2016 - 39 | Tue May 03 10:21:22.326724 2016 - 71 | Wed May 04 18:21:22.326724 2016 - 135 | Sat May 07 10:21:22.326724 2016 - 168 | Sun May 08 19:21:22.326724 2016 - 232 | Wed May 11 11:21:22.326724 2016 - 252 | Thu May 12 07:21:22.326724 2016 - 354 | Mon May 16 13:21:22.326724 2016 - 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) - -EXPLAIN (costs off) -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - 
QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tsts_idx on tsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 371 | Tue May 17 06:21:22.326724 2016 - 406 | Wed May 18 17:21:22.326724 2016 - 415 | Thu May 19 02:21:22.326724 2016 - 428 | Thu May 19 15:21:22.326724 2016 - 457 | Fri May 20 20:21:22.326724 2016 - 458 | Fri May 20 21:21:22.326724 2016 - 484 | Sat May 21 23:21:22.326724 2016 - 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) - SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d ASC LIMIT 3; id | d ----+--------------------------------- diff --git a/expected/orderby_1.out b/expected/orderby_1.out index 09ace4276c..a6fb68c1e3 100644 --- a/expected/orderby_1.out +++ b/expected/orderby_1.out @@ -2,8 +2,82 @@ CREATE TABLE tsts (id int, t tsvector, d timestamp); \copy tsts from 'data/tsts.data' CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +SET enable_indexscan=OFF; +SET enable_indexonlyscan=OFF; +SET enable_bitmapscan=OFF; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 +(9 rows) + +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed 
May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(8 rows) + +-- Test bitmap index scan +RESET enable_bitmapscan; +SET enable_seqscan = off; +EXPLAIN (costs off) +SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(5 rows) + SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; count ------- @@ -40,9 +114,19 @@ SELECT count(*) FROM tsts WHERE t @@ '(eq|yt)&(wr|qh)'; 39 (1 row) -SET enable_indexscan=OFF; -SET enable_indexonlyscan=OFF; -SET enable_bitmapscan=OFF; +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -53,6 +137,19 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -63,6 +160,19 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tsts + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tsts_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -73,6 +183,29 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +----------------------------------------------------------------------------------- + Limit + -> Index Scan using tsts_idx on tsts + Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) + +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +ERROR: doesn't support order by over pass-by-reference column +EXPLAIN (costs off) +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tsts + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tsts_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -87,6 +220,18 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER 355 | Mon May 16 14:21:22.326724 2016 (9 rows) +EXPLAIN (costs off) +SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tsts + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp 
without time zone)) + -> Bitmap Index Scan on tsts_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -100,20 +245,18 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) +-- Test index scan RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; -SET enable_seqscan = off; +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on tsts - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on tsts_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Index Scan using tsts_idx on tsts + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; count @@ -245,54 +388,6 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -SET enable_bitmapscan=OFF; -EXPLAIN (costs off) -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tsts_idx on tsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 16 | Mon May 02 11:21:22.326724 2016 - 39 | Tue May 03 10:21:22.326724 2016 - 71 | Wed May 
04 18:21:22.326724 2016 - 135 | Sat May 07 10:21:22.326724 2016 - 168 | Sun May 08 19:21:22.326724 2016 - 232 | Wed May 11 11:21:22.326724 2016 - 252 | Thu May 12 07:21:22.326724 2016 - 354 | Mon May 16 13:21:22.326724 2016 - 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) - -EXPLAIN (costs off) -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tsts_idx on tsts - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 371 | Tue May 17 06:21:22.326724 2016 - 406 | Wed May 18 17:21:22.326724 2016 - 415 | Thu May 19 02:21:22.326724 2016 - 428 | Thu May 19 15:21:22.326724 2016 - 457 | Fri May 20 20:21:22.326724 2016 - 458 | Fri May 20 21:21:22.326724 2016 - 484 | Sat May 21 23:21:22.326724 2016 - 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) - SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d ASC LIMIT 3; id | d ----+--------------------------------- diff --git a/expected/orderby_hash.out b/expected/orderby_hash.out index 1636088fdb..7ff1794c5f 100644 --- a/expected/orderby_hash.out +++ b/expected/orderby_hash.out @@ -2,8 +2,82 @@ CREATE TABLE tstsh (id int, t tsvector, d timestamp); \copy tstsh from 'data/tsts.data' CREATE INDEX tstsh_idx ON tstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +SET 
enable_indexscan=OFF; +SET enable_indexonlyscan=OFF; +SET enable_bitmapscan=OFF; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 +(9 rows) + +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(8 rows) + +-- Test bitmap index scan +RESET enable_bitmapscan; +SET enable_seqscan = off; +EXPLAIN (costs off) +SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(5 rows) + SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; count ------- @@ -40,9 +114,19 @@ SELECT count(*) FROM tstsh WHERE t @@ '(eq|yt)&(wr|qh)'; 39 (1 row) -SET enable_indexscan=OFF; -SET enable_indexonlyscan=OFF; -SET enable_bitmapscan=OFF; +EXPLAIN (costs off) 
+SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -53,6 +137,19 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -63,6 +160,19 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -73,6 +183,37 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +----------------------------------------------------------------------------------- + Limit + -> Index Scan using tstsh_idx on tstsh + Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) + +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 + 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 +(5 rows) + +EXPLAIN (costs off) +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tstsh + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tstsh_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -87,6 +228,18 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER 355 | Mon May 16 14:21:22.326724 2016 (9 rows) +EXPLAIN (costs off) +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tstsh + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tstsh_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -100,20 +253,18 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= 
'2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) +-- Test index scan RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; -SET enable_seqscan = off; +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on tstsh - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on tstsh_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Index Scan using tstsh_idx on tstsh + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; count @@ -277,54 +428,6 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -SET enable_bitmapscan=OFF; -EXPLAIN (costs off) -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tstsh_idx on tstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 16 | Mon May 02 11:21:22.326724 2016 - 39 | Tue May 03 10:21:22.326724 2016 - 71 | Wed May 04 18:21:22.326724 2016 - 135 | Sat May 07 10:21:22.326724 2016 - 168 | Sun May 08 19:21:22.326724 2016 - 232 | Wed May 11 11:21:22.326724 2016 - 252 | Thu May 12 07:21:22.326724 2016 - 354 | Mon May 16 13:21:22.326724 2016 - 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) - -EXPLAIN (costs off) -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 
BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tstsh_idx on tstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 371 | Tue May 17 06:21:22.326724 2016 - 406 | Wed May 18 17:21:22.326724 2016 - 415 | Thu May 19 02:21:22.326724 2016 - 428 | Thu May 19 15:21:22.326724 2016 - 457 | Fri May 20 20:21:22.326724 2016 - 458 | Fri May 20 21:21:22.326724 2016 - 484 | Sat May 21 23:21:22.326724 2016 - 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) - SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d ASC LIMIT 3; id | d ----+--------------------------------- diff --git a/expected/orderby_hash_1.out b/expected/orderby_hash_1.out index 8182aff567..f32267631c 100644 --- a/expected/orderby_hash_1.out +++ b/expected/orderby_hash_1.out @@ -2,8 +2,82 @@ CREATE TABLE tstsh (id int, t tsvector, d timestamp); \copy tstsh from 'data/tsts.data' CREATE INDEX tstsh_idx ON tstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +SET enable_indexscan=OFF; +SET enable_indexonlyscan=OFF; +SET enable_bitmapscan=OFF; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? 
+-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 +(5 rows) + +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 + 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 + 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 + 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 +(5 rows) + +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + id | d | ?column? +-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 + 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 + 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 +(5 rows) + +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 +(9 rows) + +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 371 | Tue May 17 06:21:22.326724 2016 + 406 | 
Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(8 rows) + +-- Test bitmap index scan +RESET enable_bitmapscan; +SET enable_seqscan = off; +EXPLAIN (costs off) +SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; + QUERY PLAN +------------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(5 rows) + SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; count ------- @@ -40,9 +114,19 @@ SELECT count(*) FROM tstsh WHERE t @@ '(eq|yt)&(wr|qh)'; 39 (1 row) -SET enable_indexscan=OFF; -SET enable_indexonlyscan=OFF; -SET enable_bitmapscan=OFF; +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -53,6 +137,19 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d <=| 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- @@ -63,6 +160,19 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +------------------------------------------------------------------------------------- + Limit + -> Sort + Sort Key: ((d |=> 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Heap Scan on tstsh + Recheck Cond: (t @@ '''wr'' & ''qh'''::tsquery) + -> Bitmap Index Scan on tstsh_idx + Index Cond: (t @@ '''wr'' & ''qh'''::tsquery) +(7 rows) + SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- @@ -73,6 +183,29 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + QUERY PLAN +----------------------------------------------------------------------------------- + Limit + -> Index Scan using tstsh_idx on tstsh + Order By: (d <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(3 rows) + +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +ERROR: doesn't support order by over pass-by-reference column +EXPLAIN (costs off) +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tstsh + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) + -> Bitmap Index Scan on tstsh_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -87,6 +220,18 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER 355 | Mon May 16 14:21:22.326724 2016 (9 rows) +EXPLAIN (costs off) +SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Bitmap Heap Scan on tstsh + Recheck Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 
2016'::timestamp without time zone)) + -> Bitmap Index Scan on tstsh_idx + Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(6 rows) + SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- @@ -100,20 +245,18 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) +-- Test index scan RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; -SET enable_seqscan = off; +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; - QUERY PLAN -------------------------------------------------------------- + QUERY PLAN +------------------------------------------------------- Aggregate - -> Bitmap Heap Scan on tstsh - Recheck Cond: (t @@ '''wr'' | ''qh'''::tsquery) - -> Bitmap Index Scan on tstsh_idx - Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) -(5 rows) + -> Index Scan using tstsh_idx on tstsh + Index Cond: (t @@ '''wr'' | ''qh'''::tsquery) +(3 rows) SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; count @@ -245,54 +388,6 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -SET enable_bitmapscan=OFF; -EXPLAIN (costs off) -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tstsh_idx on tstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d <= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 16 | Mon May 02 11:21:22.326724 2016 - 39 | Tue May 03 
10:21:22.326724 2016 - 71 | Wed May 04 18:21:22.326724 2016 - 135 | Sat May 07 10:21:22.326724 2016 - 168 | Sun May 08 19:21:22.326724 2016 - 232 | Wed May 11 11:21:22.326724 2016 - 252 | Thu May 12 07:21:22.326724 2016 - 354 | Mon May 16 13:21:22.326724 2016 - 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) - -EXPLAIN (costs off) -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - QUERY PLAN ----------------------------------------------------------------------------------------------------------------------------- - Sort - Sort Key: d - -> Index Scan using tstsh_idx on tstsh - Index Cond: ((t @@ '''wr'' & ''qh'''::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) -(4 rows) - -SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; - id | d ------+--------------------------------- - 371 | Tue May 17 06:21:22.326724 2016 - 406 | Wed May 18 17:21:22.326724 2016 - 415 | Thu May 19 02:21:22.326724 2016 - 428 | Thu May 19 15:21:22.326724 2016 - 457 | Fri May 20 20:21:22.326724 2016 - 458 | Fri May 20 21:21:22.326724 2016 - 484 | Sat May 21 23:21:22.326724 2016 - 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) - SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d ASC LIMIT 3; id | d ----+--------------------------------- diff --git a/sql/orderby.sql b/sql/orderby.sql index f254483ae0..28e5b6038b 100644 --- a/sql/orderby.sql +++ b/sql/orderby.sql @@ -6,17 +6,10 @@ CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); -SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; -SELECT count(*) FROM tsts WHERE t @@ 'wr&qh'; -SELECT 
count(*) FROM tsts WHERE t @@ 'eq&yt'; -SELECT count(*) FROM tsts WHERE t @@ 'eq|yt'; -SELECT count(*) FROM tsts WHERE t @@ '(eq&yt)|(wr&qh)'; -SELECT count(*) FROM tsts WHERE t @@ '(eq|yt)&(wr|qh)'; - SET enable_indexscan=OFF; SET enable_indexonlyscan=OFF; SET enable_bitmapscan=OFF; @@ -27,8 +20,7 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; -RESET enable_indexscan; -RESET enable_indexonlyscan; +-- Test bitmap index scan RESET enable_bitmapscan; SET enable_seqscan = off; @@ -62,8 +54,34 @@ EXPLAIN (costs off) SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; +-- Test index scan +RESET enable_indexscan; +RESET enable_indexonlyscan; SET enable_bitmapscan=OFF; +EXPLAIN (costs off) +SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; +SELECT count(*) FROM tsts WHERE t @@ 'wr|qh'; +SELECT count(*) FROM tsts WHERE t @@ 'wr&qh'; +SELECT count(*) FROM tsts WHERE t @@ 'eq&yt'; +SELECT count(*) FROM tsts WHERE t @@ 'eq|yt'; +SELECT count(*) FROM tsts WHERE t @@ '(eq&yt)|(wr&qh)'; +SELECT count(*) FROM tsts WHERE t @@ '(eq|yt)&(wr|qh)'; + +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 
'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + EXPLAIN (costs off) SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; diff --git a/sql/orderby_hash.sql b/sql/orderby_hash.sql index 66a45268ca..f7e9808538 100644 --- a/sql/orderby_hash.sql +++ b/sql/orderby_hash.sql @@ -6,17 +6,10 @@ CREATE INDEX tstsh_idx ON tstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't'); -INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO tstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO tstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); -SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; -SELECT count(*) FROM tstsh WHERE t @@ 'wr&qh'; -SELECT count(*) FROM tstsh WHERE t @@ 'eq&yt'; -SELECT count(*) FROM tstsh WHERE t @@ 'eq|yt'; -SELECT count(*) FROM tstsh WHERE t @@ '(eq&yt)|(wr&qh)'; -SELECT count(*) FROM tstsh WHERE t @@ '(eq|yt)&(wr|qh)'; - SET enable_indexscan=OFF; SET enable_indexonlyscan=OFF; SET enable_bitmapscan=OFF; @@ -27,8 +20,7 @@ SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; -RESET enable_indexscan; -RESET enable_indexonlyscan; +-- Test bitmap index scan RESET enable_bitmapscan; SET enable_seqscan = off; @@ -62,8 +54,34 @@ EXPLAIN (costs off) SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; +-- Test index scan +RESET enable_indexscan; 
+RESET enable_indexonlyscan; SET enable_bitmapscan=OFF; +EXPLAIN (costs off) +SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; +SELECT count(*) FROM tstsh WHERE t @@ 'wr|qh'; +SELECT count(*) FROM tstsh WHERE t @@ 'wr&qh'; +SELECT count(*) FROM tstsh WHERE t @@ 'eq&yt'; +SELECT count(*) FROM tstsh WHERE t @@ 'eq|yt'; +SELECT count(*) FROM tstsh WHERE t @@ '(eq&yt)|(wr&qh)'; +SELECT count(*) FROM tstsh WHERE t @@ '(eq|yt)&(wr|qh)'; + +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +EXPLAIN (costs off) +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=| '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; +EXPLAIN (costs off) +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d |=> '2016-05-16 14:21:25' FROM tstsh WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; + +EXPLAIN (costs off) +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tstsh ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + EXPLAIN (costs off) SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM tstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; From b0eb0430649c4ac86ed9358d2fb88d54f8ab1929 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 11 Feb 2019 11:42:01 +0300 Subject: [PATCH 040/182] Handle rbtree renames for PostgreSQL 10.7 --- src/rumbulk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/rumbulk.c b/src/rumbulk.c index f4dbe432ac..15868cc34f 100644 --- a/src/rumbulk.c 
+++ b/src/rumbulk.c @@ -280,7 +280,7 @@ qsortCompareRumItem(const void *a, const void *b, void *arg) void rumBeginBAScan(BuildAccumulator *accum) { -#if PG_VERSION_NUM > 100006 && PG_VERSION_NUM >= 110001 +#if (PG_VERSION_NUM > 100006 && PG_VERSION_NUM < 110000) || PG_VERSION_NUM >= 110001 rbt_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); #elif PG_VERSION_NUM >= 100000 rb_begin_iterate(accum->tree, LeftRightWalk, &accum->tree_walk); @@ -302,7 +302,7 @@ rumGetBAEntry(BuildAccumulator *accum, RumEntryAccumulator *entry; RumItem *list; -#if PG_VERSION_NUM > 100006 && PG_VERSION_NUM >= 110001 +#if (PG_VERSION_NUM > 100006 && PG_VERSION_NUM < 110000) || PG_VERSION_NUM >= 110001 entry = (RumEntryAccumulator *) rbt_iterate(&accum->tree_walk); #elif PG_VERSION_NUM >= 100000 entry = (RumEntryAccumulator *) rb_iterate(&accum->tree_walk); From c57a87b16277052cbceb9d2eed054e004793ba86 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 26 Feb 2019 17:16:46 +0300 Subject: [PATCH 041/182] Run wal-check by check command --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 19476e53af..36ae70c8d2 100644 --- a/Makefile +++ b/Makefile @@ -52,6 +52,8 @@ endif wal-check: temp-install $(prove_check) +check: wal-check + all: $(SQL_built) #9.6 requires 1.3 file but 10.0 could live with update files From 98d0097d26c29e696173fe2e6256a7dc9cd03fbc Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 15 Mar 2019 16:11:35 +0300 Subject: [PATCH 042/182] PGPRO-2534: Page get uninitialized because of PostingItem aligning --- src/rumbulk.c | 1 + src/rumdatapage.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/rumbulk.c b/src/rumbulk.c index 15868cc34f..a84661484e 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -167,6 +167,7 @@ rumInsertBAEntry(BuildAccumulator *accum, eatmp.category = category; /* temporarily set up single-entry itempointer list */ eatmp.list = &item; + memset(&item, 0, sizeof(item)); item.iptr = *heapptr; 
item.addInfo = addInfo; item.addInfoIsNull = addInfoIsNull; diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 18b537a2fc..f948a8627d 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -1056,6 +1056,7 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, int maxItemIndex = btree->curitem; static char lpageCopy[BLCKSZ]; + memset(&item, 0, sizeof(item)); dataPrepareData(btree, newlPage, off); maxoff = RumPageGetOpaque(newlPage)->maxoff; From a2448b5ec1aaf2bb2fb4377e298f6aee9dd23302 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 15 Mar 2019 17:35:00 +0300 Subject: [PATCH 043/182] PGPRO-2537: Alexander Lakhin: Initialize with zeroes RumSortItem --- expected/altorder.out | 223 +++++++++++++++++++++++++++++++----------- sql/altorder.sql | 7 +- src/rumget.c | 4 +- 3 files changed, 171 insertions(+), 63 deletions(-) diff --git a/expected/altorder.out b/expected/altorder.out index f99f0b1e81..23f4843982 100644 --- a/expected/altorder.out +++ b/expected/altorder.out @@ -1,116 +1,170 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' CREATE INDEX atsts_idx ON atsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); -INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; count ------- - 158 + 632 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'wr&qh'; count ------- - 17 + 68 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq&yt'; count ------- - 6 + 24 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq|yt'; count ------- - 98 + 392 (1 row) SELECT count(*) FROM atsts WHERE t @@ 
'(eq&yt)|(wr&qh)'; count ------- - 23 + 92 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq|yt)&(wr|qh)'; count ------- - 39 + 156 (1 row) SET enable_indexscan=OFF; SET enable_indexonlyscan=OFF; SET enable_bitmapscan=OFF; SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 - 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 - 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 - 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 - 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 - 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; count ------- - 357 + 1422 (1 row) SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; count ------- - 153 + 612 (1 row) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 71 | Wed May 04 18:21:22.326724 2016 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 232 | Wed May 11 11:21:22.326724 2016 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 
2016 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) + 355 | Mon May 16 14:21:22.326724 2016 +(36 rows) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 406 | Wed May 18 17:21:22.326724 2016 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 484 | Sat May 21 23:21:22.326724 2016 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(32 rows) RESET enable_indexscan; RESET enable_indexonlyscan; @@ -130,37 +184,37 @@ SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; count ------- - 158 + 632 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'wr&qh'; count ------- - 17 + 68 (1 row) SELECT count(*) FROM atsts WHERE t @@ 
'eq&yt'; count ------- - 6 + 24 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq|yt'; count ------- - 98 + 392 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq&yt)|(wr&qh)'; count ------- - 23 + 92 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq|yt)&(wr|qh)'; count ------- - 39 + 156 (1 row) EXPLAIN (costs off) @@ -177,7 +231,7 @@ SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; count ------- - 357 + 1422 (1 row) EXPLAIN (costs off) @@ -194,7 +248,7 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; count ------- - 153 + 612 (1 row) EXPLAIN (costs off) @@ -208,13 +262,13 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY (4 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 - 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) EXPLAIN (costs off) @@ -228,13 +282,13 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY (4 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? 
------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 - 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 - 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) EXPLAIN (costs off) @@ -250,11 +304,11 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 - 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 - 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) EXPLAIN (costs off) @@ -269,11 +323,11 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 - 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 (5 rows) EXPLAIN (costs off) @@ -290,15 +344,42 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER id | d -----+--------------------------------- 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 71 | Wed May 04 18:21:22.326724 2016 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 232 | Wed May 11 11:21:22.326724 2016 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 355 | Mon May 16 
14:21:22.326724 2016 -(9 rows) + 355 | Mon May 16 14:21:22.326724 2016 +(36 rows) EXPLAIN (costs off) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; @@ -314,12 +395,36 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER id | d -----+--------------------------------- 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 457 | Fri May 20 20:21:22.326724 2016 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) +(32 rows) diff --git a/sql/altorder.sql b/sql/altorder.sql index 85c6cdf630..d68eb20288 100644 --- a/sql/altorder.sql +++ b/sql/altorder.sql @@ -1,13 +1,16 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' \copy atsts from 'data/tsts.data' CREATE INDEX atsts_idx ON atsts 
USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); -INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; diff --git a/src/rumget.c b/src/rumget.c index 2e5dd2593b..e67c66f6aa 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -2241,8 +2241,8 @@ insertScanItem(RumScanOpaque so, bool recheck) j; item = (RumSortItem *) - MemoryContextAlloc(rum_tuplesort_get_memorycontext(so->sortstate), - RumSortItemSize(so->norderbys)); + MemoryContextAllocZero(rum_tuplesort_get_memorycontext(so->sortstate), + RumSortItemSize(so->norderbys)); item->iptr = so->item.iptr; item->recheck = recheck; From 6dfb41a86c54e4a3bb4e3ee619684db26ca6814e Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 15 Mar 2019 17:37:44 +0300 Subject: [PATCH 044/182] Add comment into altorder.sql --- expected/altorder.out | 1 + sql/altorder.sql | 1 + 2 files changed, 2 insertions(+) diff --git a/expected/altorder.out b/expected/altorder.out index 23f4843982..24d3e1ea79 100644 --- a/expected/altorder.out +++ b/expected/altorder.out @@ -1,5 +1,6 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' +-- PGPRO-2537: We need more data to test rumsort.c with logtape.c \copy atsts from 'data/tsts.data' \copy atsts from 'data/tsts.data' \copy atsts from 'data/tsts.data' diff --git a/sql/altorder.sql b/sql/altorder.sql index d68eb20288..5f74b7a7b5 100644 --- a/sql/altorder.sql +++ b/sql/altorder.sql @@ -1,6 +1,7 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' +-- PGPRO-2537: We need more data to test rumsort.c with logtape.c \copy atsts from 'data/tsts.data' \copy atsts from 'data/tsts.data' \copy atsts from 
'data/tsts.data' From 4ae20818c496220862cb1ac309abeffff8cf3d73 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 15 Mar 2019 19:10:32 +0300 Subject: [PATCH 045/182] PGPRO-2538: Improve tests, force Index Scan --- expected/altorder_1.out | 224 ++++++++++++++++++++++++++--------- expected/altorder_hash.out | 13 +- expected/altorder_hash_1.out | 13 +- sql/altorder_hash.sql | 14 ++- 4 files changed, 190 insertions(+), 74 deletions(-) diff --git a/expected/altorder_1.out b/expected/altorder_1.out index ee9396b80c..0556317f60 100644 --- a/expected/altorder_1.out +++ b/expected/altorder_1.out @@ -1,117 +1,172 @@ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' +-- PGPRO-2537: We need more data to test rumsort.c with logtape.c +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' +\copy atsts from 'data/tsts.data' CREATE INDEX atsts_idx ON atsts USING rum (t rum_tsvector_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); ERROR: doesn't support order index over pass-by-reference column -INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atsts VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atsts VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; count ------- - 158 + 632 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'wr&qh'; count ------- - 17 + 68 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq&yt'; count ------- - 6 + 24 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq|yt'; count ------- - 98 + 392 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq&yt)|(wr&qh)'; count ------- - 23 + 92 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq|yt)&(wr|qh)'; count ------- - 39 + 156 (1 row) SET enable_indexscan=OFF; SET enable_indexonlyscan=OFF; SET enable_bitmapscan=OFF; SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t 
@@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 - 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 - 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 - 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 - 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 - 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; count ------- - 357 + 1422 (1 row) SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; count ------- - 153 + 612 (1 row) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 71 | Wed May 04 18:21:22.326724 2016 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 232 | Wed May 11 11:21:22.326724 2016 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 
2016 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 355 | Mon May 16 14:21:22.326724 2016 -(9 rows) + 355 | Mon May 16 14:21:22.326724 2016 +(36 rows) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; id | d -----+--------------------------------- 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 406 | Wed May 18 17:21:22.326724 2016 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 484 | Sat May 21 23:21:22.326724 2016 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 +(32 rows) RESET enable_indexscan; RESET enable_indexonlyscan; @@ -129,37 +184,37 @@ SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; count ------- - 158 + 632 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'wr&qh'; count ------- - 17 + 68 (1 row) SELECT count(*) FROM atsts WHERE t @@ 
'eq&yt'; count ------- - 6 + 24 (1 row) SELECT count(*) FROM atsts WHERE t @@ 'eq|yt'; count ------- - 98 + 392 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq&yt)|(wr&qh)'; count ------- - 23 + 92 (1 row) SELECT count(*) FROM atsts WHERE t @@ '(eq|yt)&(wr|qh)'; count ------- - 39 + 156 (1 row) EXPLAIN (costs off) @@ -174,7 +229,7 @@ SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; SELECT count(*) FROM atsts WHERE d < '2016-05-16 14:21:25'; count ------- - 357 + 1422 (1 row) EXPLAIN (costs off) @@ -189,7 +244,7 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; count ------- - 153 + 612 (1 row) EXPLAIN (costs off) @@ -204,13 +259,13 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY (5 rows) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? ------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 - 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) EXPLAIN (costs off) @@ -225,13 +280,13 @@ SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY (5 rows) SELECT id, d, d <=| '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=| '2016-05-16 14:21:25' LIMIT 5; - id | d | ?column? 
------+---------------------------------+--------------- - 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 252 | Thu May 12 07:21:22.326724 2016 | 370802.673276 - 232 | Wed May 11 11:21:22.326724 2016 | 442802.673276 - 168 | Sun May 08 19:21:22.326724 2016 | 673202.673276 + id | d | ?column? +-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 (5 rows) EXPLAIN (costs off) @@ -248,11 +303,11 @@ SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY SELECT id, d, d |=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d |=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? -----+---------------------------------+--------------- + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 + 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 371 | Tue May 17 06:21:22.326724 2016 | 57597.326724 406 | Wed May 18 17:21:22.326724 2016 | 183597.326724 - 415 | Thu May 19 02:21:22.326724 2016 | 215997.326724 - 428 | Thu May 19 15:21:22.326724 2016 | 262797.326724 - 457 | Fri May 20 20:21:22.326724 2016 | 367197.326724 (5 rows) EXPLAIN (costs off) @@ -268,11 +323,11 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; id | d | ?column? 
-----+---------------------------------+------------- + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 + 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 355 | Mon May 16 14:21:22.326724 2016 | 2.673276 356 | Mon May 16 15:21:22.326724 2016 | 3597.326724 - 354 | Mon May 16 13:21:22.326724 2016 | 3602.673276 - 357 | Mon May 16 16:21:22.326724 2016 | 7197.326724 - 353 | Mon May 16 12:21:22.326724 2016 | 7202.673276 (5 rows) EXPLAIN (costs off) @@ -289,15 +344,42 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER id | d -----+--------------------------------- 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 16 | Mon May 02 11:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 39 | Tue May 03 10:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 + 71 | Wed May 04 18:21:22.326724 2016 71 | Wed May 04 18:21:22.326724 2016 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 135 | Sat May 07 10:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 168 | Sun May 08 19:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 + 232 | Wed May 11 11:21:22.326724 2016 232 | Wed May 11 11:21:22.326724 2016 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 252 | Thu May 12 07:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 354 | Mon May 16 13:21:22.326724 2016 + 354 | Mon May 16 13:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 + 355 | Mon May 16 14:21:22.326724 2016 355 | Mon May 16 
14:21:22.326724 2016 -(9 rows) + 355 | Mon May 16 14:21:22.326724 2016 +(36 rows) EXPLAIN (costs off) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; @@ -313,12 +395,36 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER id | d -----+--------------------------------- 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 457 | Fri May 20 20:21:22.326724 2016 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 496 | Sun May 22 11:21:22.326724 2016 -(8 rows) +(32 rows) diff --git a/expected/altorder_hash.out b/expected/altorder_hash.out index a828287541..125c195be9 100644 --- a/expected/altorder_hash.out +++ b/expected/altorder_hash.out @@ -2,8 +2,8 @@ CREATE TABLE atstsh (id int, t tsvector, d timestamp); \copy atstsh from 'data/tsts.data' CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', 
to = 't', order_by_attach='t'); -INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; count ------- @@ -112,9 +112,8 @@ SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDE 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; @@ -197,6 +196,10 @@ SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; 153 (1 row) +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; QUERY PLAN diff --git a/expected/altorder_hash_1.out b/expected/altorder_hash_1.out index 835c4ed88b..2d93f3f5eb 100644 --- a/expected/altorder_hash_1.out +++ b/expected/altorder_hash_1.out @@ -3,8 +3,8 @@ CREATE TABLE atstsh (id int, t tsvector, d timestamp); CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); ERROR: doesn't support order index over pass-by-reference column -INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; count ------- @@ -113,9 +113,8 @@ SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 
14:21:25' ORDE 496 | Sun May 22 11:21:22.326724 2016 (8 rows) -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; @@ -192,6 +191,10 @@ SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; 153 (1 row) +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; QUERY PLAN diff --git a/sql/altorder_hash.sql b/sql/altorder_hash.sql index ff07b4f769..3b723876f9 100644 --- a/sql/altorder_hash.sql +++ b/sql/altorder_hash.sql @@ -6,8 +6,8 @@ CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) WITH (attach = 'd', to = 't', order_by_attach='t'); -INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); -INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); +INSERT INTO atstsh VALUES (-1, 't1 t2', '2016-05-02 02:24:22.326724'); +INSERT INTO atstsh VALUES (-2, 't1 t2 t3', '2016-05-02 02:26:22.326724'); SELECT count(*) FROM atstsh WHERE t @@ 'wr|qh'; @@ -30,9 +30,8 @@ SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM atstsh WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; EXPLAIN (costs off) @@ -52,6 +51,11 @@ EXPLAIN (costs off) SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; SELECT count(*) FROM atstsh WHERE d > '2016-05-16 14:21:25'; +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; + EXPLAIN (costs off) 
SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atstsh WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; From 7acf4f184b5a88b13c6fa34f6f4247c8990064d5 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 18 Mar 2019 12:48:10 +0300 Subject: [PATCH 046/182] PGPRO-2540: Fix another uninitialized memory area --- src/ruminsert.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/ruminsert.c b/src/ruminsert.c index f9ce47a30d..c56d18f983 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -789,6 +789,7 @@ rumHeapTupleInsert(RumState * rumstate, OffsetNumber attnum, NameStr(attr->attname)); } + memset(&insert_item, 0, sizeof(insert_item)); insert_item.iptr = *item; insert_item.addInfo = addInfo[i]; insert_item.addInfoIsNull = addInfoIsNull[i]; From 6230579d108adfc0b8b0f3976704bd10241346d4 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 19 Mar 2019 14:45:16 +0300 Subject: [PATCH 047/182] Add more comments into rum_validate.sql --- expected/rum_validate.out | 3 ++- sql/rum_validate.sql | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/expected/rum_validate.out b/expected/rum_validate.out index 83a312c0ed..e662615e87 100644 --- a/expected/rum_validate.out +++ b/expected/rum_validate.out @@ -90,7 +90,7 @@ FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_ -- Check incorrect operator class -- DROP INDEX rumidx; --- Check incorrect operator class +-- PGPRO-1175: Check incorrect operator class, i.e. 
it shouldn't work correctly CREATE OPERATOR CLASS rum_tsvector_norm_ops FOR TYPE tsvector USING rum AS @@ -110,6 +110,7 @@ CREATE INDEX rum_norm_idx ON test_rum USING rum(a rum_tsvector_norm_ops); SET enable_seqscan=off; SET enable_bitmapscan=off; SET enable_indexscan=on; +-- PGPRO-1175: Select using incorrect operator class SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') diff --git a/sql/rum_validate.sql b/sql/rum_validate.sql index 24bc4aa7c3..feb8e2765a 100644 --- a/sql/rum_validate.sql +++ b/sql/rum_validate.sql @@ -31,7 +31,7 @@ FROM unnest(array['asc','desc','nulls_first','nulls_last','orderable','distance_ DROP INDEX rumidx; --- Check incorrect operator class +-- PGPRO-1175: Check incorrect operator class, i.e. it shouldn't work correctly CREATE OPERATOR CLASS rum_tsvector_norm_ops FOR TYPE tsvector USING rum AS @@ -54,7 +54,8 @@ SET enable_seqscan=off; SET enable_bitmapscan=off; SET enable_indexscan=on; +-- PGPRO-1175: Select using incorrect operator class SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') - ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) \ No newline at end of file + ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) From 793829de933b6beace99d0b4194bc1116bd72d9e Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Fri, 22 Mar 2019 12:21:42 +0300 Subject: [PATCH 048/182] PGPRO-2554: Copy an additional information into the current memory In case of concurrent updates reading raw additional information from index pages may lead to undefined behaviour. 
--- src/rum.h | 18 ++++++++++++++---- src/rumdatapage.c | 11 ++++++----- src/rumentrypage.c | 4 ++-- src/rumget.c | 21 +++++++++++++-------- src/ruminsert.c | 2 +- src/rumvacuum.c | 2 +- 6 files changed, 37 insertions(+), 21 deletions(-) diff --git a/src/rum.h b/src/rum.h index 30d2f78daa..a90ea5b876 100644 --- a/src/rum.h +++ b/src/rum.h @@ -19,6 +19,7 @@ #include "access/sdir.h" #include "lib/rbtree.h" #include "storage/bufmgr.h" +#include "utils/datum.h" #include "rumsort.h" @@ -529,7 +530,7 @@ extern void rumEntryFillRoot(RumBtree btree, Buffer root, Buffer lbuf, Buffer rb Page page, Page lpage, Page rpage); extern IndexTuple rumPageGetLinkItup(RumBtree btree, Buffer buf, Page page); extern void rumReadTuple(RumState * rumstate, OffsetNumber attnum, - IndexTuple itup, RumItem * items); + IndexTuple itup, RumItem * items, bool copyAddInfo); extern void rumReadTuplePointers(RumState * rumstate, OffsetNumber attnum, IndexTuple itup, ItemPointerData *ipd); extern void updateItemIndexes(Page page, OffsetNumber attnum, RumState * rumstate); @@ -941,10 +942,14 @@ rumDataPageLeafReadItemPointer(char *ptr, ItemPointer iptr, bool *addInfoIsNull) * Reads next item pointer and additional information from leaf data page. * Replaces current item pointer with the next one. Zero item pointer should be * passed in order to read the first item pointer. + * + * It is necessary to pass copyAddInfo=true if additional information is used + * when the data page is unlocked. If the additional information is used without + * locking one can get unexpected behaviour. 
*/ static inline Pointer rumDataPageLeafRead(Pointer ptr, OffsetNumber attnum, RumItem * item, - RumState * rumstate) + bool copyAddInfo, RumState * rumstate) { Form_pg_attribute attr; @@ -1009,8 +1014,13 @@ rumDataPageLeafRead(Pointer ptr, OffsetNumber attnum, RumItem * item, } else { - ptr = (Pointer) att_align_pointer(ptr, attr->attalign, attr->attlen, ptr); - item->addInfo = fetch_att(ptr, attr->attbyval, attr->attlen); + Datum addInfo; + + ptr = (Pointer) att_align_pointer(ptr, attr->attalign, attr->attlen, + ptr); + addInfo = fetch_att(ptr, attr->attbyval, attr->attlen); + item->addInfo = copyAddInfo ? + datumCopy(addInfo, attr->attbyval, attr->attlen) : addInfo; } ptr = (Pointer) att_addlength_pointer(ptr, attr->attlen, ptr); diff --git a/src/rumdatapage.c b/src/rumdatapage.c index f948a8627d..19f9c1a81f 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -589,7 +589,7 @@ findInLeafPage(RumBtree btree, Page page, OffsetNumber *offset, *iptrOut = item.iptr; ptr = rumDataPageLeafRead(ptr, btree->entryAttnum, &item, - btree->rumstate); + false, btree->rumstate); cmp = compareRumItem(btree->rumstate, btree->entryAttnum, &btree->items[btree->curitem], &item); @@ -899,7 +899,8 @@ dataPlaceToPage(RumBtree btree, Page page, OffsetNumber off) if (copyItemEmpty == true && off <= maxoff) { copyPtr = rumDataPageLeafRead(copyPtr, btree->entryAttnum, - &copyItem, btree->rumstate); + &copyItem, false, + btree->rumstate); copyItemEmpty = false; } @@ -1091,7 +1092,7 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, prevIptr = item.iptr; copyPtr = rumDataPageLeafRead(copyPtr, btree->entryAttnum, &item, - btree->rumstate); + false, btree->rumstate); prevTotalsize = totalsize; totalsize = rumCheckPlaceToDataPageLeaf(btree->entryAttnum, @@ -1169,7 +1170,7 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, } copyPtr = rumDataPageLeafRead(copyPtr, btree->entryAttnum, &item, - btree->rumstate); + false, btree->rumstate); curItem = item; ptr =
rumPlaceToDataPageLeaf(ptr, btree->entryAttnum, &item, @@ -1351,7 +1352,7 @@ updateItemIndexes(Page page, OffsetNumber attnum, RumState * rumstate) } j++; } - ptr = rumDataPageLeafRead(ptr, attnum, &item, rumstate); + ptr = rumDataPageLeafRead(ptr, attnum, &item, false, rumstate); } /* Fill rest of page indexes with InvalidOffsetNumber if any */ for (; j < RumDataLeafIndexCount; j++) diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 7029942e78..e87d8749b8 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -21,7 +21,7 @@ */ void rumReadTuple(RumState * rumstate, OffsetNumber attnum, - IndexTuple itup, RumItem * items) + IndexTuple itup, RumItem * items, bool copyAddInfo) { Pointer ptr = RumGetPosting(itup); RumItem item; @@ -31,7 +31,7 @@ rumReadTuple(RumState * rumstate, OffsetNumber attnum, ItemPointerSetMin(&item.iptr); for (i = 0; i < nipd; i++) { - ptr = rumDataPageLeafRead(ptr, attnum, &item, rumstate); + ptr = rumDataPageLeafRead(ptr, attnum, &item, copyAddInfo, rumstate); items[i] = item; } } diff --git a/src/rumget.c b/src/rumget.c index e67c66f6aa..065853fda3 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -258,7 +258,8 @@ scanPostingTree(Relation index, RumScanEntry scanEntry, ptr = RumDataPageGetData(page); for (i = FirstOffsetNumber; i <= maxoff; i++) { - ptr = rumDataPageLeafRead(ptr, attnum, &item.item, rumstate); + ptr = rumDataPageLeafRead(ptr, attnum, &item.item, false, + rumstate); SCAN_ITEM_PUT_KEY(scanEntry, item, idatum, icategory); rum_tuplesort_putrumitem(scanEntry->matchSortstate, &item); } @@ -468,7 +469,7 @@ collectMatchBitmap(RumBtreeData * btree, RumBtreeStack * stack, for (i = 0; i < RumGetNPosting(itup); i++) { ptr = rumDataPageLeafRead(ptr, scanEntry->attnum, &item.item, - rumstate); + false, rumstate); SCAN_ITEM_PUT_KEY(scanEntry, item, idatum, icategory); rum_tuplesort_putrumitem(scanEntry->matchSortstate, &item); } @@ -674,7 +675,8 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) for 
(i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, rumstate); + ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, true, + rumstate); entry->list[i - FirstOffsetNumber] = item; } @@ -689,7 +691,7 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->predictNumberResult = entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); - rumReadTuple(rumstate, entry->attnum, itup, entry->list); + rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); entry->isFinished = setListPositionScanEntry(rumstate, entry); if (!entry->isFinished) entry->curItem = entry->list[entry->offset]; @@ -935,7 +937,8 @@ entryGetNextItem(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, rumstate); + ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, true, + rumstate); entry->list[i - FirstOffsetNumber] = item; if (searchBorder) @@ -1091,7 +1094,8 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { - ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, rumstate); + ptr = rumDataPageLeafRead(ptr, entry->attnum, &item, true, + rumstate); entry->list[i - FirstOffsetNumber] = item; } @@ -1104,7 +1108,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->predictNumberResult = entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); - rumReadTuple(rumstate, entry->attnum, itup, entry->list); + rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); entry->isFinished = setListPositionScanEntry(rumstate, entry); } @@ -1659,7 +1663,8 @@ scanPage(RumState * rumstate, RumScanEntry entry, RumItem *item, bool equalOk) bound = -1; for (i = first; i <= maxoff; i++) { - ptr = 
rumDataPageLeafRead(ptr, entry->attnum, &iter_item, rumstate); + ptr = rumDataPageLeafRead(ptr, entry->attnum, &iter_item, true, + rumstate); entry->list[i - first] = iter_item; if (bound != -1) diff --git a/src/ruminsert.c b/src/ruminsert.c index c56d18f983..1cd5111040 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -272,7 +272,7 @@ addItemPointersToLeafTuple(RumState * rumstate, newNPosting = oldNPosting + nitem; newItems = (RumItem *) palloc(sizeof(RumItem) * newNPosting); - rumReadTuple(rumstate, attnum, old, oldItems); + rumReadTuple(rumstate, attnum, old, oldItems, false); newNPosting = rumMergeRumItems(rumstate, attnum, newItems, items, nitem, oldItems, oldNPosting); diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 32662c65f6..1021a46468 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -63,7 +63,7 @@ rumVacuumPostingList(RumVacuumState * gvs, OffsetNumber attnum, Pointer src, for (i = 0; i < nitem; i++) { prev = ptr; - ptr = rumDataPageLeafRead(ptr, attnum, &item, &gvs->rumstate); + ptr = rumDataPageLeafRead(ptr, attnum, &item, false, &gvs->rumstate); if (gvs->callback(&item.iptr, gvs->callback_state)) { gvs->result->tuples_removed += 1; From 36e0be59c4ba83dc12389c1665dff2fe9b5ac335 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 28 Mar 2019 18:58:51 +0300 Subject: [PATCH 049/182] PGPRO-2538: Improve tests, force Index Scan --- expected/altorder.out | 9 ++++++--- expected/altorder_1.out | 9 ++++++--- sql/altorder.sql | 10 +++++++--- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/expected/altorder.out b/expected/altorder.out index 24d3e1ea79..9f7178b86a 100644 --- a/expected/altorder.out +++ b/expected/altorder.out @@ -167,9 +167,8 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (32 rows) -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; 
EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; @@ -252,6 +251,10 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; 612 (1 row) +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; QUERY PLAN diff --git a/expected/altorder_1.out b/expected/altorder_1.out index 0556317f60..5bd8925d7c 100644 --- a/expected/altorder_1.out +++ b/expected/altorder_1.out @@ -168,9 +168,8 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (32 rows) -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE t @@ 'wr|qh'; @@ -247,6 +246,10 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; 612 (1 row) +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; QUERY PLAN diff --git a/sql/altorder.sql b/sql/altorder.sql index 5f74b7a7b5..86858d1fbb 100644 --- a/sql/altorder.sql +++ b/sql/altorder.sql @@ -34,9 +34,8 @@ SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; -RESET enable_indexscan; -RESET enable_indexonlyscan; -RESET enable_bitmapscan; +-- Test bitmap index scan +SET enable_bitmapscan=on; SET enable_seqscan = off; EXPLAIN (costs off) @@ -56,6 +55,11 @@ EXPLAIN (costs off) SELECT count(*) FROM atsts WHERE d > '2016-05-16 14:21:25'; SELECT count(*) FROM atsts WHERE d > 
'2016-05-16 14:21:25'; +-- Test index scan +SET enable_indexscan=on; +SET enable_indexonlyscan=on; +SET enable_bitmapscan=off; + EXPLAIN (costs off) SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; SELECT id, d, d <=> '2016-05-16 14:21:25' FROM atsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; From 0527b2667d4588629f622a4b4371bcca6ad1a927 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 17:52:03 +0300 Subject: [PATCH 050/182] Integration with Travis CI --- .travis.yml | 24 +++++++++ travis/Dockerfile.in | 31 +++++++++++ travis/docker-compose.yml | 2 + travis/mk_dockerfile.sh | 16 ++++++ travis/run-tests.sh | 111 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 184 insertions(+) create mode 100644 .travis.yml create mode 100644 travis/Dockerfile.in create mode 100644 travis/docker-compose.yml create mode 100644 travis/mk_dockerfile.sh create mode 100644 travis/run-tests.sh diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000..2e1b7a3e0c --- /dev/null +++ b/.travis.yml @@ -0,0 +1,24 @@ +sudo: required + +language: c + +services: + - docker + +install: + - ./travis/mk_dockerfile.sh + - docker-compose build + +script: + - docker-compose run $(bash <(curl -s https://codecov.io/env)) tests + +notifications: + email: + on_success: change + on_failure: always + +env: + - PG_VERSION=11 LEVEL=hardcore + - PG_VERSION=11 + - PG_VERSION=10 + - PG_VERSION=9.6 diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in new file mode 100644 index 0000000000..e25db7ac70 --- /dev/null +++ b/travis/Dockerfile.in @@ -0,0 +1,31 @@ +FROM postgres:${PG_VERSION}-alpine + +# Install dependencies +RUN apk add --no-cache \ + openssl curl \ + perl perl-ipc-run \ + make musl-dev gcc bison flex coreutils \ + zlib-dev libedit-dev \ + clang clang-analyzer; + +# Environment +ENV LANG=C.UTF-8 PGDATA=/pg/data + +# Make directories +RUN mkdir
-p ${PGDATA} && \ + mkdir -p /pg/testdir + +# Grant privileges +RUN chown postgres:postgres ${PGDATA} && \ + chown postgres:postgres /pg/testdir && \ + chmod a+rwx /usr/local/lib/postgresql && \ + chmod a+rwx /usr/local/share/postgresql/extension + +COPY travis/run_tests.sh /run.sh +RUN chmod 755 /run.sh + +COPY . /pg/testdir +WORKDIR /pg/testdir + +USER postgres +ENTRYPOINT LEVEL=${LEVEL} /run.sh diff --git a/travis/docker-compose.yml b/travis/docker-compose.yml new file mode 100644 index 0000000000..471ab779f2 --- /dev/null +++ b/travis/docker-compose.yml @@ -0,0 +1,2 @@ +tests: + build: . diff --git a/travis/mk_dockerfile.sh b/travis/mk_dockerfile.sh new file mode 100644 index 0000000000..927bf8fc29 --- /dev/null +++ b/travis/mk_dockerfile.sh @@ -0,0 +1,16 @@ +if [ -z ${PG_VERSION+x} ]; then + echo PG_VERSION is not set! + exit 1 +fi + +if [ -z ${LEVEL+x} ]; then + LEVEL=scan-build +fi + +echo PG_VERSION=${PG_VERSION} +echo LEVEL=${LEVEL} + +sed \ + -e 's/${PG_VERSION}/'${PG_VERSION}/g \ + -e 's/${LEVEL}/'${LEVEL}/g \ +Dockerfile.tmpl > Dockerfile diff --git a/travis/run-tests.sh b/travis/run-tests.sh new file mode 100644 index 0000000000..55224864e6 --- /dev/null +++ b/travis/run-tests.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash + +# +# Copyright (c) 2019, Postgres Professional +# +# supported levels: +# * standard +# * scan-build +# * hardcore +# + +set -ux +status=0 + + +# rebuild PostgreSQL with cassert support +if [ "$LEVEL" = "hardcore" ]; then + + set -e + + CUSTOM_PG_BIN=$PWD/pg_bin + CUSTOM_PG_SRC=$PWD/postgresql + + # here PG_VERSION is provided by postgres:X-alpine docker image + curl "https://ftp.postgresql.org/pub/source/v$PG_VERSION/postgresql-$PG_VERSION.tar.bz2" -o postgresql.tar.bz2 + echo "$PG_SHA256 *postgresql.tar.bz2" | sha256sum -c - + + mkdir $CUSTOM_PG_SRC + + tar \ + --extract \ + --file postgresql.tar.bz2 \ + --directory $CUSTOM_PG_SRC \ + --strip-components 1 + + cd $CUSTOM_PG_SRC + + # enable additional options +
./configure \ + CFLAGS='-O0 -ggdb3 -fno-omit-frame-pointer' \ + --enable-cassert \ + --prefix=$CUSTOM_PG_BIN \ + --quiet + + time make -s -j$(nproc) && make -s install + + # override default PostgreSQL instance + export PATH=$CUSTOM_PG_BIN/bin:$PATH + export LD_LIBRARY_PATH=$CUSTOM_PG_BIN/lib + + # show pg_config path (just in case) + which pg_config + + cd - + + set +e +fi + +# show pg_config just in case +pg_config + +# perform code checks if asked to +if [ "$LEVEL" = "scan-build" ] || \ + [ "$LEVEL" = "hardcore" ]; then + + # perform static analyzis + scan-build --status-bugs make USE_PGXS=1 || status=$? + + # something's wrong, exit now! + if [ $status -ne 0 ]; then exit 1; fi + + # don't forget to "make clean" + make USE_PGXS=1 clean +fi + + +# build and install extension (using PG_CPPFLAGS and SHLIB_LINK for gcov) +make USE_PGXS=1 PG_CPPFLAGS="-coverage" SHLIB_LINK="-coverage" install + +# initialize database +initdb -D $PGDATA + +# set appropriate port +export PGPORT=55435 +echo "port = $PGPORT" >> $PGDATA/postgresql.conf + +# restart cluster 'test' +pg_ctl start -l /tmp/postgres.log -w || status=$? + +# something's wrong, exit now! +if [ $status -ne 0 ]; then cat /tmp/postgres.log; exit 1; fi + +# run regression tests +export PG_REGRESS_DIFF_OPTS="-w -U3" # for alpine's diff (BusyBox) +make USE_PGXS=1 installcheck || status=$? + +# show diff if it exists +if test -f regression.diffs; then cat regression.diffs; fi + +# something's wrong, exit now! 
+if [ $status -ne 0 ]; then exit 1; fi + +# generate *.gcov files +gcov *.c *.h + + +set +ux + + +# send coverage stats to Codecov +bash <(curl -s https://codecov.io/bash) From aa33f7e301092382f162a6fc508fb34acfb7975d Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 18:21:16 +0300 Subject: [PATCH 051/182] Add execute permissions to travis/mk_dockerfile.sh --- travis/mk_dockerfile.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 travis/mk_dockerfile.sh diff --git a/travis/mk_dockerfile.sh b/travis/mk_dockerfile.sh old mode 100644 new mode 100755 From bf0ff146535b6480f01b268c4511f969bce3f963 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 18:23:32 +0300 Subject: [PATCH 052/182] Fix travis/mk_dockerfile.sh paths --- travis/mk_dockerfile.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/travis/mk_dockerfile.sh b/travis/mk_dockerfile.sh index 927bf8fc29..48169488f6 100755 --- a/travis/mk_dockerfile.sh +++ b/travis/mk_dockerfile.sh @@ -13,4 +13,4 @@ echo LEVEL=${LEVEL} sed \ -e 's/${PG_VERSION}/'${PG_VERSION}/g \ -e 's/${LEVEL}/'${LEVEL}/g \ -Dockerfile.tmpl > Dockerfile +Dockerfile.in > Dockerfile From d158e41938b1038e745c56820d961d491c1077ef Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 18:50:28 +0300 Subject: [PATCH 053/182] Fix paths for Travis CI --- .dockerignore | 3 +++ .travis.yml | 5 ++++- travis/Dockerfile.in | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000000..f5c659e5af --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +# Don't send some content to the Docker host when building +.git +.travis.yml diff --git a/.travis.yml b/.travis.yml index 2e1b7a3e0c..9dd95cb320 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,8 +5,11 @@ language: c services: - docker +before_install: + - cp travis/* .
+ install: - - ./travis/mk_dockerfile.sh + - ./mk_dockerfile.sh - docker-compose build script: diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index e25db7ac70..38a73ae93e 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -21,7 +21,7 @@ RUN chown postgres:postgres ${PGDATA} && \ chmod a+rwx /usr/local/lib/postgresql && \ chmod a+rwx /usr/local/share/postgresql/extension -COPY travis/run_tests.sh /run.sh +COPY run_tests.sh /run.sh RUN chmod 755 /run.sh COPY . /pg/testdir From df75d406f5fd9fcb233933008dc5a0f6186a24ce Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 18:53:11 +0300 Subject: [PATCH 054/182] Rename travis/run_tests.sh --- travis/{run-tests.sh => run_tests.sh} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename travis/{run-tests.sh => run_tests.sh} (100%) diff --git a/travis/run-tests.sh b/travis/run_tests.sh similarity index 100% rename from travis/run-tests.sh rename to travis/run_tests.sh From 78ec4f36868983127a992c7f15c0842a529c7d81 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 15 Apr 2019 19:02:29 +0300 Subject: [PATCH 055/182] Recursively chown --- .dockerignore | 2 ++ travis/Dockerfile.in | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.dockerignore b/.dockerignore index f5c659e5af..989964094e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,5 @@ # Don't send some content to the Docker host when building +img +travis .git .travis.yml diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index 38a73ae93e..173212e96e 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -15,17 +15,17 @@ ENV LANG=C.UTF-8 PGDATA=/pg/data RUN mkdir -p ${PGDATA} && \ mkdir -p /pg/testdir -# Grant privileges -RUN chown postgres:postgres ${PGDATA} && \ - chown postgres:postgres /pg/testdir && \ - chmod a+rwx /usr/local/lib/postgresql && \ - chmod a+rwx /usr/local/share/postgresql/extension - COPY run_tests.sh /run.sh RUN chmod 755 /run.sh COPY . 
/pg/testdir WORKDIR /pg/testdir +# Grant privileges +RUN chown postgres:postgres ${PGDATA} && \ + chown -R postgres:postgres /pg/testdir && \ + chmod a+rwx /usr/local/lib/postgresql && \ + chmod a+rwx /usr/local/share/postgresql/extension + USER postgres ENTRYPOINT LEVEL=${LEVEL} /run.sh From e9c53b5d2e8dc9c82340b8e38213ba9aea523064 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 16 Apr 2019 16:40:56 +0300 Subject: [PATCH 056/182] Fixes per clang analyzer complains --- src/rum_arr_utils.c | 1 + src/rum_ts_utils.c | 2 +- src/rumget.c | 4 ++-- src/rumsort.c | 1 - src/rumvacuum.c | 1 + travis/mk_dockerfile.sh | 2 +- travis/run_tests.sh | 4 +--- 7 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 78b788eb8b..1b51a3a877 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -733,6 +733,7 @@ cmpAscArrayElem(const void *a, const void *b, void *arg) { FmgrInfo *cmpFunc = (FmgrInfo*)arg; + Assert(a && b); return DatumGetInt32(FunctionCall2Coll(cmpFunc, DEFAULT_COLLATION_OID, *(Datum*)a, *(Datum*)b)); } diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index b43ac3b0a3..073f90bdfe 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -1242,7 +1242,7 @@ calc_score_docr(float4 *arrdata, DocRepresentation *doc, uint32 doclen, int new_cover_key = 0; int nitems = 0; - while (ptr <= ext.end) + while (ptr && ptr <= ext.end) { InvSum += arrdata[ptr->wclass]; /* SK: Quick and dirty hash key. Hope collisions will be not too frequent. 
*/ diff --git a/src/rumget.c b/src/rumget.c index 065853fda3..de70664c0f 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -1112,7 +1112,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) entry->isFinished = setListPositionScanEntry(rumstate, entry); } - Assert(entry->nlist > 0); + Assert(entry->nlist > 0 && entry->list); entry->curItem = entry->list[entry->offset]; entry->offset += entry->scanDirection; @@ -1169,7 +1169,7 @@ entryGetItem(RumState * rumstate, RumScanEntry entry, bool *nextEntryList, Snaps if (!ItemPointerIsMin(&entry->collectRumItem.item.iptr)) collected = entry->collectRumItem; else - ItemPointerSetMin(&collected.item.iptr); + MemSet(&collected, 0, sizeof(collected)); ItemPointerSetMin(&entry->curItem.iptr); diff --git a/src/rumsort.c b/src/rumsort.c index a47801c6b7..4c285d91de 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -2426,7 +2426,6 @@ mergeruns(RumTuplesortstate *state) else { numInputTapes = state->tapeRange; - numTapes = state->maxTapes; } state->read_buffer_size = Max(state->availMem / numInputTapes, 0); diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 1021a46468..1285c9d4ed 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -53,6 +53,7 @@ rumVacuumPostingList(RumVacuumState * gvs, OffsetNumber attnum, Pointer src, prev, ptr = src; + *newSize = 0; ItemPointerSetMin(&item.iptr); /* diff --git a/travis/mk_dockerfile.sh b/travis/mk_dockerfile.sh index 48169488f6..9108d2c68d 100755 --- a/travis/mk_dockerfile.sh +++ b/travis/mk_dockerfile.sh @@ -4,7 +4,7 @@ if [ -z ${PG_VERSION+x} ]; then fi if [ -z ${LEVEL+x} ]; then - LEVEL=scan-build + LEVEL=standard fi echo PG_VERSION=${PG_VERSION} diff --git a/travis/run_tests.sh b/travis/run_tests.sh index 55224864e6..13a737f24c 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -5,7 +5,6 @@ # # supported levels: # * standard -# * scan-build # * hardcore # @@ -60,8 +59,7 @@ fi pg_config # perform code checks if asked to -if [ "$LEVEL" = 
"scan-build" ] || \ - [ "$LEVEL" = "hardcore" ]; then +if [ "$LEVEL" = "hardcore" ]; then # perform static analyzis scan-build --status-bugs make USE_PGXS=1 || status=$? From 14e97be2abb29e09744efab87627e58a06fb5fcc Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 16 Apr 2019 16:52:55 +0300 Subject: [PATCH 057/182] Grant privileges on postgresql/server/ within Docker --- travis/Dockerfile.in | 1 + 1 file changed, 1 insertion(+) diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index 173212e96e..6c839839f0 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -24,6 +24,7 @@ WORKDIR /pg/testdir # Grant privileges RUN chown postgres:postgres ${PGDATA} && \ chown -R postgres:postgres /pg/testdir && \ + chown postgres:postgres /usr/local/include/postgresql/server/ && \ chmod a+rwx /usr/local/lib/postgresql && \ chmod a+rwx /usr/local/share/postgresql/extension From 6636f3949d05f9915ceb01be1cdfd258a4acf81c Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 16 Apr 2019 18:50:27 +0300 Subject: [PATCH 058/182] Check array for NULL, fix gcov call --- .dockerignore | 6 ++++++ src/rum_arr_utils.c | 6 +++++- travis/run_tests.sh | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/.dockerignore b/.dockerignore index 989964094e..5ad68edc33 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,3 +3,9 @@ img travis .git .travis.yml + +*.gcno +*.gcda +*.gcov +*.so +*.o diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 1b51a3a877..da3f2f3c52 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -51,7 +51,11 @@ #define CHECKARRVALID(x) \ do { \ - if (x) { \ + if (x == NULL) \ + ereport(ERROR, \ + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), \ + errmsg("array must not be NULL"))); \ + else if (x) { \ if (ARR_NDIM(x) != NDIM && ARR_NDIM(x) != 0) \ ereport(ERROR, \ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), \ diff --git a/travis/run_tests.sh b/travis/run_tests.sh index 13a737f24c..c1f598a196 100644 --- a/travis/run_tests.sh +++ 
b/travis/run_tests.sh @@ -99,7 +99,7 @@ if test -f regression.diffs; then cat regression.diffs; fi if [ $status -ne 0 ]; then exit 1; fi # generate *.gcov files -gcov *.c *.h +gcov src/*.c src/*.h set +ux From f342245ea023769ee5c2d2496b27b662d5b18253 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 16 Apr 2019 18:58:51 +0300 Subject: [PATCH 059/182] Add Travis CI badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index dd575c479c..2692df1c87 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![Build Status](https://api.travis-ci.com/postgrespro/rum.svg?branch=master)](https://travis-ci.com/postgrespro/rum) + [![PGXN version](https://badge.fury.io/pg/rum.svg)](https://badge.fury.io/pg/rum) [![GitHub license](https://img.shields.io/badge/license-PostgreSQL-blue.svg)](https://raw.githubusercontent.com/postgrespro/rum/master/LICENSE) From 9a3e47581c869b017083fdfcf15ec67eb559d834 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 16 Apr 2019 19:00:18 +0300 Subject: [PATCH 060/182] Remove Codecov badge comment --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 2692df1c87..98a13bb17f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ [![Build Status](https://api.travis-ci.com/postgrespro/rum.svg?branch=master)](https://travis-ci.com/postgrespro/rum) - [![PGXN version](https://badge.fury.io/pg/rum.svg)](https://badge.fury.io/pg/rum) [![GitHub license](https://img.shields.io/badge/license-PostgreSQL-blue.svg)](https://raw.githubusercontent.com/postgrespro/rum/master/LICENSE) From 16afcc79f57c4a73fe3cfdaff4a9cc5b97bfd38f Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 19 Jun 2019 15:44:58 +0300 
Subject: [PATCH 061/182] Issue #61: Fixes for PostgreSQL 12 - Update tests to handle changes in floats and drandom(). - Delete unused code in rumsort.c, it allows to compile under PG 12. This code isn't necessary anyway. --- expected/array.out | 30 +- expected/predicate-rum-2_1.out | 501 +++++++++ expected/predicate-rum_1.out | 521 ++++++++++ expected/rum.out | 180 ++-- expected/rum_hash.out | 154 +-- sql/array.sql | 8 +- sql/rum.sql | 36 +- sql/rum_hash.sql | 30 +- src/rum.h | 1 + src/rumbulk.c | 1 - src/rumget.c | 15 +- src/ruminsert.c | 10 +- src/rumscan.c | 1 - src/rumsort.c | 1732 +------------------------------- src/rumsort.h | 95 +- src/rumutil.c | 35 +- tests/README.md | 2 +- 17 files changed, 1355 insertions(+), 1997 deletions(-) create mode 100644 expected/predicate-rum-2_1.out create mode 100644 expected/predicate-rum_1.out diff --git a/expected/array.out b/expected/array.out index 92864d95e4..4094d98492 100644 --- a/expected/array.out +++ b/expected/array.out @@ -858,20 +858,26 @@ SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> Order By: (i <=> '{51}'::smallint[]) (3 rows) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - i | ?column? 
+SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + i | distance ---------------------+------------------ {20,23,51} | 1.73205080756888 {33,51,20,77,23,65} | 2.44948974278318 - {23,76,34,23,2,20} | Infinity - {20,60,45,23,29} | Infinity - {23,89,38,20,40,95} | Infinity - {23,20,72} | Infinity - {73,23,20} | Infinity - {6,97,20,89,23} | Infinity - {20,98,30,23,1,66} | Infinity - {57,23,39,46,50,20} | Infinity - {81,20,26,22,23} | Infinity - {18,23,10,90,15,20} | Infinity + {23,76,34,23,2,20} | -1 + {20,60,45,23,29} | -1 + {23,89,38,20,40,95} | -1 + {23,20,72} | -1 + {73,23,20} | -1 + {6,97,20,89,23} | -1 + {20,98,30,23,1,66} | -1 + {57,23,39,46,50,20} | -1 + {81,20,26,22,23} | -1 + {18,23,10,90,15,20} | -1 (12 rows) diff --git a/expected/predicate-rum-2_1.out b/expected/predicate-rum-2_1.out new file mode 100644 index 0000000000..8d3e0d173c --- /dev/null +++ b/expected/predicate-rum-2_1.out @@ -0,0 +1,501 @@ +Parsed test spec with 2 sessions + +starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 
'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; + +starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; + +starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 
'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy1 rxy2 wx1 c1 wy2 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 
'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; + +starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 
'yj':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 
'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy1 rxy2 wy2 c2 wx1 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 
'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; + +starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; + 
+starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 
'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: 
SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; + +starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 
+238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; +step c2: COMMIT; + +starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 
'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c2: COMMIT; +step c1: COMMIT; + +starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; + +starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('xz'); +step c2: COMMIT; +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 
'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('ab'); +step c1: COMMIT; diff --git a/expected/predicate-rum_1.out b/expected/predicate-rum_1.out new file mode 100644 index 0000000000..7f7a0dc9a6 --- /dev/null +++ b/expected/predicate-rum_1.out @@ -0,0 +1,521 @@ +Parsed test spec with 2 sessions + +starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +677 'qh' +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; + +starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 +step rxy1: 
SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c2: COMMIT; + +starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 
'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy1 rxy2 wx1 c1 wy2 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 
're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c2: COMMIT; + +starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step 
wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 
+162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting 
permutation: rxy1 rxy2 wy2 c2 wx1 c1 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c1: COMMIT; + +starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 
'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c2: COMMIT; + +starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 
'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 
'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step 
rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c1: COMMIT; + +starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; +step c2: COMMIT; +ERROR: could not serialize access due to read/write 
dependencies among transactions + +starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c2: COMMIT; +step c1: COMMIT; +ERROR: could not serialize access due to read/write dependencies among transactions + +starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 
'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +step c2: COMMIT; +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +ERROR: could not serialize access due to read/write dependencies among transactions +step c1: COMMIT; + +starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 +step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; +id tsv + +424 'qh':1 'su':3 'vf':2 +238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 +299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 +324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 +413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 +147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 +step wy2: INSERT INTO rum_tbl(tsv) values('hx'); +step c2: COMMIT; +step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; +id tsv + +195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 +131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 +141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 +148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 +206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 +45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 +162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 +677 'hx' +step wx1: INSERT INTO rum_tbl(tsv) values('qh'); +step c1: COMMIT; diff --git a/expected/rum.out b/expected/rum.out index ad960650d2..db08b158dd 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -132,8 +132,8 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 1 (1 row) -SELECT rum_ts_distance(a, 
to_tsquery('pg_catalog.english', 'way')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,7), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') @@ -146,33 +146,33 @@ SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); rum_ts_distance | rum_ts_score | t | a -----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + 8.2247 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.017369 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) SELECT - a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), - rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + (a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4) AS distance, + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), * FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; - ?column? | rum_ts_distance | t | a + distance | rum_ts_distance | t | a ----------+-----------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 8.22467 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 8.2247 | 8.2247 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 57.5727 | 57.5727 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,7), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') @@ -185,16 +185,16 @@ SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), - rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,4), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,6), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); rum_ts_distance | rum_ts_score | t | a -----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + 8.2247 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.017369 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) INSERT INTO test_rum (t) VALUES ('foo bar foo the over foo qq bar'); @@ -232,65 +232,71 @@ SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') ORDER (1 row) -- Check full-index scan with order by -SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote'); - ?column? 
+SELECT + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(10,4) + END distance + FROM + (SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') AS distance + FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote')) t; + distance ---------- 16.4493 16.4493 - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 (56 rows) CREATE TABLE tst (i int4, t tsvector); @@ -325,15 +331,15 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), * Order By: (a <=> '''w'':*'::tsquery) (3 rows) -SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), * +SELECT (a <=> to_tsquery('pg_catalog.english', 'w:*'))::numeric(10,4) AS distance, * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'w:*') ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*'); - ?column? | t | a + distance | t | a ----------+--------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------- - 8.22467 | so well that only a fragment, as it were, gave way. 
It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 - 8.22467 | wine, but wouldn't you divide with your neighbors! The columns in the | 'column':11 'divid':6 'neighbor':9 'wine':1 'wouldn':3 - 8.22467 | not say, but you wrote as if you knew it by sight as well as by heart. | 'heart':17 'knew':9 'say':2 'sight':12 'well':14 'wrote':5 + 8.2247 | so well that only a fragment, as it were, gave way. It still hangs as if | 'fragment':6 'gave':10 'hang':14 'still':13 'way':11 'well':2 + 8.2247 | wine, but wouldn't you divide with your neighbors! The columns in the | 'column':11 'divid':6 'neighbor':9 'wine':1 'wouldn':3 + 8.2247 | not say, but you wrote as if you knew it by sight as well as by heart. | 'heart':17 'knew':9 'say':2 'sight':12 'well':14 'wrote':5 16.4493 | little series of pictures. Have you ever been here, I wonder? You did | 'ever':7 'littl':1 'pictur':4 'seri':2 'wonder':11 16.4493 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 16.4493 | _berg_, "the Jettenhuhl, a wooded spur of the Konigestuhl." Look at it | 'berg':1 'jettenhuhl':3 'konigestuhl':9 'look':10 'spur':6 'wood':5 @@ -347,16 +353,16 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), * 16.4493 | my appreciation of you in a more complimentary way than by sending this | 'appreci':2 'complimentari':8 'send':12 'way':9 (14 rows) -SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), * +SELECT (a <=> to_tsquery('pg_catalog.english', 'b:*'))::numeric(10,4) AS distance, * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'b:*') ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*'); - ?column? 
| t | a + distance | t | a ----------+--------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------- - 8.22467 | been trying my best to get all those "passes" into my brain. Now, thanks | 'best':4 'brain':12 'get':6 'pass':9 'thank':14 'tri':2 - 8.22467 | All the above information, I beg you to believe, I do not intend you | 'beg':6 'believ':9 'inform':4 'intend':13 - 8.22467 | curious spectacle, but on the whole had "the banquet-hall deserted" | 'banquet':10 'banquet-hal':9 'curious':1 'desert':12 'hall':11 'spectacl':2 'whole':6 - 8.22467 | oaks, limes and maples, bordered with flower-beds and shrubberies, and | 'bed':9 'border':5 'flower':8 'flower-b':7 'lime':2 'mapl':4 'oak':1 'shrubberi':11 + 8.2247 | been trying my best to get all those "passes" into my brain. Now, thanks | 'best':4 'brain':12 'get':6 'pass':9 'thank':14 'tri':2 + 8.2247 | All the above information, I beg you to believe, I do not intend you | 'beg':6 'believ':9 'inform':4 'intend':13 + 8.2247 | curious spectacle, but on the whole had "the banquet-hall deserted" | 'banquet':10 'banquet-hal':9 'curious':1 'desert':12 'hall':11 'spectacl':2 'whole':6 + 8.2247 | oaks, limes and maples, bordered with flower-beds and shrubberies, and | 'bed':9 'border':5 'flower':8 'flower-b':7 'lime':2 'mapl':4 'oak':1 'shrubberi':11 13.1595 | foo bar foo the over foo qq bar | 'bar':2,8 'foo':1,3,6 'qq':7 16.4493 | ornamental building, and I wish you could see it, if you have not seen | 'build':2 'could':7 'ornament':1 'see':8 'seen':14 'wish':5 16.4493 | the--nearest guide-book! | 'book':5 'guid':4 'guide-book':3 'nearest':2 @@ -375,14 +381,14 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), * 16.4493 | the few that escaped destruction in 1693. 
It is a beautiful, highly | '1693':7 'beauti':11 'destruct':5 'escap':4 'high':12 (20 rows) -select 'bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery; - ?column? +select ('bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; + distance ---------- 8.22467 (1 row) -SELECT 'stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery; - ?column? +SELECT ('stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; + distance ---------- 2.05617 (1 row) diff --git a/expected/rum_hash.out b/expected/rum_hash.out index 4838be4e93..43a9760a28 100644 --- a/expected/rum_hash.out +++ b/expected/rum_hash.out @@ -118,8 +118,8 @@ SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 1 (1 row) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,7), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') @@ -132,34 +132,34 @@ SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); rum_ts_distance | rum_ts_score | t | a -----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + 8.2247 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.017369 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) SELECT - a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), - rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + (a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4) AS distance, + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; - ?column? 
| rum_ts_distance | rum_ts_score | t | a + distance | rum_ts_distance | rum_ts_score | t | a ----------+-----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + 8.2247 | 8.2247 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 57.5727 | 0.017369 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,7), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') @@ -172,16 +172,16 @@ SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), 16.4493 | 0.0607927 | thinking--"to go or not to go?" We are this far on the way. 
Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (4 rows) -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), - rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,4), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,6), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); rum_ts_distance | rum_ts_score | t | a -----------------+--------------+---------------------------------------------------------------------+--------------------------------------------------------- - 8.22467 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 - 57.5727 | 0.0173693 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 + 8.2247 | 0.121585 | itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 + 57.5727 | 0.017369 | thinking--"to go or not to go?" We are this far on the way. Reached | 'far':11 'go':3,7 'reach':15 'think':1 'way':14 (2 rows) INSERT INTO test_rum_hash (t) VALUES ('foo bar foo the over foo qq bar'); @@ -219,65 +219,71 @@ SELECT a FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'bar') O (1 row) -- Check full-index scan with order by -SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote'); - ?column? 
+SELECT + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(10,4) + END distance + FROM + (SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') AS distance + FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote')) t; + distance ---------- 16.4493 16.4493 - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity - Infinity + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 + -1 (56 rows) CREATE TABLE tst_hash (i int4, t tsvector); diff --git a/sql/array.sql b/sql/array.sql index e3869b06d3..11defc9e07 100644 --- a/sql/array.sql +++ b/sql/array.sql @@ -210,4 +210,10 @@ CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); EXPLAIN (COSTS OFF) SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; +SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; diff --git a/sql/rum.sql b/sql/rum.sql index 8b8607faa6..de432fde1a 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -47,34 +47,34 @@ SELECT 
count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'def <-> fgr'); SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'def <2> fgr'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,7), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); SELECT - a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), - rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + (a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4) AS distance, + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), * FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,7), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, 
row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), - rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,4), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,6), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') @@ -93,7 +93,13 @@ SELECT count(*) FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'rat') SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') ORDER BY a; -- Check full-index scan with order by -SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote'); +SELECT + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(10,4) + END distance + FROM + (SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') AS distance + FROM test_rum ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote')) t; CREATE TABLE tst (i int4, t tsvector); INSERT INTO tst SELECT i%10, to_tsvector('simple', substr(md5(i::text), 1, 1)) FROM generate_series(1,100000) i; @@ -126,14 +132,14 @@ SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'w:*') ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*'); -SELECT a <=> to_tsquery('pg_catalog.english', 'w:*'), * +SELECT (a <=> to_tsquery('pg_catalog.english', 'w:*'))::numeric(10,4) AS distance, * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'w:*') ORDER BY a <=> to_tsquery('pg_catalog.english', 'w:*'); -SELECT a <=> to_tsquery('pg_catalog.english', 'b:*'), * +SELECT (a <=> to_tsquery('pg_catalog.english', 'b:*'))::numeric(10,4) AS distance, * FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'b:*') ORDER BY a <=> 
to_tsquery('pg_catalog.english', 'b:*'); -select 'bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery; -SELECT 'stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery; +select ('bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; +SELECT ('stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; diff --git a/sql/rum_hash.sql b/sql/rum_hash.sql index 511e772da5..a33b8fde31 100644 --- a/sql/rum_hash.sql +++ b/sql/rum_hash.sql @@ -35,35 +35,35 @@ SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'def <-> fgr'); SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'def <2> fgr'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'))::numeric(10,7), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'); SELECT - a <=> to_tsquery('pg_catalog.english', 'way & (go | half)'), - rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)')), + (a <=> to_tsquery('pg_catalog.english', 'way & (go | 
half)'))::numeric(10,4) AS distance, + rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way & (go | half)'))::numeric(10,6), * FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'way & (go | half)') limit 2; -- Check ranking normalization -SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0), - rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0), +SELECT rum_ts_distance(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,4), + rum_ts_score(a, to_tsquery('pg_catalog.english', 'way'), 0)::numeric(10,7), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way') ORDER BY a <=> to_tsquery('pg_catalog.english', 'way'); -SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), - rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query), +SELECT rum_ts_distance(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,4), + rum_ts_score(a, row(to_tsquery('pg_catalog.english', 'way & (go | half)'), 0)::rum_distance_query)::numeric(10,6), * FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'way & (go | half)') @@ -82,7 +82,13 @@ SELECT count(*) FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', ' SELECT a FROM test_rum_hash WHERE a @@ to_tsquery('pg_catalog.english', 'bar') ORDER BY a; -- Check full-index scan with order by -SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote'); +SELECT + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(10,4) + END distance + FROM + (SELECT a <=> to_tsquery('pg_catalog.english', 'ever|wrote') AS distance + FROM test_rum_hash ORDER BY a <=> to_tsquery('pg_catalog.english', 'ever|wrote')) t; CREATE TABLE tst_hash (i int4, t tsvector); 
INSERT INTO tst_hash SELECT i%10, to_tsvector('simple', substr(md5(i::text), 1, 1)) FROM generate_series(1,100000) i; diff --git a/src/rum.h b/src/rum.h index a90ea5b876..ad0fa47e6e 100644 --- a/src/rum.h +++ b/src/rum.h @@ -20,6 +20,7 @@ #include "lib/rbtree.h" #include "storage/bufmgr.h" #include "utils/datum.h" +#include "utils/memutils.h" #include "rumsort.h" diff --git a/src/rumbulk.c b/src/rumbulk.c index a84661484e..3c07ef5850 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -14,7 +14,6 @@ #include "postgres.h" #include "utils/datum.h" -#include "utils/memutils.h" #include "rum.h" diff --git a/src/rumget.c b/src/rumget.c index de70664c0f..0a77293e05 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -22,7 +22,6 @@ #if PG_VERSION_NUM >= 120000 #include "utils/float.h" #endif -#include "utils/memutils.h" #include "rum.h" @@ -2357,6 +2356,14 @@ rumgettuple(IndexScanDesc scan, ScanDirection direction) RumSortItem *item; bool should_free; +#if PG_VERSION_NUM >= 120000 +#define GET_SCAN_TID(scan) ((scan)->xs_heaptid) +#define SET_SCAN_TID(scan, tid) ((scan)->xs_heaptid = (tid)) +#else +#define GET_SCAN_TID(scan) ((scan)->xs_ctup.t_self) +#define SET_SCAN_TID(scan, tid) ((scan)->xs_ctup.t_self = (tid)) +#endif + if (so->firstCall) { /* @@ -2389,7 +2396,7 @@ rumgettuple(IndexScanDesc scan, ScanDirection direction) { if (scanGetItem(scan, &so->item, &so->item, &recheck)) { - scan->xs_ctup.t_self = so->item.iptr; + SET_SCAN_TID(scan, so->item.iptr); scan->xs_recheck = recheck; scan->xs_recheckorderby = false; @@ -2411,7 +2418,7 @@ rumgettuple(IndexScanDesc scan, ScanDirection direction) uint32 i, j = 0; - if (rumCompareItemPointers(&scan->xs_ctup.t_self, &item->iptr) == 0) + if (rumCompareItemPointers(&GET_SCAN_TID(scan), &item->iptr) == 0) { if (should_free) pfree(item); @@ -2419,7 +2426,7 @@ rumgettuple(IndexScanDesc scan, ScanDirection direction) continue; } - scan->xs_ctup.t_self = item->iptr; + SET_SCAN_TID(scan, item->iptr); scan->xs_recheck = item->recheck; 
scan->xs_recheckorderby = false; diff --git a/src/ruminsert.c b/src/ruminsert.c index 1cd5111040..dad7b33b3a 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -14,10 +14,12 @@ #include "postgres.h" #include "access/generic_xlog.h" +#if PG_VERSION_NUM >= 120000 +#include "access/tableam.h" +#endif #include "storage/predicate.h" #include "catalog/index.h" #include "miscadmin.h" -#include "utils/memutils.h" #include "utils/datum.h" #include "rum.h" @@ -32,7 +34,11 @@ typedef struct BuildAccumulator accum; } RumBuildState; -#if PG_VERSION_NUM >= 110000 + +#if PG_VERSION_NUM >= 120000 +#define IndexBuildHeapScan(A, B, C, D, E, F) \ +table_index_build_scan(A, B, C, D, true, E, F, NULL) +#elif PG_VERSION_NUM >= 110000 #define IndexBuildHeapScan(A, B, C, D, E, F) \ IndexBuildHeapScan(A, B, C, D, E, F, NULL) #endif diff --git a/src/rumscan.c b/src/rumscan.c index 27d7f05c2d..989d0d1ece 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -15,7 +15,6 @@ #include "access/relscan.h" #include "pgstat.h" -#include "utils/memutils.h" #include "rum.h" diff --git a/src/rumsort.c b/src/rumsort.c index 4c285d91de..8d6e8fe08e 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -3,115 +3,12 @@ * rumsort.h * Generalized tuple sorting routines. * - * This module handles sorting of heap tuples, index tuples, or single - * Datums (and could easily support other kinds of sortable objects, - * if necessary). It works efficiently for both small and large amounts - * of data. Small amounts are sorted in-memory using qsort(). Large - * amounts are sorted using temporary files and a standard external sort - * algorithm. + * This module handles sorting of RumSortItem or RumScanItem structures. + * It contains copy of static functions from + * src/backend/utils/sort/tuplesort.c. * - * See Knuth, volume 3, for more than you want to know about the external - * sorting algorithm. 
Historically, we divided the input into sorted runs - * using replacement selection, in the form of a priority tree implemented - * as a heap (essentially his Algorithm 5.2.3H -- although that strategy is - * often avoided altogether), but that can now only happen first the first - * run. We merge the runs using polyphase merge, Knuth's Algorithm - * 5.4.2D. The logical "tapes" used by Algorithm D are implemented by - * logtape.c, which avoids space wastage by recycling disk space as soon - * as each block is read from its "tape". * - * We never form the initial runs using Knuth's recommended replacement - * selection data structure (Algorithm 5.4.1R), because it uses a fixed - * number of records in memory at all times. Since we are dealing with - * tuples that may vary considerably in size, we want to be able to vary - * the number of records kept in memory to ensure full utilization of the - * allowed sort memory space. So, we keep the tuples in a variable-size - * heap, with the next record to go out at the top of the heap. Like - * Algorithm 5.4.1R, each record is stored with the run number that it - * must go into, and we use (run number, key) as the ordering key for the - * heap. When the run number at the top of the heap changes, we know that - * no more records of the prior run are left in the heap. Note that there - * are in practice only ever two distinct run numbers, due to the greatly - * reduced use of replacement selection in PostgreSQL 9.6. - * - * In PostgreSQL 9.6, a heap (based on Knuth's Algorithm H, with some small - * customizations) is only used with the aim of producing just one run, - * thereby avoiding all merging. Only the first run can use replacement - * selection, which is why there are now only two possible valid run - * numbers, and why heapification is customized to not distinguish between - * tuples in the second run (those will be quicksorted). 
We generally - * prefer a simple hybrid sort-merge strategy, where runs are sorted in much - * the same way as the entire input of an internal sort is sorted (using - * qsort()). The replacement_sort_tuples GUC controls the limited remaining - * use of replacement selection for the first run. - * - * There are several reasons to favor a hybrid sort-merge strategy. - * Maintaining a priority tree/heap has poor CPU cache characteristics. - * Furthermore, the growth in main memory sizes has greatly diminished the - * value of having runs that are larger than available memory, even in the - * case where there is partially sorted input and runs can be made far - * larger by using a heap. In most cases, a single-pass merge step is all - * that is required even when runs are no larger than available memory. - * Avoiding multiple merge passes was traditionally considered to be the - * major advantage of using replacement selection. - * - * The approximate amount of memory allowed for any one sort operation - * is specified in kilobytes by the caller (most pass work_mem). Initially, - * we absorb tuples and simply store them in an unsorted array as long as - * we haven't exceeded workMem. If we reach the end of the input without - * exceeding workMem, we sort the array using qsort() and subsequently return - * tuples just by scanning the tuple array sequentially. If we do exceed - * workMem, we begin to emit tuples into sorted runs in temporary tapes. - * When tuples are dumped in batch after quicksorting, we begin a new run - * with a new output tape (selected per Algorithm D). After the end of the - * input is reached, we dump out remaining tuples in memory into a final run - * (or two, when replacement selection is still used), then merge the runs - * using Algorithm D. - * - * When merging runs, we use a heap containing just the frontmost tuple from - * each source run; we repeatedly output the smallest tuple and insert the - * next tuple from its source tape (if any). 
When the heap empties, the merge - * is complete. The basic merge algorithm thus needs very little memory --- - * only M tuples for an M-way merge, and M is constrained to a small number. - * However, we can still make good use of our full workMem allocation by - * pre-reading additional tuples from each source tape. Without prereading, - * our access pattern to the temporary file would be very erratic; on average - * we'd read one block from each of M source tapes during the same time that - * we're writing M blocks to the output tape, so there is no sequentiality of - * access at all, defeating the read-ahead methods used by most Unix kernels. - * Worse, the output tape gets written into a very random sequence of blocks - * of the temp file, ensuring that things will be even worse when it comes - * time to read that tape. A straightforward merge pass thus ends up doing a - * lot of waiting for disk seeks. We can improve matters by prereading from - * each source tape sequentially, loading about workMem/M bytes from each tape - * in turn. Then we run the merge algorithm, writing but not reading until - * one of the preloaded tuple series runs out. Then we switch back to preread - * mode, fill memory again, and repeat. This approach helps to localize both - * read and write accesses. - * - * When the caller requests random access to the sort result, we form - * the final sorted run on a logical tape which is then "frozen", so - * that we can access it randomly. When the caller does not need random - * access, we return from rum_tuplesort_performsort() as soon as we are down - * to one run per logical tape. The final merge is then performed - * on-the-fly as the caller repeatedly calls rum_tuplesort_getXXX; this - * saves one cycle of writing all the data out to disk and reading it in. 
- * - * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the - * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according - * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that - * tape drives are expensive beasts, and in particular that there will always - * be many more runs than tape drives. In our implementation a "tape drive" - * doesn't cost much more than a few Kb of memory buffers, so we can afford - * to have lots of them. In particular, if we can have as many tape drives - * as sorted runs, we can eliminate any repeated I/O at all. In the current - * code we determine the number of tapes M on the basis of workMem: we want - * workMem/M to be large enough that we read a fair amount of data each time - * we preread from a tape, so as to maintain the locality of access described - * above. Nonetheless, with large workMem we can have many tapes. - * - * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -122,22 +19,12 @@ #include "miscadmin.h" #include "rumsort.h" -#include - -#include "access/htup_details.h" -#include "access/nbtree.h" -#include "catalog/index.h" -#include "catalog/pg_am.h" #include "commands/tablespace.h" #include "executor/executor.h" -#include "utils/datum.h" +#include "utils/guc.h" #include "utils/logtape.h" -#include "utils/lsyscache.h" -#include "utils/memutils.h" #include "utils/pg_rusage.h" #include "utils/probes.h" -#include "utils/rel.h" -#include "utils/sortsupport.h" #include "rum.h" /* RumItem */ @@ -147,15 +34,6 @@ #define DATUM_SORT 2 #define CLUSTER_SORT 3 -/* GUC variables */ -#ifdef TRACE_SORT -bool trace_sort = false; -#endif - -#ifdef DEBUG_BOUNDED_SORT -bool optimize_bounded_sort = true; -#endif - #if PG_VERSION_NUM < 100000 /* Provide fallback for old 
version of tape interface for 9.6 */ #define LogicalTapeRewindForRead(x, y, z) LogicalTapeRewind((x), (y), false) @@ -176,30 +54,9 @@ TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) #endif /* - * The objects we actually sort are SortTuple structs. These contain - * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), - * which is a separate palloc chunk --- we assume it is just one chunk and - * can be freed by a simple pfree(). SortTuples also contain the tuple's - * first key column in Datum/nullflag format, and an index integer. - * - * Storing the first key column lets us save heap_getattr or index_getattr - * calls during tuple comparisons. We could extract and save all the key - * columns not just the first, but this would increase code complexity and - * overhead, and wouldn't actually save any comparison cycles in the common - * case where the first key determines the comparison result. Note that - * for a pass-by-reference datatype, datum1 points into the "tuple" storage. - * - * When sorting single Datums, the data value is represented directly by - * datum1/isnull1. If the datatype is pass-by-reference and isnull1 is false, - * then datum1 points to a separately palloc'd data value that is also pointed - * to by the "tuple" pointer; otherwise "tuple" is NULL. - * - * While building initial runs, tupindex holds the tuple's run number. During - * merge passes, we re-use it to hold the input tape number that each tuple in - * the heap was read from, or to hold the index of the next tuple pre-read - * from the same tape in the case of pre-read entries. tupindex goes unused - * if the sort occurs entirely in memory. + * Below are copied definitions from src/backend/utils/sort/tuplesort.c. */ + typedef struct { void *tuple; /* the tuple proper */ @@ -208,11 +65,6 @@ typedef struct int tupindex; /* see notes above */ } SortTuple; - -/* - * Possible states of a Tuplesort object. 
These denote the states that - * persist between calls of Tuplesort routines. - */ typedef enum { TSS_INITIAL, /* Loading tuples; still within memory limit */ @@ -223,17 +75,6 @@ typedef enum TSS_FINALMERGE /* Performing final merge on-the-fly */ } TupSortStatus; -/* - * Parameters for calculation of number of tapes to use --- see inittapes() - * and rum_tuplesort_merge_order(). - * - * In this calculation we assume that each tape will cost us about 3 blocks - * worth of buffer space (which is an underestimate for very large data - * volumes, but it's probably close enough --- see logtape.c). - * - * MERGE_BUFFER_SIZE is how much data we'd like to read from each input - * tape during a preread cycle (see discussion at top of file). - */ #define MINORDER 6 /* minimum merge order */ #define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3) #define MERGE_BUFFER_SIZE (BLCKSZ * 32) @@ -242,7 +83,7 @@ typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, RumTuplesortstate *state); /* - * Private state of a Tuplesort operation. + * Renamed copy of Tuplesortstate. */ struct RumTuplesortstate { @@ -453,47 +294,6 @@ struct RumTuplesortstate #define USEMEM(state,amt) ((state)->availMem -= (amt)) #define FREEMEM(state,amt) ((state)->availMem += (amt)) -/* - * NOTES about on-tape representation of tuples: - * - * We require the first "unsigned int" of a stored tuple to be the total size - * on-tape of the tuple, including itself (so it is never zero; an all-zero - * unsigned int is used to delimit runs). The remainder of the stored tuple - * may or may not match the in-memory representation of the tuple --- - * any conversion needed is the job of the writetup and readtup routines. - * - * If state->randomAccess is true, then the stored representation of the - * tuple must be followed by another "unsigned int" that is a copy of the - * length --- so the total tape space used is actually sizeof(unsigned int) - * more than the stored length value. This allows read-backwards. 
When - * randomAccess is not true, the write/read routines may omit the extra - * length word. - * - * writetup is expected to write both length words as well as the tuple - * data. When readtup is called, the tape is positioned just after the - * front length word; readtup must read the tuple data and advance past - * the back length word (if present). - * - * The write/read routines can make use of the tuple description data - * stored in the Tuplesortstate record, if needed. They are also expected - * to adjust state->availMem by the amount of memory space (not tape space!) - * released or consumed. There is no error return from either writetup - * or readtup; they should ereport() on failure. - * - * - * NOTES about memory consumption calculations: - * - * We count space allocated for tuples against the workMem limit, plus - * the space used by the variable-size memtuples array. Fixed-size space - * is not counted; it's small enough to not be interesting. - * - * Note that we count actual space used (as shown by GetMemoryChunkSpace) - * rather than the originally-requested size. This is important since - * palloc can add substantial overhead. It's not a complete answer since - * we won't count any wasted space in palloc allocation blocks, but it's - * a lot better than what we were doing before 7.3. 
- */ - /* When using this macro, beware of double evaluation of len */ #define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ do { \ @@ -519,40 +319,6 @@ static void rum_tuplesort_heap_insert(RumTuplesortstate *state, SortTuple *tuple static void rum_tuplesort_heap_siftup(RumTuplesortstate *state, bool checkIndex); static unsigned int getlen(RumTuplesortstate *state, int tapenum, bool eofOK); static void markrunend(RumTuplesortstate *state, int tapenum); -static int comparetup_heap(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void copytup_heap(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_heap(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void readtup_heap(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); -static void reversedirection_heap(RumTuplesortstate *state); -static int comparetup_cluster(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void copytup_cluster(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_cluster(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void readtup_cluster(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); -static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void copytup_index(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_index(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void readtup_index(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); -static void reversedirection_index_btree(RumTuplesortstate *state); -static void reversedirection_index_hash(RumTuplesortstate *state); -static int comparetup_datum(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void 
copytup_datum(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_datum(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void readtup_datum(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); -static void reversedirection_datum(RumTuplesortstate *state); static void free_sort_tuple(RumTuplesortstate *state, SortTuple *stup); static int comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state); @@ -946,221 +712,16 @@ rum_tuplesort_begin_common(int workMem, bool randomAccess) return state; } +/* + * Get sort state memory context. Currently it is used only to allocate + * RumSortItem. + */ MemoryContext rum_tuplesort_get_memorycontext(RumTuplesortstate *state) { return state->sortcontext; } -RumTuplesortstate * -rum_tuplesort_begin_heap(TupleDesc tupDesc, - int nkeys, AttrNumber *attNums, - Oid *sortOperators, Oid *sortCollations, - bool *nullsFirstFlags, - int workMem, bool randomAccess) -{ - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); - MemoryContext oldcontext; - int i; - - oldcontext = MemoryContextSwitchTo(state->sortcontext); - - AssertArg(nkeys > 0); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", - nkeys, workMem, randomAccess ? 
't' : 'f'); -#endif - - state->nKeys = nkeys; - - RUM_SORT_START(HEAP_SORT, - false, /* no unique check */ - nkeys, - workMem, - randomAccess); - - state->comparetup = comparetup_heap; - state->copytup = copytup_heap; - state->writetup = writetup_heap; - state->readtup = readtup_heap; - state->reversedirection = reversedirection_heap; - - state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ - - /* Prepare SortSupport data for each column */ - state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); - - for (i = 0; i < nkeys; i++) - { - SortSupport sortKey = state->sortKeys + i; - - AssertArg(attNums[i] != 0); - AssertArg(sortOperators[i] != 0); - - sortKey->ssup_cxt = CurrentMemoryContext; - sortKey->ssup_collation = sortCollations[i]; - sortKey->ssup_nulls_first = nullsFirstFlags[i]; - sortKey->ssup_attno = attNums[i]; - - PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); - } - - if (nkeys == 1) - state->onlyKey = state->sortKeys; - - MemoryContextSwitchTo(oldcontext); - - return state; -} - -RumTuplesortstate * -rum_tuplesort_begin_cluster(TupleDesc tupDesc, - Relation indexRel, - int workMem, bool randomAccess) -{ - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); - MemoryContext oldcontext; - - Assert(indexRel->rd_rel->relam == BTREE_AM_OID); - - oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", - RelationGetNumberOfAttributes(indexRel), - workMem, randomAccess ? 
't' : 'f'); -#endif - - state->nKeys = RelationGetNumberOfAttributes(indexRel); - - RUM_SORT_START(CLUSTER_SORT, - false, /* no unique check */ - state->nKeys, - workMem, - randomAccess); - - state->comparetup = comparetup_cluster; - state->copytup = copytup_cluster; - state->writetup = writetup_cluster; - state->readtup = readtup_cluster; - state->reversedirection = reversedirection_index_btree; - - state->indexInfo = BuildIndexInfo(indexRel); - state->indexScanKey = _bt_mkscankey_nodata(indexRel); - - state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ - - if (state->indexInfo->ii_Expressions != NULL) - { - TupleTableSlot *slot; - ExprContext *econtext; - - /* - * We will need to use FormIndexDatum to evaluate the index - * expressions. To do that, we need an EState, as well as a - * TupleTableSlot to put the table tuples into. The econtext's - * scantuple has to point to that slot, too. - */ - state->estate = CreateExecutorState(); -#if PG_VERSION_NUM >= 120000 - slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsVirtual); -#else - slot = MakeSingleTupleTableSlot(tupDesc); -#endif - econtext = GetPerTupleExprContext(state->estate); - econtext->ecxt_scantuple = slot; - } - - MemoryContextSwitchTo(oldcontext); - - return state; -} - -RumTuplesortstate * -rum_tuplesort_begin_index_btree(Relation heapRel, - Relation indexRel, - bool enforceUnique, - int workMem, bool randomAccess) -{ - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin index sort: unique = %c, workMem = %d, randomAccess = %c", - enforceUnique ? 't' : 'f', - workMem, randomAccess ? 
't' : 'f'); -#endif - - state->nKeys = RelationGetNumberOfAttributes(indexRel); - - RUM_SORT_START(INDEX_SORT, - enforceUnique, - state->nKeys, - workMem, - randomAccess); - - state->comparetup = comparetup_index_btree; - state->copytup = copytup_index; - state->writetup = writetup_index; - state->readtup = readtup_index; - state->reversedirection = reversedirection_index_btree; - - state->heapRel = heapRel; - state->indexRel = indexRel; - state->indexScanKey = _bt_mkscankey_nodata(indexRel); - state->enforceUnique = enforceUnique; - - MemoryContextSwitchTo(oldcontext); - - return state; -} - -RumTuplesortstate * -rum_tuplesort_begin_index_hash(Relation heapRel, - Relation indexRel, - uint32 hash_mask, - int workMem, bool randomAccess) -{ - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); - MemoryContext oldcontext; - - oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c", - hash_mask, - workMem, randomAccess ? 
't' : 'f'); -#endif - - state->nKeys = 1; /* Only one sort column, the hash code */ - - state->comparetup = comparetup_index_hash; - state->copytup = copytup_index; - state->writetup = writetup_index; - state->readtup = readtup_index; - state->reversedirection = reversedirection_index_hash; - - state->heapRel = heapRel; - state->indexRel = indexRel; - - state->hash_mask = hash_mask; - - MemoryContextSwitchTo(oldcontext); - - return state; -} - RumTuplesortstate * rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer) @@ -1232,94 +793,6 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) return state; } -RumTuplesortstate * -rum_tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, - bool nullsFirstFlag, - int workMem, bool randomAccess) -{ - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); - MemoryContext oldcontext; - int16 typlen; - bool typbyval; - - oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin datum sort: workMem = %d, randomAccess = %c", - workMem, randomAccess ? 
't' : 'f'); -#endif - - state->nKeys = 1; /* always a one-column sort */ - - RUM_SORT_START(DATUM_SORT, - false, /* no unique check */ - 1, - workMem, - randomAccess); - - state->comparetup = comparetup_datum; - state->copytup = copytup_datum; - state->writetup = writetup_datum; - state->readtup = readtup_datum; - state->reversedirection = reversedirection_datum; - - state->datumType = datumType; - - /* Prepare SortSupport data */ - state->onlyKey = (SortSupport) palloc0(sizeof(SortSupportData)); - - state->onlyKey->ssup_cxt = CurrentMemoryContext; - state->onlyKey->ssup_collation = sortCollation; - state->onlyKey->ssup_nulls_first = nullsFirstFlag; - - PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey); - - /* lookup necessary attributes of the datum type */ - get_typlenbyval(datumType, &typlen, &typbyval); - state->datumTypeLen = typlen; - state->datumTypeByVal = typbyval; - - MemoryContextSwitchTo(oldcontext); - - return state; -} - -/* - * rum_tuplesort_set_bound - * - * Advise tuplesort that at most the first N result tuples are required. - * - * Must be called before inserting any tuples. (Actually, we could allow it - * as long as the sort hasn't spilled to disk, but there seems no need for - * delayed calls at the moment.) - * - * This is a hint only. The tuplesort may still return more tuples than - * requested. 
- */ -void -rum_tuplesort_set_bound(RumTuplesortstate *state, int64 bound) -{ - /* Assert we're called before loading any tuples */ - Assert(state->status == TSS_INITIAL); - Assert(state->memtupcount == 0); - Assert(!state->bounded); - -#ifdef DEBUG_BOUNDED_SORT - /* Honor GUC setting that disables the feature (for easy testing) */ - if (!optimize_bounded_sort) - return; -#endif - - /* We want to be able to compute bound * 2, so limit the setting */ - if (bound > (int64) (INT_MAX / 2)) - return; - - state->bounded = true; - state->bound = (int) bound; -} - /* * rum_tuplesort_end * @@ -1512,35 +985,8 @@ grow_memtuples(RumTuplesortstate *state) return false; } -/* - * Accept one tuple while collecting input data for sort. - * - * Note that the input data is always copied; the caller need not save it. - */ -void -rum_tuplesort_puttupleslot(RumTuplesortstate *state, TupleTableSlot *slot) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * Copy the given tuple into memory we control, and decrease availMem. - * Then call the common code. - */ - COPYTUP(state, &stup, (void *) slot); - - puttuple_common(state, &stup); - - MemoryContextSwitchTo(oldcontext); -} - -/* - * Accept one tuple while collecting input data for sort. - * - * Note that the input data is always copied; the caller need not save it. - */ void -rum_tuplesort_putheaptuple(RumTuplesortstate *state, HeapTuple tup) +rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem * item) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1549,20 +995,15 @@ rum_tuplesort_putheaptuple(RumTuplesortstate *state, HeapTuple tup) * Copy the given tuple into memory we control, and decrease availMem. * Then call the common code. 
*/ - COPYTUP(state, &stup, (void *) tup); + COPYTUP(state, &stup, (void *) item); puttuple_common(state, &stup); MemoryContextSwitchTo(oldcontext); } -/* - * Accept one index tuple while collecting input data for sort. - * - * Note that the input tuple is always copied; the caller need not save it. - */ void -rum_tuplesort_putindextuple(RumTuplesortstate *state, IndexTuple tuple) +rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem * item) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1571,7 +1012,7 @@ rum_tuplesort_putindextuple(RumTuplesortstate *state, IndexTuple tuple) * Copy the given tuple into memory we control, and decrease availMem. * Then call the common code. */ - COPYTUP(state, &stup, (void *) tuple); + COPYTUP(state, &stup, (void *) item); puttuple_common(state, &stup); @@ -1579,96 +1020,28 @@ rum_tuplesort_putindextuple(RumTuplesortstate *state, IndexTuple tuple) } /* - * Accept one Datum while collecting input data for sort. - * - * If the Datum is pass-by-ref type, the value will be copied. + * Shared code for tuple and datum cases. */ -void -rum_tuplesort_putdatum(RumTuplesortstate *state, Datum val, bool isNull) +static void +puttuple_common(RumTuplesortstate *state, SortTuple *tuple) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * If it's a pass-by-reference value, copy it into memory we control, and - * decrease availMem. Then call the common code. 
- */ - if (isNull || state->datumTypeByVal) - { - stup.datum1 = val; - stup.isnull1 = isNull; - stup.tuple = NULL; /* no separate storage */ - } - else + switch (state->status) { - stup.datum1 = datumCopy(val, false, state->datumTypeLen); - stup.isnull1 = false; - stup.tuple = DatumGetPointer(stup.datum1); - USEMEM(state, GetMemoryChunkSpace(stup.tuple)); - } + case TSS_INITIAL: - puttuple_common(state, &stup); - - MemoryContextSwitchTo(oldcontext); -} - -void -rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem * item) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * Copy the given tuple into memory we control, and decrease availMem. - * Then call the common code. - */ - COPYTUP(state, &stup, (void *) item); - - puttuple_common(state, &stup); - - MemoryContextSwitchTo(oldcontext); -} - -void -rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem * item) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * Copy the given tuple into memory we control, and decrease availMem. - * Then call the common code. - */ - COPYTUP(state, &stup, (void *) item); - - puttuple_common(state, &stup); - - MemoryContextSwitchTo(oldcontext); -} - -/* - * Shared code for tuple and datum cases. - */ -static void -puttuple_common(RumTuplesortstate *state, SortTuple *tuple) -{ - switch (state->status) - { - case TSS_INITIAL: - - /* - * Save the tuple into the unsorted array. First, grow the array - * as needed. Note that we try to grow the array when there is - * still one free slot remaining --- if we fail, there'll still be - * room to store the incoming tuple, and then we'll switch to - * tape-based operation. - */ - if (state->memtupcount >= state->memtupsize - 1) - { - (void) grow_memtuples(state); - Assert(state->memtupcount < state->memtupsize); - } - state->memtuples[state->memtupcount++] = *tuple; + /* + * Save the tuple into the unsorted array. 
First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. + */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; /* * Check if it's time to switch over to a bounded heapsort. We do @@ -2065,115 +1438,6 @@ rum_tuplesort_gettuple_common(RumTuplesortstate *state, bool forward, } } -/* - * Fetch the next tuple in either forward or back direction. - * If successful, put tuple in slot and return true; else, clear the slot - * and return false. - */ -bool -rum_tuplesort_gettupleslot(RumTuplesortstate *state, bool forward, - TupleTableSlot *slot) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - bool should_free; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, &should_free)) - stup.tuple = NULL; - - MemoryContextSwitchTo(oldcontext); - - if (stup.tuple) - { - ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, should_free); - return true; - } - else - { - ExecClearTuple(slot); - return false; - } -} - -/* - * Fetch the next tuple in either forward or back direction. - * Returns NULL if no more tuples. If *should_free is set, the - * caller must pfree the returned tuple when done with it. - */ -HeapTuple -rum_tuplesort_getheaptuple(RumTuplesortstate *state, bool forward, bool *should_free) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, should_free)) - stup.tuple = NULL; - - MemoryContextSwitchTo(oldcontext); - - return stup.tuple; -} - -/* - * Fetch the next index tuple in either forward or back direction. - * Returns NULL if no more tuples. 
If *should_free is set, the - * caller must pfree the returned tuple when done with it. - */ -IndexTuple -rum_tuplesort_getindextuple(RumTuplesortstate *state, bool forward, - bool *should_free) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, should_free)) - stup.tuple = NULL; - - MemoryContextSwitchTo(oldcontext); - - return (IndexTuple) stup.tuple; -} - -/* - * Fetch the next Datum in either forward or back direction. - * Returns false if no more datums. - * - * If the Datum is pass-by-ref type, the returned value is freshly palloc'd - * and is now owned by the caller. - */ -bool -rum_tuplesort_getdatum(RumTuplesortstate *state, bool forward, - Datum *val, bool *isNull) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - bool should_free; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, &should_free)) - { - MemoryContextSwitchTo(oldcontext); - return false; - } - - if (stup.isnull1 || state->datumTypeByVal) - { - *val = stup.datum1; - *isNull = stup.isnull1; - } - else - { - if (should_free) - *val = stup.datum1; - else - *val = datumCopy(stup.datum1, false, state->datumTypeLen); - *isNull = false; - } - - MemoryContextSwitchTo(oldcontext); - - return true; -} - RumSortItem * rum_tuplesort_getrum(RumTuplesortstate *state, bool forward, bool *should_free) { @@ -2846,165 +2110,6 @@ dumptuples(RumTuplesortstate *state, bool alltuples) } } -/* - * rum_tuplesort_rescan - rewind and replay the scan - */ -void -rum_tuplesort_rescan(RumTuplesortstate *state) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - - Assert(state->randomAccess); - - switch (state->status) - { - case TSS_SORTEDINMEM: - state->current = 0; - state->eof_reached = false; - state->markpos_offset = 0; - state->markpos_eof = false; - break; - case TSS_SORTEDONTAPE: - LogicalTapeRewindForRead(state->tapeset, - 
state->result_tape, - state->read_buffer_size); - state->eof_reached = false; - state->markpos_block = 0L; - state->markpos_offset = 0; - state->markpos_eof = false; - break; - default: - elog(ERROR, "invalid tuplesort state"); - break; - } - - MemoryContextSwitchTo(oldcontext); -} - -/* - * rum_tuplesort_markpos - saves current position in the merged sort file - */ -void -rum_tuplesort_markpos(RumTuplesortstate *state) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - - Assert(state->randomAccess); - - switch (state->status) - { - case TSS_SORTEDINMEM: - state->markpos_offset = state->current; - state->markpos_eof = state->eof_reached; - break; - case TSS_SORTEDONTAPE: - LogicalTapeTell(state->tapeset, - state->result_tape, - &state->markpos_block, - &state->markpos_offset); - state->markpos_eof = state->eof_reached; - break; - default: - elog(ERROR, "invalid tuplesort state"); - break; - } - - MemoryContextSwitchTo(oldcontext); -} - -/* - * rum_tuplesort_restorepos - restores current position in merged sort file to - * last saved position - */ -void -rum_tuplesort_restorepos(RumTuplesortstate *state) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - - Assert(state->randomAccess); - - switch (state->status) - { - case TSS_SORTEDINMEM: - state->current = state->markpos_offset; - state->eof_reached = state->markpos_eof; - break; - case TSS_SORTEDONTAPE: -#if PG_VERSION_NUM < 100000 - if (!LogicalTapeSeek(state->tapeset, - state->result_tape, - state->markpos_block, - state->markpos_offset)) - elog(ERROR, "rum_tuplesort_restorepos failed"); -#else - LogicalTapeSeek(state->tapeset, - state->result_tape, - state->markpos_block, - state->markpos_offset); -#endif - state->eof_reached = state->markpos_eof; - break; - default: - elog(ERROR, "invalid tuplesort state"); - break; - } - - MemoryContextSwitchTo(oldcontext); -} - -/* - * rum_tuplesort_get_stats - extract summary statistics - * - * This can be called after 
rum_tuplesort_performsort() finishes to obtain - * printable summary information about how the sort was performed. - * spaceUsed is measured in kilobytes. - */ -void -rum_tuplesort_get_stats(RumTuplesortstate *state, - const char **sortMethod, - const char **spaceType, - long *spaceUsed) -{ - /* - * Note: it might seem we should provide both memory and disk usage for a - * disk-based sort. However, the current code doesn't track memory space - * accurately once we have begun to return tuples to the caller (since we - * don't account for pfree's the caller is expected to do), so we cannot - * rely on availMem in a disk sort. This does not seem worth the overhead - * to fix. Is it worth creating an API for the memory context code to - * tell us how much is actually used in sortcontext? - */ - if (state->tapeset) - { - *spaceType = "Disk"; - *spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024); - } - else - { - *spaceType = "Memory"; - *spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; - } - - switch (state->status) - { - case TSS_SORTEDINMEM: - if (state->boundUsed) - *sortMethod = "top-N heapsort"; - else - *sortMethod = "quicksort"; - break; - case TSS_SORTEDONTAPE: - *sortMethod = "external sort"; - break; - case TSS_FINALMERGE: - *sortMethod = "external merge"; - break; - default: - *sortMethod = "still in progress"; - break; - } -} - /* * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. @@ -3223,775 +2328,6 @@ markrunend(RumTuplesortstate *state, int tapenum) } -/* - * Inline-able copy of FunctionCall2Coll() to save some cycles in sorting. 
- */ -static inline Datum -myFunctionCall2Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2) -{ - FunctionCallInfoData fcinfo; - Datum result; - - InitFunctionCallInfoData(fcinfo, flinfo, 2, collation, NULL, NULL); - - fcinfo.arg[0] = arg1; - fcinfo.arg[1] = arg2; - fcinfo.argnull[0] = false; - fcinfo.argnull[1] = false; - - result = FunctionCallInvoke(&fcinfo); - - /* Check for null result, since caller is clearly not expecting one */ - if (fcinfo.isnull) - elog(ERROR, "function %u returned NULL", fcinfo.flinfo->fn_oid); - - return result; -} - -/* - * Apply a sort function (by now converted to fmgr lookup form) - * and return a 3-way comparison result. This takes care of handling - * reverse-sort and NULLs-ordering properly. We assume that DESC and - * NULLS_FIRST options are encoded in sk_flags the same way btree does it. - */ -static inline int32 -inlineApplySortFunction(FmgrInfo *sortFunction, int sk_flags, Oid collation, - Datum datum1, bool isNull1, - Datum datum2, bool isNull2) -{ - int32 compare; - - if (isNull1) - { - if (isNull2) - compare = 0; /* NULL "=" NULL */ - else if (sk_flags & SK_BT_NULLS_FIRST) - compare = -1; /* NULL "<" NOT_NULL */ - else - compare = 1; /* NULL ">" NOT_NULL */ - } - else if (isNull2) - { - if (sk_flags & SK_BT_NULLS_FIRST) - compare = 1; /* NOT_NULL ">" NULL */ - else - compare = -1; /* NOT_NULL "<" NULL */ - } - else - { - compare = DatumGetInt32(myFunctionCall2Coll(sortFunction, collation, - datum1, datum2)); - - if (sk_flags & SK_BT_DESC) - compare = -compare; - } - - return compare; -} - - -/* - * Routines specialized for HeapTuple (actually MinimalTuple) case - */ - -static int -comparetup_heap(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) -{ - SortSupport sortKey = state->sortKeys; - HeapTupleData ltup; - HeapTupleData rtup; - TupleDesc tupDesc; - int nkey; - int32 compare; - - /* Compare the leading sort key */ - compare = ApplySortComparator(a->datum1, a->isnull1, - b->datum1, 
b->isnull1, - sortKey); - if (compare != 0) - return compare; - - /* Compare additional sort keys */ - ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; - ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); - rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; - rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); - tupDesc = state->tupDesc; - sortKey++; - for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) - { - AttrNumber attno = sortKey->ssup_attno; - Datum datum1, - datum2; - bool isnull1, - isnull2; - - datum1 = heap_getattr(<up, attno, tupDesc, &isnull1); - datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); - - compare = ApplySortComparator(datum1, isnull1, - datum2, isnull2, - sortKey); - if (compare != 0) - return compare; - } - - return 0; -} - -static void -copytup_heap(RumTuplesortstate *state, SortTuple *stup, void *tup) -{ - /* - * We expect the passed "tup" to be a TupleTableSlot, and form a - * MinimalTuple using the exported interface for that. 
- */ - TupleTableSlot *slot = (TupleTableSlot *) tup; - MinimalTuple tuple; - HeapTupleData htup; - - /* copy the tuple into sort storage */ - tuple = ExecCopySlotMinimalTuple(slot); - stup->tuple = (void *) tuple; - USEMEM(state, GetMemoryChunkSpace(tuple)); - /* set up first-column key value */ - htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; - htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); - stup->datum1 = heap_getattr(&htup, - state->sortKeys[0].ssup_attno, - state->tupDesc, - &stup->isnull1); -} - -static void -writetup_heap(RumTuplesortstate *state, int tapenum, SortTuple *stup) -{ - MinimalTuple tuple = (MinimalTuple) stup->tuple; - - /* the part of the MinimalTuple we'll write: */ - char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; - unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; - - /* total on-disk footprint: */ - unsigned int tuplen = tupbodylen + sizeof(int); - - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tuplen, sizeof(tuplen)); - LogicalTapeWrite(state->tapeset, tapenum, - (void *) tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tuplen, sizeof(tuplen)); - - FREEMEM(state, GetMemoryChunkSpace(tuple)); - heap_free_minimal_tuple(tuple); -} - -static void -readtup_heap(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len) -{ - unsigned int tupbodylen = len - sizeof(int); - unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; - MinimalTuple tuple = (MinimalTuple) palloc(tuplen); - char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; - HeapTupleData htup; - - USEMEM(state, GetMemoryChunkSpace(tuple)); - /* read in the tuple proper */ - tuple->t_len = tuplen; - LogicalTapeReadExact(state->tapeset, tapenum, - tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); - stup->tuple = (void *) tuple; - /* set up first-column key value */ - htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; - htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); - stup->datum1 = heap_getattr(&htup, - state->sortKeys[0].ssup_attno, - state->tupDesc, - &stup->isnull1); -} - -static void -reversedirection_heap(RumTuplesortstate *state) -{ - SortSupport sortKey = state->sortKeys; - int nkey; - - for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) - { - sortKey->ssup_reverse = !sortKey->ssup_reverse; - sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; - } -} - - -/* - * Routines specialized for the CLUSTER case (HeapTuple data, with - * comparisons per a btree index definition) - */ - -static int -comparetup_cluster(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state) -{ - ScanKey scanKey = state->indexScanKey; - HeapTuple ltup; - HeapTuple rtup; - TupleDesc tupDesc; - int nkey; - int32 compare; - - /* Compare the leading sort key, if it's simple */ -#if PG_VERSION_NUM >= 110000 - if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) -#else - if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) -#endif - { - compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags, - scanKey->sk_collation, - a->datum1, a->isnull1, - b->datum1, b->isnull1); - if (compare != 0 || state->nKeys == 1) - return compare; - /* Compare additional columns the hard way */ - scanKey++; - nkey = 1; - } - else - { - /* Must compare all keys the hard way */ - nkey = 0; - } - - /* Compare additional sort keys */ - ltup = (HeapTuple) a->tuple; - rtup = (HeapTuple) b->tuple; - - if (state->indexInfo->ii_Expressions == NULL) - { - /* If not expression index, just compare the proper heap attrs */ - tupDesc = state->tupDesc; - - for (; nkey < state->nKeys; nkey++, scanKey++) - { -#if PG_VERSION_NUM >= 110000 - AttrNumber attno = 
state->indexInfo->ii_IndexAttrNumbers[nkey]; -#else - AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[nkey]; -#endif - Datum datum1, - datum2; - bool isnull1, - isnull2; - - datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); - datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2); - - compare = inlineApplySortFunction(&scanKey->sk_func, - scanKey->sk_flags, - scanKey->sk_collation, - datum1, isnull1, - datum2, isnull2); - if (compare != 0) - return compare; - } - } - else - { - /* - * In the expression index case, compute the whole index tuple and - * then compare values. It would perhaps be faster to compute only as - * many columns as we need to compare, but that would require - * duplicating all the logic in FormIndexDatum. - */ - Datum l_index_values[INDEX_MAX_KEYS]; - bool l_index_isnull[INDEX_MAX_KEYS]; - Datum r_index_values[INDEX_MAX_KEYS]; - bool r_index_isnull[INDEX_MAX_KEYS]; - TupleTableSlot *ecxt_scantuple; - - /* Reset context each time to prevent memory leakage */ - ResetPerTupleExprContext(state->estate); - - ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; - -#if PG_VERSION_NUM >= 120000 - ExecStoreHeapTuple(ltup, ecxt_scantuple, false); -#else - ExecStoreTuple(ltup, ecxt_scantuple, InvalidBuffer, false); -#endif - FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, - l_index_values, l_index_isnull); - -#if PG_VERSION_NUM >= 120000 - ExecStoreHeapTuple(rtup, ecxt_scantuple, false); -#else - ExecStoreTuple(rtup, ecxt_scantuple, InvalidBuffer, false); -#endif - FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, - r_index_values, r_index_isnull); - - for (; nkey < state->nKeys; nkey++, scanKey++) - { - compare = inlineApplySortFunction(&scanKey->sk_func, - scanKey->sk_flags, - scanKey->sk_collation, - l_index_values[nkey], - l_index_isnull[nkey], - r_index_values[nkey], - r_index_isnull[nkey]); - if (compare != 0) - return compare; - } - } - - return 0; -} - -static void 
-copytup_cluster(RumTuplesortstate *state, SortTuple *stup, void *tup) -{ - HeapTuple tuple = (HeapTuple) tup; -#if PG_VERSION_NUM >= 110000 - AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[0]; -#else - AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[0]; -#endif - - /* copy the tuple into sort storage */ - tuple = heap_copytuple(tuple); - stup->tuple = (void *) tuple; - USEMEM(state, GetMemoryChunkSpace(tuple)); - /* set up first-column key value, if it's a simple column */ - if (attno != 0) - stup->datum1 = heap_getattr(tuple, - attno, - state->tupDesc, - &stup->isnull1); -} - -static void -writetup_cluster(RumTuplesortstate *state, int tapenum, SortTuple *stup) -{ - HeapTuple tuple = (HeapTuple) stup->tuple; - unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); - - /* We need to store t_self, but not other fields of HeapTupleData */ - LogicalTapeWrite(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); - LogicalTapeWrite(state->tapeset, tapenum, - &tuple->t_self, sizeof(ItemPointerData)); - LogicalTapeWrite(state->tapeset, tapenum, - tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeWrite(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); - - FREEMEM(state, GetMemoryChunkSpace(tuple)); - heap_freetuple(tuple); -} - -static void -readtup_cluster(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int tuplen) -{ - unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); - HeapTuple tuple = (HeapTuple) palloc(t_len + HEAPTUPLESIZE); -#if PG_VERSION_NUM >= 110000 - AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[0]; -#else - AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[0]; -#endif - - USEMEM(state, GetMemoryChunkSpace(tuple)); - /* Reconstruct the HeapTupleData header */ - tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); - tuple->t_len = t_len; - LogicalTapeReadExact(state->tapeset, tapenum, - &tuple->t_self, sizeof(ItemPointerData)); - /* We don't currently bother to reconstruct t_tableOid */ - tuple->t_tableOid = InvalidOid; - /* Read in the tuple body */ - LogicalTapeReadExact(state->tapeset, tapenum, - tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); - stup->tuple = (void *) tuple; - /* set up first-column key value, if it's a simple column */ - if (attno != 0) - stup->datum1 = heap_getattr(tuple, - attno, - state->tupDesc, - &stup->isnull1); -} - - -/* - * Routines specialized for IndexTuple case - * - * The btree and hash cases require separate comparison functions, but the - * IndexTuple representation is the same so the copy/write/read support - * functions can be shared. - */ - -static int -comparetup_index_btree(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state) -{ - /* - * This is similar to _bt_tuplecompare(), but we have already done the - * index_getattr calls for the first column, and we need to keep track of - * whether any null fields are present. Also see the special treatment - * for equal keys at the end. 
- */ - ScanKey scanKey = state->indexScanKey; - IndexTuple tuple1; - IndexTuple tuple2; - int keysz; - TupleDesc tupDes; - bool equal_hasnull = false; - int nkey; - int32 compare; - - /* Compare the leading sort key */ - compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags, - scanKey->sk_collation, - a->datum1, a->isnull1, - b->datum1, b->isnull1); - if (compare != 0) - return compare; - - /* they are equal, so we only need to examine one null flag */ - if (a->isnull1) - equal_hasnull = true; - - /* Compare additional sort keys */ - tuple1 = (IndexTuple) a->tuple; - tuple2 = (IndexTuple) b->tuple; - keysz = state->nKeys; - tupDes = RelationGetDescr(state->indexRel); - scanKey++; - for (nkey = 2; nkey <= keysz; nkey++, scanKey++) - { - Datum datum1, - datum2; - bool isnull1, - isnull2; - - datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); - datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2); - - compare = inlineApplySortFunction(&scanKey->sk_func, scanKey->sk_flags, - scanKey->sk_collation, - datum1, isnull1, - datum2, isnull2); - if (compare != 0) - return compare; /* done when we find unequal attributes */ - - /* they are equal, so we only need to examine one null flag */ - if (isnull1) - equal_hasnull = true; - } - - /* - * If btree has asked us to enforce uniqueness, complain if two equal - * tuples are detected (unless there was at least one NULL field). - * - * It is sufficient to make the test here, because if two tuples are equal - * they *must* get compared at some stage of the sort --- otherwise the - * sort algorithm wouldn't have checked whether one must appear before the - * other. 
- */ - if (state->enforceUnique && !equal_hasnull) - { - Datum values[INDEX_MAX_KEYS]; - bool isnull[INDEX_MAX_KEYS]; - char *key_desc; - - /* - * Some rather brain-dead implementations of qsort (such as the one in - * QNX 4) will sometimes call the comparison routine to compare a - * value to itself, but we always use our own implementation, which - * does not. - */ - Assert(tuple1 != tuple2); - - index_deform_tuple(tuple1, tupDes, values, isnull); - - key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); - - ereport(ERROR, - (errcode(ERRCODE_UNIQUE_VIOLATION), - errmsg("could not create unique index \"%s\"", - RelationGetRelationName(state->indexRel)), - key_desc ? errdetail("Key %s is duplicated.", key_desc) : - errdetail("Duplicate keys exist."), - errtableconstraint(state->heapRel, - RelationGetRelationName(state->indexRel)))); - } - - /* - * If key values are equal, we sort on ItemPointer. This does not affect - * validity of the finished index, but it may be useful to have index - * scans in physical order. - */ - { - BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); - BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); - - if (blk1 != blk2) - return (blk1 < blk2) ? -1 : 1; - } - { - OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); - OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); - - if (pos1 != pos2) - return (pos1 < pos2) ? -1 : 1; - } - - return 0; -} - -static int -comparetup_index_hash(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state) -{ - uint32 hash1; - uint32 hash2; - IndexTuple tuple1; - IndexTuple tuple2; - - /* - * Fetch hash keys and mask off bits we don't want to sort by. We know - * that the first column of the index tuple is the hash key. 
- */ - Assert(!a->isnull1); - hash1 = DatumGetUInt32(a->datum1) & state->hash_mask; - Assert(!b->isnull1); - hash2 = DatumGetUInt32(b->datum1) & state->hash_mask; - - if (hash1 > hash2) - return 1; - else if (hash1 < hash2) - return -1; - - /* - * If hash values are equal, we sort on ItemPointer. This does not affect - * validity of the finished index, but it may be useful to have index - * scans in physical order. - */ - tuple1 = (IndexTuple) a->tuple; - tuple2 = (IndexTuple) b->tuple; - - { - BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); - BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); - - if (blk1 != blk2) - return (blk1 < blk2) ? -1 : 1; - } - { - OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); - OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); - - if (pos1 != pos2) - return (pos1 < pos2) ? -1 : 1; - } - - return 0; -} - -static void -copytup_index(RumTuplesortstate *state, SortTuple *stup, void *tup) -{ - IndexTuple tuple = (IndexTuple) tup; - unsigned int tuplen = IndexTupleSize(tuple); - IndexTuple newtuple; - - /* copy the tuple into sort storage */ - newtuple = (IndexTuple) palloc(tuplen); - memcpy(newtuple, tuple, tuplen); - USEMEM(state, GetMemoryChunkSpace(newtuple)); - stup->tuple = (void *) newtuple; - /* set up first-column key value */ - stup->datum1 = index_getattr(newtuple, - 1, - RelationGetDescr(state->indexRel), - &stup->isnull1); -} - -static void -writetup_index(RumTuplesortstate *state, int tapenum, SortTuple *stup) -{ - IndexTuple tuple = (IndexTuple) stup->tuple; - unsigned int tuplen; - - tuplen = IndexTupleSize(tuple) + sizeof(tuplen); - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tuplen, sizeof(tuplen)); - LogicalTapeWrite(state->tapeset, tapenum, - (void *) tuple, IndexTupleSize(tuple)); - if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &tuplen, sizeof(tuplen)); - - FREEMEM(state, GetMemoryChunkSpace(tuple)); - pfree(tuple); -} - -static void -readtup_index(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len) -{ - unsigned int tuplen = len - sizeof(unsigned int); - IndexTuple tuple = (IndexTuple) palloc(tuplen); - - USEMEM(state, GetMemoryChunkSpace(tuple)); - LogicalTapeReadExact(state->tapeset, tapenum, - tuple, tuplen); - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); - stup->tuple = (void *) tuple; - /* set up first-column key value */ - stup->datum1 = index_getattr(tuple, - 1, - RelationGetDescr(state->indexRel), - &stup->isnull1); -} - -static void -reversedirection_index_btree(RumTuplesortstate *state) -{ - ScanKey scanKey = state->indexScanKey; - int nkey; - - for (nkey = 0; nkey < state->nKeys; nkey++, scanKey++) - { - scanKey->sk_flags ^= (SK_BT_DESC | SK_BT_NULLS_FIRST); - } -} - -static void -reversedirection_index_hash(RumTuplesortstate *state) -{ - /* We don't support reversing direction in a hash index sort */ - elog(ERROR, "reversedirection_index_hash is not implemented"); -} - - -/* - * Routines specialized for DatumTuple case - */ - -static int -comparetup_datum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) -{ - return ApplySortComparator(a->datum1, a->isnull1, - b->datum1, b->isnull1, - state->onlyKey); -} - -static void -copytup_datum(RumTuplesortstate *state, SortTuple *stup, void *tup) -{ - /* Not currently needed */ - elog(ERROR, "copytup_datum() should not be called"); -} - -static void -writetup_datum(RumTuplesortstate *state, int tapenum, SortTuple *stup) -{ - void *waddr; - unsigned int tuplen; - unsigned int writtenlen; - - if (stup->isnull1) - { - waddr = NULL; - tuplen = 0; - } - else if (state->datumTypeByVal) - { - waddr = &stup->datum1; - tuplen = sizeof(Datum); - } - else - { - 
waddr = DatumGetPointer(stup->datum1); - tuplen = datumGetSize(stup->datum1, false, state->datumTypeLen); - Assert(tuplen != 0); - } - - writtenlen = tuplen + sizeof(unsigned int); - - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(state->tapeset, tapenum, - waddr, tuplen); - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeWrite(state->tapeset, tapenum, - (void *) &writtenlen, sizeof(writtenlen)); - - if (stup->tuple) - { - FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); - pfree(stup->tuple); - } -} - -static void -readtup_datum(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len) -{ - unsigned int tuplen = len - sizeof(unsigned int); - - if (tuplen == 0) - { - /* it's NULL */ - stup->datum1 = (Datum) 0; - stup->isnull1 = true; - stup->tuple = NULL; - } - else if (state->datumTypeByVal) - { - Assert(tuplen == sizeof(Datum)); - LogicalTapeReadExact(state->tapeset, tapenum, - &stup->datum1, tuplen); - stup->isnull1 = false; - stup->tuple = NULL; - } - else - { - void *raddr = palloc(tuplen); - - LogicalTapeReadExact(state->tapeset, tapenum, - raddr, tuplen); - stup->datum1 = PointerGetDatum(raddr); - stup->isnull1 = false; - stup->tuple = raddr; - USEMEM(state, GetMemoryChunkSpace(raddr)); - } - - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); -} - -static void -reversedirection_datum(RumTuplesortstate *state) -{ - state->onlyKey->ssup_reverse = !state->onlyKey->ssup_reverse; - state->onlyKey->ssup_nulls_first = !state->onlyKey->ssup_nulls_first; -} - /* * Convenience routine to free a tuple previously loaded into sort memory */ diff --git a/src/rumsort.h b/src/rumsort.h index 9cb74feeab..452fdfc596 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -3,13 +3,11 @@ * rumsort.h * Generalized tuple sorting routines. 
* - * This module handles sorting of heap tuples, index tuples, or single - * Datums (and could easily support other kinds of sortable objects, - * if necessary). It works efficiently for both small and large amounts - * of data. Small amounts are sorted in-memory using qsort(). Large - * amounts are sorted using temporary files and a standard external sort - * algorithm. + * This module handles sorting of RumSortItem or RumScanItem structures. + * It contains copy of static functions from + * src/backend/utils/sort/tuplesort.c. * + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * @@ -22,43 +20,14 @@ #include "postgres.h" #include "fmgr.h" -#include "access/itup.h" #include "executor/tuptable.h" -#include "utils/relcache.h" -/* Tuplesortstate is an opaque type whose details are not known outside - * tuplesort.c. +/* RumTuplesortstate is an opaque type whose details are not known outside + * rumsort.c. */ typedef struct RumTuplesortstate RumTuplesortstate; struct RumScanItem; -/* - * We provide multiple interfaces to what is essentially the same code, - * since different callers have different data to be sorted and want to - * specify the sort key information differently. There are two APIs for - * sorting HeapTuples and two more for sorting IndexTuples. Yet another - * API supports sorting bare Datums. - * - * The "heap" API actually stores/sorts MinimalTuples, which means it doesn't - * preserve the system columns (tuple identity and transaction visibility - * info). The sort keys are specified by column numbers within the tuples - * and sort operator OIDs. We save some cycles by passing and returning the - * tuples in TupleTableSlots, rather than forming actual HeapTuples (which'd - * have to be converted to MinimalTuples). This API works well for sorts - * executed as parts of plan trees. 
- * - * The "cluster" API stores/sorts full HeapTuples including all visibility - * info. The sort keys are specified by reference to a btree index that is - * defined on the relation to be sorted. Note that putheaptuple/getheaptuple - * go with this API, not the "begin_heap" one! - * - * The "index_btree" API stores/sorts IndexTuples (preserving all their - * header fields). The sort keys are specified by a btree index definition. - * - * The "index_hash" API is similar to index_btree, but the tuples are - * actually sorted by their hash codes not the raw data. - */ - typedef struct { ItemPointerData iptr; @@ -69,52 +38,17 @@ typedef struct #define RumSortItemSize(nKeys) (offsetof(RumSortItem,data)+(nKeys)*sizeof(float8)) extern MemoryContext rum_tuplesort_get_memorycontext(RumTuplesortstate *state); -extern RumTuplesortstate *rum_tuplesort_begin_heap(TupleDesc tupDesc, - int nkeys, AttrNumber *attNums, - Oid *sortOperators, Oid *sortCollations, - bool *nullsFirstFlags, - int workMem, bool randomAccess); -extern RumTuplesortstate *rum_tuplesort_begin_cluster(TupleDesc tupDesc, - Relation indexRel, - int workMem, bool randomAccess); -extern RumTuplesortstate *rum_tuplesort_begin_index_btree(Relation heapRel, - Relation indexRel, - bool enforceUnique, - int workMem, bool randomAccess); -extern RumTuplesortstate *rum_tuplesort_begin_index_hash(Relation heapRel, - Relation indexRel, - uint32 hash_mask, - int workMem, bool randomAccess); -extern RumTuplesortstate *rum_tuplesort_begin_datum(Oid datumType, - Oid sortOperator, Oid sortCollation, - bool nullsFirstFlag, - int workMem, bool randomAccess); + extern RumTuplesortstate *rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer); extern RumTuplesortstate *rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp); -extern void rum_tuplesort_set_bound(RumTuplesortstate *state, int64 bound); - -extern void rum_tuplesort_puttupleslot(RumTuplesortstate *state, - TupleTableSlot *slot); 
-extern void rum_tuplesort_putheaptuple(RumTuplesortstate *state, HeapTuple tup); -extern void rum_tuplesort_putindextuple(RumTuplesortstate *state, IndexTuple tuple); -extern void rum_tuplesort_putdatum(RumTuplesortstate *state, Datum val, - bool isNull); extern void rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem * item); extern void rum_tuplesort_putrumitem(RumTuplesortstate *state, struct RumScanItem * item); extern void rum_tuplesort_performsort(RumTuplesortstate *state); -extern bool rum_tuplesort_gettupleslot(RumTuplesortstate *state, bool forward, - TupleTableSlot *slot); -extern HeapTuple rum_tuplesort_getheaptuple(RumTuplesortstate *state, bool forward, - bool *should_free); -extern IndexTuple rum_tuplesort_getindextuple(RumTuplesortstate *state, bool forward, - bool *should_free); -extern bool rum_tuplesort_getdatum(RumTuplesortstate *state, bool forward, - Datum *val, bool *isNull); extern RumSortItem *rum_tuplesort_getrum(RumTuplesortstate *state, bool forward, bool *should_free); extern struct RumScanItem *rum_tuplesort_getrumitem(RumTuplesortstate *state, bool forward, @@ -122,21 +56,6 @@ extern struct RumScanItem *rum_tuplesort_getrumitem(RumTuplesortstate *state, bo extern void rum_tuplesort_end(RumTuplesortstate *state); -extern void rum_tuplesort_get_stats(RumTuplesortstate *state, - const char **sortMethod, - const char **spaceType, - long *spaceUsed); - extern int rum_tuplesort_merge_order(long allowedMem); -/* - * These routines may only be called if randomAccess was specified 'true'. - * Likewise, backwards scan in gettuple/getdatum is only allowed if - * randomAccess was specified. 
- */ - -extern void rum_tuplesort_rescan(RumTuplesortstate *state); -extern void rum_tuplesort_markpos(RumTuplesortstate *state); -extern void rum_tuplesort_restorepos(RumTuplesortstate *state); - #endif /* RUMSORT_H */ diff --git a/src/rumutil.c b/src/rumutil.c index bca37c892a..d7401673fa 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -1057,8 +1057,40 @@ FunctionCall10Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, Datum arg6, Datum arg7, Datum arg8, Datum arg9, Datum arg10) { - FunctionCallInfoData fcinfo; Datum result; +#if PG_VERSION_NUM >= 120000 + LOCAL_FCINFO(fcinfo, 10); + + InitFunctionCallInfoData(*fcinfo, flinfo, 10, collation, NULL, NULL); + + fcinfo->args[0].value = arg1; + fcinfo->args[0].isnull = false; + fcinfo->args[1].value = arg2; + fcinfo->args[1].isnull = false; + fcinfo->args[2].value = arg3; + fcinfo->args[2].isnull = false; + fcinfo->args[3].value = arg4; + fcinfo->args[3].isnull = false; + fcinfo->args[4].value = arg5; + fcinfo->args[4].isnull = false; + fcinfo->args[5].value = arg6; + fcinfo->args[5].isnull = false; + fcinfo->args[6].value = arg7; + fcinfo->args[6].isnull = false; + fcinfo->args[7].value = arg8; + fcinfo->args[7].isnull = false; + fcinfo->args[8].value = arg9; + fcinfo->args[8].isnull = false; + fcinfo->args[9].value = arg10; + fcinfo->args[9].isnull = false; + + result = FunctionCallInvoke(fcinfo); + + /* Check for null result, since caller is clearly not expecting one */ + if (fcinfo->isnull) + elog(ERROR, "function %u returned NULL", fcinfo->flinfo->fn_oid); +#else + FunctionCallInfoData fcinfo; InitFunctionCallInfoData(fcinfo, flinfo, 10, collation, NULL, NULL); @@ -1088,6 +1120,7 @@ FunctionCall10Coll(FmgrInfo *flinfo, Oid collation, Datum arg1, Datum arg2, /* Check for null result, since caller is clearly not expecting one */ if (fcinfo.isnull) elog(ERROR, "function %u returned NULL", fcinfo.flinfo->fn_oid); +#endif return result; } diff --git a/tests/README.md b/tests/README.md index 
de04c4d617..77b7d78e88 100644 --- a/tests/README.md +++ b/tests/README.md @@ -3,7 +3,7 @@ Install testgres: ``` -pip install testgres +pip install testgres==0.4.0 ``` Run command: From d98b9f1b9f262af4b90b15f5645c95ddf76371e6 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Wed, 19 Jun 2019 15:52:26 +0300 Subject: [PATCH 062/182] Update copyrights --- LICENSE | 4 ++-- src/rum.h | 2 +- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 2 +- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 2 +- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/LICENSE b/LICENSE index c786b781e6..5b55f87617 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ RUM is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. -Copyright (c) 2015-2018, Postgres Professional +Portions Copyright (c) 2015-2019, Postgres Professional Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group Portions Copyright (c) 1994, The Regents of the University of California @@ -8,4 +8,4 @@ Permission to use, copy, modify, and distribute this software and its documentat IN NO EVENT SHALL POSTGRES PROFESSIONAL BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF POSTGRES PROFESSIONAL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
\ No newline at end of file +POSTGRES PROFESSIONAL SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND POSTGRES PROFESSIONAL HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. diff --git a/src/rum.h b/src/rum.h index ad0fa47e6e..36f2e21f4b 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,7 +3,7 @@ * rum.h * Exported definitions for RUM index. * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 2006-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index da3f2f3c52..86fab36074 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 073f90bdfe..90893f2825 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,7 +3,7 @@ * rum_ts_utils.c * various text-search functions * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumbtree.c b/src/rumbtree.c index c2106eeab2..2e1e520df9 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index 3c07ef5850..b9e94df375 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 19f9c1a81f..66049e8bf7 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index e87d8749b8..10334ec862 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index 0a77293e05..31840cf11a 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,7 +4,7 @@ * fetch tuples from a RUM scan. 
* * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/ruminsert.c b/src/ruminsert.c index dad7b33b3a..e34a247a1d 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 989d0d1ece..81713ac512 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 205526ff37..747f98f2cf 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index d7401673fa..2b2af134fb 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 1285c9d4ed..35cca00308 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index 39c2f5c1e1..1d73e1b73b 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. * - * Portions Copyright (c) 2015-2016, Postgres Professional + * Portions Copyright (c) 2015-2019, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * From 830f559351b1478a880ad22502e48f668e11215d Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 27 Jun 2019 11:14:13 +0300 Subject: [PATCH 063/182] Issue #61: Fix 32-bit result for array test --- expected/array_1.out | 30 ++++++++++++++++++------------ src/rumsort.c | 5 +++++ 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/expected/array_1.out b/expected/array_1.out index e88ae50589..ec3abfb693 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -851,20 +851,26 @@ SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> Order By: (i <=> '{51}'::smallint[]) (3 rows) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - i | ?column? 
+SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + i | distance ---------------------+------------------ {20,23,51} | 1.73205080756888 {33,51,20,77,23,65} | 2.44948974278318 - {23,76,34,23,2,20} | Infinity - {20,60,45,23,29} | Infinity - {23,89,38,20,40,95} | Infinity - {23,20,72} | Infinity - {73,23,20} | Infinity - {6,97,20,89,23} | Infinity - {20,98,30,23,1,66} | Infinity - {57,23,39,46,50,20} | Infinity - {81,20,26,22,23} | Infinity - {18,23,10,90,15,20} | Infinity + {23,76,34,23,2,20} | -1 + {20,60,45,23,29} | -1 + {23,89,38,20,40,95} | -1 + {23,20,72} | -1 + {73,23,20} | -1 + {6,97,20,89,23} | -1 + {20,98,30,23,1,66} | -1 + {57,23,39,46,50,20} | -1 + {81,20,26,22,23} | -1 + {18,23,10,90,15,20} | -1 (12 rows) diff --git a/src/rumsort.c b/src/rumsort.c index 8d6e8fe08e..6f4425c9ed 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -57,6 +57,11 @@ TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) * Below are copied definitions from src/backend/utils/sort/tuplesort.c. 
*/ +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + typedef struct { void *tuple; /* the tuple proper */ From 32625a763ddebc1a22ba9bbddc97e3cf8d472e8b Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 1 Jul 2019 15:30:26 +0300 Subject: [PATCH 064/182] Disable dtrace for rumsort.c --- src/rumsort.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 6f4425c9ed..8aa10fef4f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -24,7 +24,6 @@ #include "utils/guc.h" #include "utils/logtape.h" #include "utils/pg_rusage.h" -#include "utils/probes.h" #include "rum.h" /* RumItem */ @@ -40,14 +39,6 @@ #define LogicalTapeRewindForWrite(x, y) LogicalTapeRewind((x), (y), true) #endif -#if PG_VERSION_NUM >= 110000 -#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ -TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5, false) -#else -#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ -TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) -#endif - #if PG_VERSION_NUM >= 110000 #define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, NULL, NULL, 1) #define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) @@ -745,12 +736,6 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, state->nKeys = nKeys; - RUM_SORT_START(INDEX_SORT, - false, /* no unique check */ - state->nKeys, - workMem, - randomAccess); - state->comparetup = comparetup_rum; state->copytup = copytup_rum; state->writetup = writetup_rum; @@ -778,12 +763,6 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) "begin rumitem sort: workMem = %d", workMem); #endif - RUM_SORT_START(INDEX_SORT, - false, /* no unique check */ - 2, - workMem, - false); - state->cmp = cmp; state->comparetup = comparetup_rumitem; state->copytup = copytup_rumitem; @@ -841,15 +820,6 @@ rum_tuplesort_end(RumTuplesortstate *state) elog(LOG, "internal sort ended, %ld KB used: %s", spaceUsed, 
pg_rusage_show(&state->ru_start)); } - - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); -#else - - /* - * If you disabled TRACE_SORT, you can still probe sort__done, but you - * ain't getting space-used stats. - */ - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); #endif /* Free any execution state created for CLUSTER case */ From ee68be14026f4faba6fb20a54838a8e0aa3669f4 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 2 Jul 2019 14:36:58 +0300 Subject: [PATCH 065/182] Issue #61: Invalidate scan->xs_heaptid by hand. Earlier it was done by invalidating scan->xs_ctup by RelationGetIndexScan(). --- src/rumscan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rumscan.c b/src/rumscan.c index 81713ac512..34de9d4fc4 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -41,6 +41,13 @@ rumbeginscan(Relation rel, int nkeys, int norderbys) initRumState(&so->rumstate, scan->indexRelation); +#if PG_VERSION_NUM >= 120000 + /* + * Starting from PG 12 we need to invalidate result's item pointer. Earlier + * it was done by invalidating scan->xs_ctup by RelationGetIndexScan(). 
+ */ + ItemPointerSetInvalid(&scan->xs_heaptid); +#endif scan->opaque = so; return scan; From e34375aa36692003cc3a3bc7ec84c252831128c7 Mon Sep 17 00:00:00 2001 From: Akenteva Anna Date: Thu, 11 Jul 2019 18:08:16 +0300 Subject: [PATCH 066/182] PGPRO-2844: Use macros PG_INT32_MAX instead of magic number --- src/rum_ts_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 90893f2825..4cfda6b80a 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -873,7 +873,7 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, memset(qr->operandData, 0, sizeof(qr->operandData[0]) * qr->length); - ext->p = 0x7fffffff; + ext->p = PG_INT32_MAX; ext->q = 0; ptr = doc + ext->pos; From cd71e51ca6ca4de60933ef2b62b5db4039b3b1a2 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 17 Jan 2020 19:57:50 +0300 Subject: [PATCH 067/182] [PGPRO-3442] Correct initialize scan. --- src/rumget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/rumget.c b/src/rumget.c index 31840cf11a..32cb13eb94 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -2373,6 +2373,7 @@ rumgettuple(IndexScanDesc scan, ScanDirection direction) rumNewScanKey(scan); so->firstCall = false; + ItemPointerSetInvalid(&GET_SCAN_TID(scan)); if (RumIsVoidRes(scan)) return false; From 277014f9f8db03dd3c2624c6cd1a6f39821fea29 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Thu, 30 Jan 2020 15:58:33 +0300 Subject: [PATCH 068/182] Update regression tests for 32-bit system --- expected/array_1.out | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/expected/array_1.out b/expected/array_1.out index e88ae50589..ec3abfb693 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -851,20 +851,26 @@ SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> Order By: (i <=> '{51}'::smallint[]) (3 rows) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order 
by i <=> '{51}'; - i | ?column? +SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + i | distance ---------------------+------------------ {20,23,51} | 1.73205080756888 {33,51,20,77,23,65} | 2.44948974278318 - {23,76,34,23,2,20} | Infinity - {20,60,45,23,29} | Infinity - {23,89,38,20,40,95} | Infinity - {23,20,72} | Infinity - {73,23,20} | Infinity - {6,97,20,89,23} | Infinity - {20,98,30,23,1,66} | Infinity - {57,23,39,46,50,20} | Infinity - {81,20,26,22,23} | Infinity - {18,23,10,90,15,20} | Infinity + {23,76,34,23,2,20} | -1 + {20,60,45,23,29} | -1 + {23,89,38,20,40,95} | -1 + {23,20,72} | -1 + {73,23,20} | -1 + {6,97,20,89,23} | -1 + {20,98,30,23,1,66} | -1 + {57,23,39,46,50,20} | -1 + {81,20,26,22,23} | -1 + {18,23,10,90,15,20} | -1 (12 rows) From 7de7cbb8b191e31b5920553ae71f7a35f67a3256 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Thu, 2 Apr 2020 11:54:27 +0300 Subject: [PATCH 069/182] Add support of pgsql v13 --- src/ruminsert.c | 13 +++++++++++-- src/rumutil.c | 29 +++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/ruminsert.c b/src/ruminsert.c index e34a247a1d..7315f517f3 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -550,7 +550,13 @@ rumHeapTupleBulkInsert(RumBuildState * buildstate, OffsetNumber attnum, } static void -rumBuildCallback(Relation index, HeapTuple htup, Datum *values, +rumBuildCallback(Relation index, +#if PG_VERSION_NUM < 130000 + HeapTuple htup, +#else + ItemPointer tid, +#endif + Datum *values, bool *isnull, bool tupleIsAlive, void *state) { RumBuildState *buildstate = (RumBuildState *) state; @@ -558,6 +564,9 @@ rumBuildCallback(Relation index, HeapTuple htup, Datum *values, int i; Datum outerAddInfo = (Datum) 0; bool outerAddInfoIsNull = true; +#if PG_VERSION_NUM < 130000 + ItemPointer tid = &htup->t_self; +#endif 
if (AttributeNumberIsValid(buildstate->rumstate.attrnAttachColumn)) { @@ -570,7 +579,7 @@ rumBuildCallback(Relation index, HeapTuple htup, Datum *values, for (i = 0; i < buildstate->rumstate.origTupdesc->natts; i++) rumHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), values[i], isnull[i], - &htup->t_self, + tid, outerAddInfo, outerAddInfoIsNull); /* If we've maxed out our available memory, dump everything to the index */ diff --git a/src/rumutil.c b/src/rumutil.c index 2b2af134fb..a24c1614ce 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -83,13 +83,25 @@ _PG_init(void) add_string_reloption(rum_relopt_kind, "attach", "Column name to attach as additional info", - NULL, NULL); + NULL, NULL +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); add_string_reloption(rum_relopt_kind, "to", "Column name to add a order by column", - NULL, NULL); + NULL, NULL +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); add_bool_reloption(rum_relopt_kind, "order_by_attach", "Use (addinfo, itempointer) order instead of just itempointer", - false); + false +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); } /* @@ -875,14 +887,15 @@ rumExtractEntries(RumState * rumstate, OffsetNumber attnum, bytea * rumoptions(Datum reloptions, bool validate) { - relopt_value *options; - RumOptions *rdopts; - int numoptions; static const relopt_parse_elt tab[] = { {"attach", RELOPT_TYPE_STRING, offsetof(RumOptions, attachColumn)}, {"to", RELOPT_TYPE_STRING, offsetof(RumOptions, addToColumn)}, {"order_by_attach", RELOPT_TYPE_BOOL, offsetof(RumOptions, useAlternativeOrder)} }; +#if PG_VERSION_NUM < 130000 + relopt_value *options; + RumOptions *rdopts; + int numoptions; options = parseRelOptions(reloptions, validate, rum_relopt_kind, &numoptions); @@ -899,6 +912,10 @@ rumoptions(Datum reloptions, bool validate) pfree(options); return (bytea *) rdopts; +#else + return (bytea *) build_reloptions(reloptions, validate, rum_relopt_kind, + sizeof(RumOptions), 
tab, lengthof(tab)); +#endif } bool From 289a6c208c67d150fbaa32ad4d591e1daff48f78 Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Thu, 27 Jun 2019 11:14:13 +0300 Subject: [PATCH 070/182] Issue #61: Fix 32-bit result for array test --- src/rumsort.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/rumsort.c b/src/rumsort.c index 8d6e8fe08e..6f4425c9ed 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -57,6 +57,11 @@ TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) * Below are copied definitions from src/backend/utils/sort/tuplesort.c. */ +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + typedef struct { void *tuple; /* the tuple proper */ From a7a3859b94af06035a1912542974211a419e5eac Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Mon, 1 Jul 2019 15:30:26 +0300 Subject: [PATCH 071/182] Disable dtrace for rumsort.c --- src/rumsort.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 6f4425c9ed..8aa10fef4f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -24,7 +24,6 @@ #include "utils/guc.h" #include "utils/logtape.h" #include "utils/pg_rusage.h" -#include "utils/probes.h" #include "rum.h" /* RumItem */ @@ -40,14 +39,6 @@ #define LogicalTapeRewindForWrite(x, y) LogicalTapeRewind((x), (y), true) #endif -#if PG_VERSION_NUM >= 110000 -#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ -TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5, false) -#else -#define RUM_SORT_START(INT1, INT2, INT3, INT4, INT5) \ -TRACE_POSTGRESQL_SORT_START(INT1, INT2, INT3, INT4, INT5) -#endif - #if PG_VERSION_NUM >= 110000 #define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, NULL, NULL, 1) #define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) @@ -745,12 +736,6 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, state->nKeys = nKeys; - RUM_SORT_START(INDEX_SORT, - false, /* no unique check */ - state->nKeys, - workMem, - randomAccess); - 
state->comparetup = comparetup_rum; state->copytup = copytup_rum; state->writetup = writetup_rum; @@ -778,12 +763,6 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) "begin rumitem sort: workMem = %d", workMem); #endif - RUM_SORT_START(INDEX_SORT, - false, /* no unique check */ - 2, - workMem, - false); - state->cmp = cmp; state->comparetup = comparetup_rumitem; state->copytup = copytup_rumitem; @@ -841,15 +820,6 @@ rum_tuplesort_end(RumTuplesortstate *state) elog(LOG, "internal sort ended, %ld KB used: %s", spaceUsed, pg_rusage_show(&state->ru_start)); } - - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); -#else - - /* - * If you disabled TRACE_SORT, you can still probe sort__done, but you - * ain't getting space-used stats. - */ - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); #endif /* Free any execution state created for CLUSTER case */ From 7b5bb8a997fd03cb57d153df2abb41d9b1842e3d Mon Sep 17 00:00:00 2001 From: Arthur Zakirov Date: Tue, 2 Jul 2019 14:36:58 +0300 Subject: [PATCH 072/182] Issue #61: Invalidate scan->xs_heaptid by hand. Earlier it was done by invalidating scan->xs_ctup by RelationGetIndexScan(). --- src/rumscan.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/rumscan.c b/src/rumscan.c index 81713ac512..34de9d4fc4 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -41,6 +41,13 @@ rumbeginscan(Relation rel, int nkeys, int norderbys) initRumState(&so->rumstate, scan->indexRelation); +#if PG_VERSION_NUM >= 120000 + /* + * Starting from PG 12 we need to invalidate result's item pointer. Earlier + * it was done by invalidating scan->xs_ctup by RelationGetIndexScan(). 
+ */ + ItemPointerSetInvalid(&scan->xs_heaptid); +#endif scan->opaque = so; return scan; From 4f78fbcdadeedf7ea5b7719a18c68e1180a58eff Mon Sep 17 00:00:00 2001 From: Akenteva Anna Date: Thu, 11 Jul 2019 18:08:16 +0300 Subject: [PATCH 073/182] PGPRO-2844: Use macros PG_INT32_MAX instead of magic number --- src/rum_ts_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 90893f2825..4cfda6b80a 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -873,7 +873,7 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, memset(qr->operandData, 0, sizeof(qr->operandData[0]) * qr->length); - ext->p = 0x7fffffff; + ext->p = PG_INT32_MAX; ext->q = 0; ptr = doc + ext->pos; From 3f283b9f66356d77cb0c9b32b53db73f62474e14 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Thu, 2 Apr 2020 11:54:27 +0300 Subject: [PATCH 074/182] Add support of pgsql v13 --- src/ruminsert.c | 13 +++++++++++-- src/rumutil.c | 29 +++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/ruminsert.c b/src/ruminsert.c index e34a247a1d..7315f517f3 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -550,7 +550,13 @@ rumHeapTupleBulkInsert(RumBuildState * buildstate, OffsetNumber attnum, } static void -rumBuildCallback(Relation index, HeapTuple htup, Datum *values, +rumBuildCallback(Relation index, +#if PG_VERSION_NUM < 130000 + HeapTuple htup, +#else + ItemPointer tid, +#endif + Datum *values, bool *isnull, bool tupleIsAlive, void *state) { RumBuildState *buildstate = (RumBuildState *) state; @@ -558,6 +564,9 @@ rumBuildCallback(Relation index, HeapTuple htup, Datum *values, int i; Datum outerAddInfo = (Datum) 0; bool outerAddInfoIsNull = true; +#if PG_VERSION_NUM < 130000 + ItemPointer tid = &htup->t_self; +#endif if (AttributeNumberIsValid(buildstate->rumstate.attrnAttachColumn)) { @@ -570,7 +579,7 @@ rumBuildCallback(Relation index, HeapTuple htup, Datum *values, for (i = 0; i 
< buildstate->rumstate.origTupdesc->natts; i++) rumHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), values[i], isnull[i], - &htup->t_self, + tid, outerAddInfo, outerAddInfoIsNull); /* If we've maxed out our available memory, dump everything to the index */ diff --git a/src/rumutil.c b/src/rumutil.c index 2b2af134fb..a24c1614ce 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -83,13 +83,25 @@ _PG_init(void) add_string_reloption(rum_relopt_kind, "attach", "Column name to attach as additional info", - NULL, NULL); + NULL, NULL +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); add_string_reloption(rum_relopt_kind, "to", "Column name to add a order by column", - NULL, NULL); + NULL, NULL +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); add_bool_reloption(rum_relopt_kind, "order_by_attach", "Use (addinfo, itempointer) order instead of just itempointer", - false); + false +#if PG_VERSION_NUM >= 130000 + , AccessExclusiveLock +#endif + ); } /* @@ -875,14 +887,15 @@ rumExtractEntries(RumState * rumstate, OffsetNumber attnum, bytea * rumoptions(Datum reloptions, bool validate) { - relopt_value *options; - RumOptions *rdopts; - int numoptions; static const relopt_parse_elt tab[] = { {"attach", RELOPT_TYPE_STRING, offsetof(RumOptions, attachColumn)}, {"to", RELOPT_TYPE_STRING, offsetof(RumOptions, addToColumn)}, {"order_by_attach", RELOPT_TYPE_BOOL, offsetof(RumOptions, useAlternativeOrder)} }; +#if PG_VERSION_NUM < 130000 + relopt_value *options; + RumOptions *rdopts; + int numoptions; options = parseRelOptions(reloptions, validate, rum_relopt_kind, &numoptions); @@ -899,6 +912,10 @@ rumoptions(Datum reloptions, bool validate) pfree(options); return (bytea *) rdopts; +#else + return (bytea *) build_reloptions(reloptions, validate, rum_relopt_kind, + sizeof(RumOptions), tab, lengthof(tab)); +#endif } bool From 23e34202afb3223b2a1a4aee55ebffa06cbe3101 Mon Sep 17 00:00:00 2001 From: "Ivan N. 
Taranov" Date: Mon, 6 Apr 2020 14:38:05 +0300 Subject: [PATCH 075/182] PGPRO-3429: +reduce generate_series size --- t/001_wal.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/001_wal.pl b/t/001_wal.pl index 6cd507da86..99415afb63 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -71,7 +71,7 @@ sub test_index_replay to_tsvector('simple', array_to_string(array( select substr('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', trunc(random() * 52)::integer + 1, 1) FROM generate_series(i, i + 4)), '')) - FROM generate_series(1,100000) i;"); + FROM generate_series(1,16000) i;"); $node_master->psql("postgres", "CREATE INDEX rumidx ON tst USING rum (t rum_tsvector_ops);"); # Test that queries give same result From 2122dbd929adf1bcf410fbdb9a74c6a512beff5f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 23 Jun 2020 18:38:55 +0400 Subject: [PATCH 076/182] RUM weights patch - combined prev. changes --- Makefile | 2 +- data/rum_weight.data | 52 ++++ expected/rum_weight.out | 136 ++++++++ sql/rum_weight.sql | 44 +++ src/rum_ts_utils.c | 673 +++++++++++++++++++++++++++++++++++++--- 5 files changed, 856 insertions(+), 51 deletions(-) create mode 100644 data/rum_weight.data create mode 100644 expected/rum_weight.out create mode 100644 sql/rum_weight.sql diff --git a/Makefile b/Makefile index 36ae70c8d2..ad8f6cc9d1 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ REGRESS = rum rum_validate rum_hash ruminv timestamp orderby orderby_hash \ int2 int4 int8 float4 float8 money oid \ time timetz date interval \ macaddr inet cidr text varchar char bytea bit varbit \ - numeric + numeric rum_weight ifdef USE_PGXS PG_CONFIG = pg_config diff --git a/data/rum_weight.data b/data/rum_weight.data new file mode 100644 index 0000000000..5bce717c1b --- /dev/null +++ b/data/rum_weight.data @@ -0,0 +1,52 @@ +As a reward for your reformation I write to you on this precious sheet.|write +You see I have come to be wonderfully attached to Heidelberg, the|attached come 
see +beautiful, the quaint, the historically poetic, learned and picturesque| +old town on the Neckar. It seems like another home. So I could not show|seems show could +my appreciation of you in a more complimentary way than by sending this|sending +little series of pictures. Have you ever been here, I wonder? You did|did have been wonder +not say, but you wrote as if you knew it by sight as well as by heart.|wrote say knew +As I cannot know, I will venture an explanation. The panorama speaks for|know will speaks +itself. Put on your "specs" and look at the castle, half way up the|put look +_berg_, "the Jettenhuhl, a wooded spur of the Konigestuhl." Look at it|Look +from the "Terrasse." Thus you'll get something of an idea of it. The|get +Gesprente Thurm is the one that was blown up by the French. The|is blown was +thickness of the walls, twenty-one feet, and the solid masonry, held it|held +so well that only a fragment, as it were, gave way. It still hangs as if|were gave hangs +ready to be replaced. "Das Grosse Fass Gebaude," too, you will have no|be replaced will have +difficulty in making out. If you only had it with its 49,000 gallons of|making had +wine, but wouldn't you divide with your neighbors! The columns in the|wouldn't divide +portico that shows in the Schlosshof are the four brought from|shows are brought +Charlemagne's palace at Ingelheim by the Count Palatine Ludwig, some| +time between 1508-44. The Zum Ritter has nothing to do with the castle,|has do +but is an ancient structure (1592) in the Renaissance style, and one of|is +the few that escaped destruction in 1693. 
It is a beautiful, highly|escaped is +ornamental building, and I wish you could see it, if you have not seen|wish could see have seen +it.| +| +All the above information, I beg you to believe, I do not intend you|beg believe do intend +to think was evolved from my inner consciousness, but gathered from|think was evolved gathered +the--nearest guide-book!| +| +I am so much obliged to you for mapping out Switzerland to me. I have|am obliged have +been trying my best to get all those "passes" into my brain. Now, thanks|been trying get +to your letter, I have them all in the handiest kind of a bunch. Ariel|have +like, "I'll do my bidding gently," and as surely, if I get there. But|do bidding get +there are dreadful reports of floods and roads caved in and bridges|are caved +swept away and snows and--enough of such exciting items as sets one|swept sets +thinking--"to go or not to go?" We are this far on the way. Reached|thinking go go are Reached +here this afternoon. Have spent the evening sauntering in the gardens,|Have spent sauntering +the Conversationhaus, the bazaar, mingling with the throng, listening to|mingling listening +the band, and comparing what it is with what it was. It was a gay and|comparing was was +curious spectacle, but on the whole had "the banquet-hall deserted"|had deserted +look. The situation is most beautiful. It lies, you know, at the|is lies know +entrance of the Black Forest, among picturesque, thickly-wooded hills,| +in the valley of the Oos, and extends up the slope of some of the hills.|extends +The Oos is a most turbid, turbulent stream; dashes through part of the|is +town with angry, headlong speed. There is an avenue along its bank of|is +oaks, limes and maples, bordered with flower-beds and shrubberies, and| +adorned with fountains and handsome villas. We shall devote to-morrow to| +seeing all there is to be seen, and go to Strassburg to-morrow evening|seeing is be seen go +for two or three days. 
From there to Constance, and then hold _our_| +"Council" as to further movements.| +def fgr| +def xxx fgr| diff --git a/expected/rum_weight.out b/expected/rum_weight.out new file mode 100644 index 0000000000..0c1565d1ce --- /dev/null +++ b/expected/rum_weight.out @@ -0,0 +1,136 @@ +CREATE TABLE testweight_rum( t text, a tsvector, r text ); +CREATE FUNCTION fill_weight_trigger() RETURNS trigger AS $$ +begin + new.a := + setweight(to_tsvector('pg_catalog.english', coalesce(new.r,'')), 'A') || + setweight(to_tsvector('pg_catalog.english', coalesce(new.t,'')), 'D'); + return new; +end +$$ LANGUAGE plpgsql; +CREATE TRIGGER tsvectorweightupdate +BEFORE INSERT OR UPDATE ON testweight_rum +FOR EACH ROW EXECUTE PROCEDURE fill_weight_trigger(); +CREATE INDEX rumidx_weight ON testweight_rum USING rum (a rum_tsvector_ops); +\copy testweight_rum(t,r) from 'data/rum_weight.data' DELIMITER '|' ; +SET enable_seqscan=off; +SET enable_indexscan=off; +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever:A|wrote'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'have:A&wish:DAC'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'have:A&wish:DAC'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'among:ABC'); + count +------- + 0 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'structure:D&ancient:BCD'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '(complimentary:DC|sight)&(sending:ABC|heart)'); + count +------- + 2 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '!gave:D & way'); + count +------- + 3 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 
'(go<->go:a)&(think:d<->go)'); + count +------- + 0 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '(go<->go:a)&(think:d<2>go)'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & (!reach:a | way<->reach)'); + count +------- + 2 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & (!reach:a & way<->reach)'); + count +------- + 0 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & go & !way:a'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'show:d & seem & !town:a'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '!way:a'); + count +------- + 52 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & !way:a'); + count +------- + 2 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & !way:a'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & go'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'think<->go:d | go<->see'); + count +------- + 1 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d<->think'); + count +------- + 0 +(1 row) + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach<->think'); + count +------- + 1 +(1 row) + diff --git a/sql/rum_weight.sql b/sql/rum_weight.sql new file mode 100644 index 0000000000..3fcee8b06e --- /dev/null +++ b/sql/rum_weight.sql @@ -0,0 +1,44 @@ +CREATE TABLE testweight_rum( t text, a tsvector, r text ); + +CREATE FUNCTION fill_weight_trigger() RETURNS trigger AS $$ +begin + 
new.a := + setweight(to_tsvector('pg_catalog.english', coalesce(new.r,'')), 'A') || + setweight(to_tsvector('pg_catalog.english', coalesce(new.t,'')), 'D'); + return new; +end +$$ LANGUAGE plpgsql; + +CREATE TRIGGER tsvectorweightupdate +BEFORE INSERT OR UPDATE ON testweight_rum +FOR EACH ROW EXECUTE PROCEDURE fill_weight_trigger(); + +CREATE INDEX rumidx_weight ON testweight_rum USING rum (a rum_tsvector_ops); + +\copy testweight_rum(t,r) from 'data/rum_weight.data' DELIMITER '|' ; + +SET enable_seqscan=off; +SET enable_indexscan=off; + +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'ever:A|wrote'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'have:A&wish:DAC'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'have:A&wish:DAC'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'among:ABC'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'structure:D&ancient:BCD'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '(complimentary:DC|sight)&(sending:ABC|heart)'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '!gave:D & way'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '(go<->go:a)&(think:d<->go)'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', '(go<->go:a)&(think:d<2>go)'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & (!reach:a | way<->reach)'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & (!reach:a & way<->reach)'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & go & !way:a'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'show:d & seem & !town:a'); +SELECT count(*) FROM testweight_rum WHERE a @@ 
to_tsquery('pg_catalog.english', '!way:a'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'go & !way:a'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & !way:a'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d & go'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'think<->go:d | go<->see'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach:d<->think'); +SELECT count(*) FROM testweight_rum WHERE a @@ to_tsquery('pg_catalog.english', 'reach<->think'); + + diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 4cfda6b80a..2f1ea074aa 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -34,6 +34,16 @@ #define TS_EXEC_PHRASE_NO_POS TS_EXEC_PHRASE_AS_AND #endif +#ifndef TSTernaryValue +typedef enum +{ + TS_NO, /* definitely no match */ + TS_YES, /* definitely does match */ + TS_MAYBE /* can't verify match for lack of pos data */ +} TSTernaryValue; +typedef TSTernaryValue (*TSExecuteCallbackTernary) (void *arg, QueryOperand *val, ExecPhraseData *data); +#endif + PG_FUNCTION_INFO_V1(rum_extract_tsvector); PG_FUNCTION_INFO_V1(rum_extract_tsvector_hash); PG_FUNCTION_INFO_V1(rum_extract_tsquery); @@ -60,6 +70,20 @@ static Datum build_tsvector_hash_entry(TSVector vector, WordEntry *we); static Datum build_tsquery_entry(TSQuery query, QueryOperand *operand); static Datum build_tsquery_hash_entry(TSQuery query, QueryOperand *operand); +static TSTernaryValue +rum_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, + int emit, + int Loffset, + int Roffset, + int max_npos); +static TSTernaryValue +rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallbackTernary chkcond, + ExecPhraseData *data); +static TSTernaryValue +rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallbackTernary chkcond); + typedef 
Datum (*TSVectorEntryBuilder)(TSVector vector, WordEntry *we); typedef Datum (*TSQueryEntryBuilder)(TSQuery query, QueryOperand *operand); @@ -148,6 +172,8 @@ static WordEntryPosVector POSNULL = { #define RANK_NORM_RDIVRPLUS1 0x20 #define DEF_NORM_METHOD RANK_NO_NORM +#define TS_EXEC_IN_NEG 0x04 + #define QR_GET_OPERAND(q, v) \ (&((q)->operandData[ ((QueryItem*)(v)) - GETQUERY((q)->query) ])) @@ -202,63 +228,609 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) PG_RETURN_BOOL(res); } -static bool + +static TSTernaryValue checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) { RumChkVal *gcv = (RumChkVal *) checkval; int j; - /* if any val requiring a weight is used, set recheck flag */ - if (val->weight != 0) - *(gcv->need_recheck) = true; - /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; - /* return presence of current entry in indexed value */ if (!gcv->check[j]) - return false; + /* lexeme not present in indexed value */ + return TS_NO; - /* - * Fill position list for phrase operator if it's needed end it exists - */ - if (data) + else if (gcv->addInfo && gcv->addInfoIsNull[j] == false) { - /* caller wants an array of positions (phrase search) */ + bytea *positions; + int32 i; + char *ptrt; + WordEntryPos post; + + post = 0; + int32 npos; + int32 k = 0; + /* + * we don't have positions in index because we store a timestamp in + * addInfo + */ if (gcv->recheckPhrase) + return ((val->weight) ? 
TS_MAYBE : TS_YES); + + positions = DatumGetByteaP(gcv->addInfo[j]); + ptrt = (char *) VARDATA_ANY(positions); + npos = count_pos(VARDATA_ANY(positions), + VARSIZE_ANY_EXHDR(positions)); + + /* caller wants an array of positions (phrase search) */ + if (data) { - /* - * we don't have a positions because we store a timestamp in - * addInfo - */ - *(gcv->need_recheck) = true; - } - else if (gcv->addInfo && gcv->addInfoIsNull[j] == false) - { - bytea *positions; - int32 i; - char *ptrt; - WordEntryPos post; - - positions = DatumGetByteaP(gcv->addInfo[j]); - data->npos = count_pos(VARDATA_ANY(positions), - VARSIZE_ANY_EXHDR(positions)); - data->pos = palloc(sizeof(*data->pos) * data->npos); + const int32 itemsize = sizeof(*data->pos); + + data->pos = palloc(itemsize * npos); data->allocated = true; - ptrt = (char *) VARDATA_ANY(positions); - post = 0; + /* Fill positions that has right weight to return to a caller */ + for (i = 0; i < npos; i++) + { + ptrt = decompress_pos(ptrt, &post); + + /* + * Weight mark is stored as 2 bits inside position mark in RUM + * index. We compare it to a list of requested positions in + * query operand (4 bits one for each weight mark). + */ + if ((val->weight == 0) || (val->weight >> WEP_GETWEIGHT(post)) & 1) + { + data->pos[k] = post; + k++; + } + } + data->npos = k; + data->pos = repalloc(data->pos, itemsize * k); + return (k ? TS_YES : TS_NO); + } + + /* + * Not phrase search. We only need to know if there's at least one + * position with right weight then return TS_YES, otherwise return + * TS_NO. 
For this search work without recheck we need that any + * negation in recursion will give TS_MAYBE and initiate recheck as + * "!word:A" can mean both: "word:BCÐ’" or "!word" + */ + else if (val->weight == 0) + /* Query without weigths */ + return TS_YES; + else + { + char KeyWeightsMask = 0; - for (i = 0; i < data->npos; i++) + /* Fill KeyWeightMask contains with weigths from all positions */ + for (i = 0; i < npos; i++) { ptrt = decompress_pos(ptrt, &post); - data->pos[i] = post; + KeyWeightsMask |= 1 << WEP_GETWEIGHT(post); + } + return ((KeyWeightsMask & val->weight) ? TS_YES : TS_NO); + } + } +/* Should never come here */ + return TS_MAYBE; +} + +/* + * Compute output position list for a tsquery operator in phrase mode. + * + * Merge the position lists in Ldata and Rdata as specified by "emit", + * returning the result list into *data. The input position lists must be + * sorted and unique, and the output will be as well. + * + * data: pointer to initially-all-zeroes output struct, or NULL + * Ldata, Rdata: input position lists + * emit: bitmask of TSPO_XXX flags + * Loffset: offset to be added to Ldata positions before comparing/outputting + * Roffset: offset to be added to Rdata positions before comparing/outputting + * max_npos: maximum possible required size of output position array + * + * Loffset and Roffset should not be negative, else we risk trying to output + * negative positions, which won't fit into WordEntryPos. + * + * The result is boolean (TS_YES or TS_NO), but for the caller's convenience + * we return it as TSTernaryValue. + * + * Returns TS_YES if any positions were emitted to *data; or if data is NULL, + * returns TS_YES if any positions would have been emitted. 
+ */ +#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */ +#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */ +#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */ + +static TSTernaryValue +rum_phrase_output(ExecPhraseData *data, + ExecPhraseData *Ldata, + ExecPhraseData *Rdata, + int emit, + int Loffset, + int Roffset, + int max_npos) +{ + int Lindex, + Rindex; + + /* Loop until both inputs are exhausted */ + Lindex = Rindex = 0; + while (Lindex < Ldata->npos || Rindex < Rdata->npos) + { + int Lpos, + Rpos; + int output_pos = 0; + + /* + * Fetch current values to compare. WEP_GETPOS() is needed because + * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. + */ + if (Lindex < Ldata->npos) + Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset; + else + { + /* L array exhausted, so we're done if R_ONLY isn't set */ + if (!(emit & TSPO_R_ONLY)) + break; + Lpos = INT_MAX; + } + if (Rindex < Rdata->npos) + Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset; + else + { + /* R array exhausted, so we're done if L_ONLY isn't set */ + if (!(emit & TSPO_L_ONLY)) + break; + Rpos = INT_MAX; + } + + /* Merge-join the two input lists */ + if (Lpos < Rpos) + { + /* Lpos is not matched in Rdata, should we output it? */ + if (emit & TSPO_L_ONLY) + output_pos = Lpos; + Lindex++; + } + else if (Lpos == Rpos) + { + /* Lpos and Rpos match ... should we output it? */ + if (emit & TSPO_BOTH) + output_pos = Rpos; + Lindex++; + Rindex++; + } + else /* Lpos > Rpos */ + { + /* Rpos is not matched in Ldata, should we output it? 
*/ + if (emit & TSPO_R_ONLY) + output_pos = Rpos; + Rindex++; + } + + if (output_pos > 0) + { + if (data) + { + /* Store position, first allocating output array if needed */ + if (data->pos == NULL) + { + data->pos = (WordEntryPos *) + palloc(max_npos * sizeof(WordEntryPos)); + data->allocated = true; + } + data->pos[data->npos++] = output_pos; + } + else + { + /* + * Exact positions not needed, so return TS_YES as soon as we + * know there is at least one. + */ + return TS_YES; } } } - return true; + if (data && data->npos > 0) + { + /* Let's assert we didn't overrun the array */ + Assert(data->npos <= max_npos); + return TS_YES; + } + return TS_NO; +} + +/* + * Execute tsquery at or below an OP_PHRASE operator. + * + * This handles tsquery execution at recursion levels where we need to care + * about match locations. + * + * In addition to the same arguments used for TS_execute, the caller may pass + * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme + * match position info on success. data == NULL if no position data need be + * returned. (In practice, outside callers pass NULL, and only the internal + * recursion cases pass a data pointer.) + * Note: the function assumes data != NULL for operators other than OP_PHRASE. + * This is OK because an outside call always starts from an OP_PHRASE node. + * + * The detailed semantics of the match data, given that the function returned + * TS_YES (successful match), are: + * + * npos > 0, negate = false: + * query is matched at specified position(s) (and only those positions) + * npos > 0, negate = true: + * query is matched at all positions *except* specified position(s) + * npos = 0, negate = true: + * query is matched at all positions + * npos = 0, negate = false: + * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate) + * + * Successful matches also return a "width" value which is the match width in + * lexemes, less one. 
Hence, "width" is zero for simple one-lexeme matches, + * and is the sum of the phrase operator distances for phrase matches. Note + * that when width > 0, the listed positions represent the ends of matches not + * the starts. (This unintuitive rule is needed to avoid possibly generating + * negative positions, which wouldn't fit into the WordEntryPos arrays.) + * + * If the TSExecuteCallback function reports that an operand is present + * but fails to provide position(s) for it, we will return TS_MAYBE when + * it is possible but not certain that the query is matched. + * + * When the function returns TS_NO or TS_MAYBE, it must return npos = 0, + * negate = false (which is the state initialized by the caller); but the + * "width" output in such cases is undefined. + */ +static TSTernaryValue +rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallbackTernary chkcond, + ExecPhraseData *data) +{ + ExecPhraseData Ldata, + Rdata; + TSTernaryValue lmatch, + rmatch; + int Loffset, + Roffset, + maxwidth; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + return (chkcond(arg, (QueryOperand *) curitem, data)); + + switch (curitem->qoperator.oper) + { + case OP_NOT: + + /* + * We need not touch data->width, since a NOT operation does not + * change the match width. 
+ */ + if (!(flags & TS_EXEC_CALC_NOT)) + { + /* without CALC_NOT, report NOT as "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return TS_YES; + } + switch (rum_phrase_execute(curitem + 1, arg, flags, chkcond, data)) + { + case TS_NO: + /* change "match nowhere" to "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return TS_YES; + case TS_YES: + if (data->npos > 0) + { + /* we have some positions, invert negate flag */ + data->negate = !data->negate; + return TS_YES; + } + else if (data->negate) + { + /* change "match everywhere" to "match nowhere" */ + data->negate = false; + return TS_NO; + } + /* Should not get here if result was TS_YES */ + Assert(false); + break; + case TS_MAYBE: + /* match positions are, and remain, uncertain */ + return TS_MAYBE; + } + break; + + case OP_PHRASE: + case OP_AND: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); + + lmatch = rum_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + if (lmatch == TS_NO) + return TS_NO; + + rmatch = rum_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); + if (rmatch == TS_NO) + return TS_NO; + + /* + * If either operand has no position information, then we can't + * return reliable position data, only a MAYBE result. + */ + if (lmatch == TS_MAYBE || rmatch == TS_MAYBE) + return TS_MAYBE; + + if (curitem->qoperator.oper == OP_PHRASE) + { + /* + * Compute Loffset and Roffset suitable for phrase match, and + * compute overall width of whole phrase match. 
+ */ + Loffset = curitem->qoperator.distance + Rdata.width; + Roffset = 0; + if (data) + data->width = curitem->qoperator.distance + + Ldata.width + Rdata.width; + } + else + { + /* + * For OP_AND, set output width and alignment like OP_OR (see + * comment below) + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + if (data) + data->width = maxwidth; + } + + if (Ldata.negate && Rdata.negate) + { + /* !L & !R: treat as !(L | R) */ + (void) rum_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + if (data) + data->negate = true; + return TS_YES; + } + else if (Ldata.negate) + { + /* !L & R */ + return rum_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + } + else if (Rdata.negate) + { + /* L & !R */ + return rum_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + } + else + { + /* straight AND */ + return rum_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + } + + case OP_OR: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); + + lmatch = rum_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + rmatch = rum_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); + + if (lmatch == TS_NO && rmatch == TS_NO) + return TS_NO; + + /* + * If either operand has no position information, then we can't + * return reliable position data, only a MAYBE result. + */ + if (lmatch == TS_MAYBE || rmatch == TS_MAYBE) + return TS_MAYBE; + + /* + * Cope with undefined output width from failed submatch. (This + * takes less code than trying to ensure that all failure returns + * et data->width to zero.) 
+ */ + if (lmatch == TS_NO) + Ldata.width = 0; + if (rmatch == TS_NO) + Rdata.width = 0; + + /* + * For OP_AND and OP_OR, report the width of the wider of the two + * inputs, and align the narrower input's positions to the right + * end of that width. This rule deals at least somewhat + * reasonably with cases like "x <-> (y | z <-> q)". + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + data->width = maxwidth; + + if (Ldata.negate && Rdata.negate) + { + /* !L | !R: treat as !(L & R) */ + (void) rum_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + data->negate = true; + return TS_YES; + } + else if (Ldata.negate) + { + /* !L | R: treat as !(L & !R) */ + (void) rum_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + data->negate = true; + return TS_YES; + } + else if (Rdata.negate) + { + /* L | !R: treat as !(!L & R) */ + (void) rum_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + data->negate = true; + return TS_YES; + } + else + { + /* straight OR */ + return rum_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + } + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return TS_NO; +} + +/* + * Evaluates tsquery boolean expression. It is similar to adt/tsvector_op.c + * TS_execute_recurse() but in most cases when ! operator is used it should set + * TS_MAYBE to recheck. The reason is that inside negation we can have one or several + * operands with weights (which we can not easily know) and negative of them is not + * precisely defined i.e. "!word:A" can mean "word:BCD" or "!word" (the same applies to + * logical combination of them). 
One easily only case we can avoid recheck is when before negation there + * is QI_VAL which doesn't have weight. + * + * curitem: current tsquery item (initially, the first one) + * arg: opaque value to pass through to callback function + * flags: bitmask of flag bits shown in ts_utils.h + * chkcond: callback function to check whether a primitive value is present + */ + +static TSTernaryValue +rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, + TSExecuteCallbackTernary chkcond) +{ + TSTernaryValue lmatch; + + /* since this function recurses, it could be driven to stack overflow */ + check_stack_depth(); + + if (curitem->type == QI_VAL) + { + if ((flags & TS_EXEC_IN_NEG) && curitem->qoperand.weight && + curitem->qoperand.weight != 15) + return TS_MAYBE; + else + return chkcond(arg, (QueryOperand *) curitem, NULL); + } + + switch (curitem->qoperator.oper) + { + case OP_NOT: + if (!(flags & TS_EXEC_CALC_NOT)) + return TS_YES; + switch (rum_TS_execute(curitem + 1, arg, flags | TS_EXEC_IN_NEG, chkcond)) + { + case TS_NO: + return TS_YES; + case TS_YES: + return TS_NO; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_AND: + lmatch = rum_TS_execute(curitem + curitem->qoperator.left, arg, + flags, chkcond); + if (lmatch == TS_NO) + return TS_NO; + switch (rum_TS_execute(curitem + 1, arg, flags, chkcond)) + { + case TS_NO: + return TS_NO; + case TS_YES: + return lmatch; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_OR: + lmatch = rum_TS_execute(curitem + curitem->qoperator.left, arg, + flags, chkcond); + if (lmatch == TS_YES) + return TS_YES; + switch (rum_TS_execute(curitem + 1, arg, flags, chkcond)) + { + case TS_NO: + return lmatch; + case TS_YES: + return TS_YES; + case TS_MAYBE: + return TS_MAYBE; + } + break; + + case OP_PHRASE: + + /* + * If we get a MAYBE result, and the caller doesn't want that, + * convert it to NO. 
It would be more consistent, perhaps, to + * return the result of TS_phrase_execute() verbatim and then + * convert MAYBE results at the top of the recursion. But + * converting at the topmost phrase operator gives results that + * are bug-compatible with the old implementation, so do it like + * this for now. + */ + switch (rum_phrase_execute(curitem, arg, flags, chkcond, NULL)) + { + case TS_NO: + return TS_NO; + case TS_YES: + return TS_YES; + case TS_MAYBE: + return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO; + } + break; + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); + } + + /* not reachable, but keep compiler quiet */ + return TS_NO; } Datum @@ -274,23 +846,23 @@ rum_tsquery_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - bool res = false; + + TSTernaryValue res = TS_NO; /* - * The query requires recheck only if it involves weights + * The query doesn't require recheck by default */ *recheck = false; if (query->size > 0) { - QueryItem *item; RumChkVal gcv; /* * check-parameter array has one entry for each value (operand) in the * query. 
*/ - gcv.first_item = item = GETQUERY(query); + gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = recheck; @@ -298,14 +870,15 @@ rum_tsquery_consistent(PG_FUNCTION_ARGS) gcv.addInfoIsNull = addInfoIsNull; gcv.recheckPhrase = false; - res = TS_execute(GETQUERY(query), &gcv, - TS_EXEC_CALC_NOT, - checkcondition_rum); + res = rum_TS_execute(GETQUERY(query), &gcv, + TS_EXEC_CALC_NOT, + checkcondition_rum); + if (res == TS_MAYBE) + *recheck = true; } - PG_RETURN_BOOL(res); -} +} Datum rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) { @@ -319,7 +892,7 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - bool res = false; + TSTernaryValue res = TS_NO; /* * The query requires recheck only if it involves weights @@ -328,14 +901,13 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) if (query->size > 0) { - QueryItem *item; RumChkVal gcv; /* * check-parameter array has one entry for each value (operand) in the * query. 
*/ - gcv.first_item = item = GETQUERY(query); + gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = recheck; @@ -343,11 +915,12 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) gcv.addInfoIsNull = addInfoIsNull; gcv.recheckPhrase = true; - res = TS_execute(GETQUERY(query), &gcv, - TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, - checkcondition_rum); + res = rum_TS_execute(GETQUERY(query), &gcv, + TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, + checkcondition_rum); + if (res == TS_MAYBE) + *recheck = true; } - PG_RETURN_BOOL(res); } From ef0c701d08cb8b8cdbf6687d3183d2dd2f574cb9 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 23 Jun 2020 22:07:56 +0400 Subject: [PATCH 077/182] Change TS->rum in names of variables and grammar corrections --- src/rum_ts_utils.c | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 2f1ea074aa..801bd54a4e 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -34,15 +34,14 @@ #define TS_EXEC_PHRASE_NO_POS TS_EXEC_PHRASE_AS_AND #endif -#ifndef TSTernaryValue typedef enum { TS_NO, /* definitely no match */ TS_YES, /* definitely does match */ TS_MAYBE /* can't verify match for lack of pos data */ -} TSTernaryValue; -typedef TSTernaryValue (*TSExecuteCallbackTernary) (void *arg, QueryOperand *val, ExecPhraseData *data); -#endif +} RumTernaryValue; +typedef RumTernaryValue (*RumExecuteCallbackTernary) (void *arg, QueryOperand *val, ExecPhraseData *data); + PG_FUNCTION_INFO_V1(rum_extract_tsvector); PG_FUNCTION_INFO_V1(rum_extract_tsvector_hash); @@ -70,19 +69,19 @@ static Datum build_tsvector_hash_entry(TSVector vector, WordEntry *we); static Datum build_tsquery_entry(TSQuery query, QueryOperand *operand); static Datum build_tsquery_hash_entry(TSQuery query, QueryOperand *operand); -static TSTernaryValue +static RumTernaryValue rum_phrase_output(ExecPhraseData 
*data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, int emit, int Loffset, int Roffset, int max_npos); -static TSTernaryValue +static RumTernaryValue rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallbackTernary chkcond, + RumExecuteCallbackTernary chkcond, ExecPhraseData *data); -static TSTernaryValue +static RumTernaryValue rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallbackTernary chkcond); + RumExecuteCallbackTernary chkcond); typedef Datum (*TSVectorEntryBuilder)(TSVector vector, WordEntry *we); typedef Datum (*TSQueryEntryBuilder)(TSQuery query, QueryOperand *operand); @@ -229,7 +228,7 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) } -static TSTernaryValue +static RumTernaryValue checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) { RumChkVal *gcv = (RumChkVal *) checkval; @@ -302,13 +301,13 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) * "!word:A" can mean both: "word:BCÐ’" or "!word" */ else if (val->weight == 0) - /* Query without weigths */ + /* Query without weights */ return TS_YES; else { char KeyWeightsMask = 0; - /* Fill KeyWeightMask contains with weigths from all positions */ + /* Fill KeyWeightMask contains with weights from all positions */ for (i = 0; i < npos; i++) { ptrt = decompress_pos(ptrt, &post); @@ -339,7 +338,7 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) * negative positions, which won't fit into WordEntryPos. * * The result is boolean (TS_YES or TS_NO), but for the caller's convenience - * we return it as TSTernaryValue. + * we return it as RumTernaryValue. * * Returns TS_YES if any positions were emitted to *data; or if data is NULL, * returns TS_YES if any positions would have been emitted. 
@@ -348,7 +347,7 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) #define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */ #define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */ -static TSTernaryValue +static RumTernaryValue rum_phrase_output(ExecPhraseData *data, ExecPhraseData *Ldata, ExecPhraseData *Rdata, @@ -481,7 +480,7 @@ rum_phrase_output(ExecPhraseData *data, * the starts. (This unintuitive rule is needed to avoid possibly generating * negative positions, which wouldn't fit into the WordEntryPos arrays.) * - * If the TSExecuteCallback function reports that an operand is present + * If the RumExecuteCallback function reports that an operand is present * but fails to provide position(s) for it, we will return TS_MAYBE when * it is possible but not certain that the query is matched. * @@ -489,14 +488,14 @@ rum_phrase_output(ExecPhraseData *data, * negate = false (which is the state initialized by the caller); but the * "width" output in such cases is undefined. 
*/ -static TSTernaryValue +static RumTernaryValue rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallbackTernary chkcond, + RumExecuteCallbackTernary chkcond, ExecPhraseData *data) { ExecPhraseData Ldata, Rdata; - TSTernaryValue lmatch, + RumTernaryValue lmatch, rmatch; int Loffset, Roffset, @@ -737,12 +736,11 @@ rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, * chkcond: callback function to check whether a primitive value is present */ -static TSTernaryValue +static RumTernaryValue rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, - TSExecuteCallbackTernary chkcond) + RumExecuteCallbackTernary chkcond) { - TSTernaryValue lmatch; - + RumTernaryValue lmatch; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); @@ -847,7 +845,7 @@ rum_tsquery_consistent(PG_FUNCTION_ARGS) Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - TSTernaryValue res = TS_NO; + RumTernaryValue res = TS_NO; /* * The query doesn't require recheck by default @@ -892,7 +890,7 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); - TSTernaryValue res = TS_NO; + RumTernaryValue res = TS_NO; /* * The query requires recheck only if it involves weights From 9d16555f8805cf2ae2d57084916c058c058c533f Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 28 Jul 2020 14:23:59 +0300 Subject: [PATCH 078/182] cosmetic code fixes --- src/rum_ts_utils.c | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 801bd54a4e..35a9b377bd 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -171,7 +171,11 @@ static WordEntryPosVector POSNULL = { #define RANK_NORM_RDIVRPLUS1 0x20 #define DEF_NORM_METHOD RANK_NO_NORM -#define TS_EXEC_IN_NEG 
0x04 +/* + * Should not conflict with defines + * TS_EXEC_EMPTY/TS_EXEC_CALC_NOT/TS_EXEC_PHRASE_NO_POS + */ +#define TS_EXEC_IN_NEG 0x04 #define QR_GET_OPERAND(q, v) \ (&((q)->operandData[ ((QueryItem*)(v)) - GETQUERY((q)->query) ])) @@ -197,10 +201,8 @@ Datum rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); - TSQuery query = PG_GETARG_TSQUERY(2); - - Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); bool recheck; bool res = false; @@ -247,7 +249,6 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) int32 i; char *ptrt; WordEntryPos post; - post = 0; int32 npos; int32 k = 0; @@ -267,9 +268,7 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) /* caller wants an array of positions (phrase search) */ if (data) { - const int32 itemsize = sizeof(*data->pos); - - data->pos = palloc(itemsize * npos); + data->pos = palloc(sizeof(*data->pos) * npos); data->allocated = true; /* Fill positions that has right weight to return to a caller */ @@ -289,7 +288,7 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) } } data->npos = k; - data->pos = repalloc(data->pos, itemsize * k); + data->pos = repalloc(data->pos, sizeof(*data->pos) * k); return (k ? 
TS_YES : TS_NO); } @@ -835,12 +834,10 @@ Datum rum_tsquery_consistent(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); - /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ TSQuery query = PG_GETARG_TSQUERY(2); - /* int32 nkeys = PG_GETARG_INT32(3); */ - Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); @@ -881,12 +878,10 @@ Datum rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) { bool *check = (bool *) PG_GETARG_POINTER(0); - /* StrategyNumber strategy = PG_GETARG_UINT16(1); */ TSQuery query = PG_GETARG_TSQUERY(2); - /* int32 nkeys = PG_GETARG_INT32(3); */ - Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); bool *recheck = (bool *) PG_GETARG_POINTER(5); Datum *addInfo = (Datum *) PG_GETARG_POINTER(8); bool *addInfoIsNull = (bool *) PG_GETARG_POINTER(9); From 5103be5265dec6ef4ff0bf0447e9a2ef859e08cb Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 31 Jul 2020 15:08:50 +0400 Subject: [PATCH 079/182] Bugfix and small refactor of rum_ts_join_pos --- src/rum_ts_utils.c | 50 ++++++++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 35a9b377bd..05455c0c07 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -991,6 +991,7 @@ count_pos(char *ptr, int len) if (!(ptr[i] & HIGHBIT)) count++; } + Assert(!(ptr[i-1] & HIGHBIT)); return count; } @@ -2208,6 +2209,7 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) count2 = count_pos(in2, VARSIZE_ANY_EXHDR(addInfo2)), countRes = 0; int i1 = 0, i2 = 0; + int n_equals = 0; Size size; WordEntryPos pos1 = 0, pos2 = 0, @@ -2220,51 +2222,61 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) in1 = decompress_pos(in1, &pos1); in2 = decompress_pos(in2, &pos2); - 
while(i1 < count1 && i2 < count2) + for(;;) { if (WEP_GETPOS(pos1) > WEP_GETPOS(pos2)) { pos[countRes++] = pos2; - if (i2 < count2) - in2 = decompress_pos(in2, &pos2); i2++; + if (i2 >= count2) + break; + in2 = decompress_pos(in2, &pos2); } else if (WEP_GETPOS(pos1) < WEP_GETPOS(pos2)) { pos[countRes++] = pos1; - if (i1 < count1) - in1 = decompress_pos(in1, &pos1); i1++; + if (i1 >= count1) + break; + in1 = decompress_pos(in1, &pos1); } else { pos[countRes++] = pos1; + n_equals++; + i1++; + i2++; if (i1 < count1) in1 = decompress_pos(in1, &pos1); if (i2 < count2) in2 = decompress_pos(in2, &pos2); - i1++; - i2++; + if (i2 >= count2 || i1 >= count1) + break; } } - while(i1 < count1) - { - pos[countRes++] = pos1; - if (i1 < count1) + if (i1 < count1) + for(;;) + { + pos[countRes++] = pos1; + i1++; + if (i1 >= count1) + break; in1 = decompress_pos(in1, &pos1); - i1++; - } - - while(i2 < count2) + } + else if (i2 < count2) { - pos[countRes++] = pos2; - if (i2 < count2) + for(;;) + { + pos[countRes++] = pos2; + i2++; + if (i2 >= count2) + break; in2 = decompress_pos(in2, &pos2); - i2++; + } } - Assert(countRes <= (count1 + count2)); + Assert(countRes == count1 + count2 - n_equals); /* * In some cases compressed positions may take more memory than From 4f70b4870d0909a8e5ead226728094302dfbbc40 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 31 Jul 2020 19:00:55 +0400 Subject: [PATCH 080/182] cosmetic fix --- src/rumsort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rumsort.c b/src/rumsort.c index 8aa10fef4f..e581a62cf6 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- * - * rumsort.h + * rumsort.c * Generalized tuple sorting routines. * * This module handles sorting of RumSortItem or RumScanItem structures. 
From a687976b16e154dba25e3a7177900a1b334ae79f Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Fri, 31 Jul 2020 18:47:54 +0300 Subject: [PATCH 081/182] Remove assert-enabled-only variable, cosmetic fix for Assert --- src/rum_ts_utils.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 05455c0c07..dbb8aaf816 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -972,6 +972,7 @@ decompress_pos(char *ptr, WordEntryPos *pos) else { delta |= (v & LOWERMASK) << i; + Assert(delta <= 0x3fff); *pos += delta; WEP_SETWEIGHT(*pos, v >> 5); return ptr; @@ -991,7 +992,7 @@ count_pos(char *ptr, int len) if (!(ptr[i] & HIGHBIT)) count++; } - Assert(!(ptr[i-1] & HIGHBIT)); + Assert((ptr[i-1] & HIGHBIT) == 0); return count; } @@ -2209,7 +2210,6 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) count2 = count_pos(in2, VARSIZE_ANY_EXHDR(addInfo2)), countRes = 0; int i1 = 0, i2 = 0; - int n_equals = 0; Size size; WordEntryPos pos1 = 0, pos2 = 0, @@ -2243,7 +2243,6 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) else { pos[countRes++] = pos1; - n_equals++; i1++; i2++; if (i1 < count1) @@ -2276,7 +2275,7 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) } } - Assert(countRes == count1 + count2 - n_equals); + Assert(countRes <= count1 + count2); /* * In some cases compressed positions may take more memory than From aa110276b2db802d176add01be612f2e7b939986 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 3 Aug 2020 11:44:06 +0400 Subject: [PATCH 082/182] remove compiler warning --- src/rum_ts_utils.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index dbb8aaf816..92aebdb6db 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -248,8 +248,7 @@ checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) bytea *positions; int32 i; char *ptrt; - WordEntryPos post; - post = 0; + WordEntryPos post = 0; int32 npos; int32 k = 0; From 
e36d1644fd52dcb11ab3ef9bf8cde874cbbe2560 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 17 Aug 2020 14:40:40 +0400 Subject: [PATCH 083/182] Fix svace warnings + cosmetic --- src/btree_rum.c | 1 + src/rum_ts_utils.c | 6 +++--- src/rumdatapage.c | 17 +++++++++-------- src/rumentrypage.c | 1 + src/rumget.c | 4 ++-- src/rumscan.c | 6 +++--- 6 files changed, 19 insertions(+), 16 deletions(-) diff --git a/src/btree_rum.c b/src/btree_rum.c index 170ace6aba..dd43a3c037 100644 --- a/src/btree_rum.c +++ b/src/btree_rum.c @@ -112,6 +112,7 @@ rum_btree_extract_query(FunctionCallInfo fcinfo, case BTGreaterEqualStrategyNumber: case BTGreaterStrategyNumber: *ptr_partialmatch = true; + /*FALLTHROUGH*/ case BTEqualStrategyNumber: case RUM_DISTANCE: case RUM_LEFT_DISTANCE: diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 4cfda6b80a..81a3a7d37a 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -53,7 +53,7 @@ PG_FUNCTION_INFO_V1(rum_ts_join_pos); PG_FUNCTION_INFO_V1(tsquery_to_distance_query); -static int count_pos(char *ptr, int len); +static unsigned int count_pos(char *ptr, int len); static char *decompress_pos(char *ptr, WordEntryPos *pos); static Datum build_tsvector_entry(TSVector vector, WordEntry *we); static Datum build_tsvector_hash_entry(TSVector vector, WordEntry *we); @@ -354,7 +354,7 @@ rum_tsquery_timestamp_consistent(PG_FUNCTION_ARGS) #define SIXTHBIT 0x20 #define LOWERMASK 0x1F -static int +static unsigned int compress_pos(char *target, WordEntryPos *pos, int npos) { int i; @@ -414,7 +414,7 @@ decompress_pos(char *ptr, WordEntryPos *pos) } } -static int +static unsigned int count_pos(char *ptr, int len) { int count = 0, diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 66049e8bf7..aaffe93596 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -20,7 +20,7 @@ static BlockNumber dataGetRightMostPage(RumBtree btree, Page page); /* Does datatype allow packing into the 1-byte-header varlena format? 
*/ #define TYPE_IS_PACKABLE(typlen, typstorage) \ - ((typlen) == -1 && (typstorage) != 'p') + ((typlen) == -1 && (typstorage) != TYPSTORAGE_PLAIN) /* * Increment data_length by the space needed by the datum, including any @@ -99,7 +99,7 @@ rumDatumWrite(Pointer ptr, Datum datum, bool typbyval, char typalign, elog(ERROR, "unsupported byval length: %d", (int) (typlen)); } - data_length = typlen; + data_length = (Size)typlen; } else if (typlen == -1) { @@ -149,7 +149,7 @@ rumDatumWrite(Pointer ptr, Datum datum, bool typbyval, char typalign, /* fixed-length pass-by-reference */ ptr = (char *) att_align_nominal(ptr, typalign); Assert(typlen > 0); - data_length = typlen; + data_length = (Size)typlen; memmove(ptr, DatumGetPointer(datum), data_length); } @@ -736,7 +736,7 @@ RumDataPageAddItem(Page page, void *data, OffsetNumber offset) if (offset <= maxoff) memmove(ptr + sizeof(PostingItem), ptr, - (maxoff - offset + 1) * sizeof(PostingItem)); + ((uint16_t)(maxoff - offset + 1)) * sizeof(PostingItem)); } memcpy(ptr, data, sizeof(PostingItem)); RumPageGetOpaque(page)->maxoff++; @@ -763,7 +763,7 @@ RumPageDeletePostingItem(Page page, OffsetNumber offset) char *dstptr = RumDataPageGetItem(page, offset), *sourceptr = RumDataPageGetItem(page, offset + 1); - memmove(dstptr, sourceptr, sizeof(PostingItem) * (maxoff - offset)); + memmove(dstptr, sourceptr, sizeof(PostingItem) * (uint16_t)(maxoff - offset)); } RumPageGetOpaque(page)->maxoff--; @@ -1229,7 +1229,7 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, RumItem *bound; Page newlPage = PageGetTempPageCopy(BufferGetPage(lbuf)); RumItem oldbound = *RumDataPageGetRightBound(newlPage); - int sizeofitem = sizeof(PostingItem); + unsigned int sizeofitem = sizeof(PostingItem); OffsetNumber maxoff = RumPageGetOpaque(newlPage)->maxoff; Size pageSize = PageGetPageSize(newlPage); Size freeSpace; @@ -1246,7 +1246,7 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, Assert(!RumPageIsLeaf(newlPage)); ptr = 
vector + (off - 1) * sizeofitem; if (maxoff + 1 - off != 0) - memmove(ptr + sizeofitem, ptr, (maxoff - off + 1) * sizeofitem); + memmove(ptr + sizeofitem, ptr, (uint16_t)(maxoff - off + 1) * sizeofitem); memcpy(ptr, &(btree->pitem), sizeofitem); maxoff++; @@ -1273,7 +1273,7 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, ptr = RumDataPageGetItem(rPage, FirstOffsetNumber); memcpy(ptr, vector + separator * sizeofitem, - (maxoff - separator) * sizeofitem); + (uint16_t)(maxoff - separator) * sizeofitem); RumPageGetOpaque(rPage)->maxoff = maxoff - separator; /* Adjust pd_lower */ ((PageHeader) rPage)->pd_lower = (ptr + @@ -1501,6 +1501,7 @@ rumInsertItemPointers(RumState * rumstate, RumItem * items, uint32 nitem, GinStatsData *buildStats) { + Assert(gdi->stack); BlockNumber rootBlkno = gdi->stack->blkno; gdi->btree.items = items; diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 10334ec862..cecc3656d4 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -112,6 +112,7 @@ static IndexTuple getRightMostTuple(Page page) { OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + Assert(maxoff!=InvalidOffsetNumber); return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); } diff --git a/src/rumget.c b/src/rumget.c index 32cb13eb94..0672b1c0e6 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -687,7 +687,7 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) else if (RumGetNPosting(itup) > 0) { entry->nlist = RumGetNPosting(itup); - entry->predictNumberResult = entry->nlist; + entry->predictNumberResult = (uint32_t)entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); @@ -1104,7 +1104,7 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) else if (RumGetNPosting(itup) > 0) { entry->nlist = RumGetNPosting(itup); - entry->predictNumberResult = entry->nlist; + entry->predictNumberResult = 
(uint32_t)entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); diff --git a/src/rumscan.c b/src/rumscan.c index 34de9d4fc4..0a29fbf7d7 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -469,12 +469,12 @@ lookupScanDirection(RumState *state, AttrNumber attno, StrategyNumber strategy) int i; RumConfig *rumConfig = state->rumConfig + attno - 1; - for(i = 0; rumConfig->strategyInfo[i].strategy != InvalidStrategy && - i < MAX_STRATEGIES; i++) + for(i = 0; i < MAX_STRATEGIES; i++) { + if (rumConfig->strategyInfo[i].strategy != InvalidStrategy) + break; if (rumConfig->strategyInfo[i].strategy == strategy) return rumConfig->strategyInfo[i].direction; - } return NoMovementScanDirection; From 115e20e4476d546b069baf2dd7e49a5d9ac696d3 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 19 Aug 2020 21:54:41 +0400 Subject: [PATCH 084/182] fix definition --- src/rumdatapage.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index aaffe93596..2d386167f7 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -20,7 +20,7 @@ static BlockNumber dataGetRightMostPage(RumBtree btree, Page page); /* Does datatype allow packing into the 1-byte-header varlena format? 
*/ #define TYPE_IS_PACKABLE(typlen, typstorage) \ - ((typlen) == -1 && (typstorage) != TYPSTORAGE_PLAIN) + ((typlen) == -1 && (typstorage) != 'p') /* * Increment data_length by the space needed by the datum, including any From 631a626db1ed390f99421ddf408523660a724d38 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 20 Aug 2020 17:19:13 +0400 Subject: [PATCH 085/182] cosmetic fix --- src/rumdatapage.c | 5 +++-- src/rumentrypage.c | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 2d386167f7..484c6137e4 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -1501,9 +1501,10 @@ rumInsertItemPointers(RumState * rumstate, RumItem * items, uint32 nitem, GinStatsData *buildStats) { - Assert(gdi->stack); - BlockNumber rootBlkno = gdi->stack->blkno; + BlockNumber rootBlkno; + Assert(gdi->stack); + rootBlkno = gdi->stack->blkno; gdi->btree.items = items; gdi->btree.nitem = nitem; gdi->btree.curitem = 0; diff --git a/src/rumentrypage.c b/src/rumentrypage.c index cecc3656d4..80257c41bd 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -112,7 +112,8 @@ static IndexTuple getRightMostTuple(Page page) { OffsetNumber maxoff = PageGetMaxOffsetNumber(page); - Assert(maxoff!=InvalidOffsetNumber); + + Assert(maxoff != InvalidOffsetNumber); return (IndexTuple) PageGetItem(page, PageGetItemId(page, maxoff)); } From e2e2a9fc9ed5b43e774ce4c5c1524dfa3b82544f Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 28 Sep 2020 18:32:40 +0300 Subject: [PATCH 086/182] Remove unused variable --- src/rum_ts_utils.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 1e43c2bf9e..03a19f7497 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -208,14 +208,13 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) if (query->size > 0) { - QueryItem *item; RumChkVal gcv; /* * check-parameter array has one entry for each value (operand) in the * query. 
*/ - gcv.first_item = item = GETQUERY(query); + gcv.first_item = GETQUERY(query); gcv.check = check; gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; From 4e2a49f993ea980dc735662c7f30a757cbbb2115 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 29 Sep 2020 13:04:13 +0400 Subject: [PATCH 087/182] Make compatible with PG13 changes in TS_execute and callback interface --- src/rum.h | 2 +- src/rum_ts_utils.c | 47 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/rum.h b/src/rum.h index 36f2e21f4b..b578907bcd 100644 --- a/src/rum.h +++ b/src/rum.h @@ -103,7 +103,7 @@ typedef struct RumMetaPageData int64 nEntries; } RumMetaPageData; -#define RUM_CURRENT_VERSION (0xC0DE0002) +#define RUM_CURRENT_VERSION (0xC0DE0003) #define RumPageGetMeta(p) \ ((RumMetaPageData *) PageGetContents(p)) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 03a19f7497..946633b920 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -34,15 +34,33 @@ #define TS_EXEC_PHRASE_NO_POS TS_EXEC_PHRASE_AS_AND #endif +#if PG_VERSION_NUM >= 130000 +/* Since v13 TS_execute flag naming and defaults have reverted: + * - before v13 - - since v13 - + * TS_EXEC_CALC_NOT (0x01) TS_EXEC_SKIP_NOT (0x01) + */ +#define TS_EXEC_CALC_NOT (0x01) /* Defined here for use with rum_TS_execute for + * compatibility with version < 13 where this + * flag was defined globally. 
+ * XXX Since v13 current global flag + * TS_EXEC_SKIP_NOT has reverted meaning for + * TS_execute but TS_EXEC_CALC_NOT should still + * be passed to rum_TS_execute in unchanged (previous) + * meaning but should not be passed into TS_execute: + * (TS_execute will do 'calc not' by default, and + * if you need skip it, use new TS_EXEC_SKIP_NOT) + */ +typedef TSTernaryValue RumTernaryValue; +#else typedef enum { TS_NO, /* definitely no match */ TS_YES, /* definitely does match */ TS_MAYBE /* can't verify match for lack of pos data */ } RumTernaryValue; +#endif typedef RumTernaryValue (*RumExecuteCallbackTernary) (void *arg, QueryOperand *val, ExecPhraseData *data); - PG_FUNCTION_INFO_V1(rum_extract_tsvector); PG_FUNCTION_INFO_V1(rum_extract_tsvector_hash); PG_FUNCTION_INFO_V1(rum_extract_tsquery); @@ -180,7 +198,11 @@ static WordEntryPosVector POSNULL = { #define QR_GET_OPERAND(q, v) \ (&((q)->operandData[ ((QueryItem*)(v)) - GETQUERY((q)->query) ])) +#if PG_VERSION_NUM >= 130000 +static TSTernaryValue +#else static bool +#endif pre_checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) { RumChkVal *gcv = (RumChkVal *) checkval; @@ -192,9 +214,12 @@ pre_checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) /* convert item's number to corresponding entry's (operand's) number */ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; - /* return presence of current entry in indexed value */ + #if PG_VERSION_NUM >= 130000 + return ( *(gcv->need_recheck) ? 
TS_MAYBE : gcv->check[j] ); + #else return gcv->check[j]; + #endif } Datum @@ -219,10 +244,17 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; +#if PG_VERSION_NUM >= 130000 + res = TS_execute(GETQUERY(query), + &gcv, + TS_EXEC_PHRASE_NO_POS | TS_EXEC_SKIP_NOT, + pre_checkcondition_rum); +#else res = TS_execute(GETQUERY(query), &gcv, TS_EXEC_PHRASE_NO_POS, pre_checkcondition_rum); +#endif } PG_RETURN_BOOL(res); @@ -1466,9 +1498,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, } } - +#if PG_VERSION_NUM >= 130000 + if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_SKIP_NOT, + (TSExecuteCallback) checkcondition_QueryOperand)) +#else if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, checkcondition_QueryOperand)) +#endif { if (ptr->pos > ext->q) { @@ -1508,8 +1544,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, WEP_SETWEIGHT(qro->pos, ptr->wclass); } } +#if PG_VERSION_NUM >= 130000 + if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, + (TSExecuteCallback) checkcondition_QueryOperand)) +#else if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_CALC_NOT, checkcondition_QueryOperand)) +#endif { if (ptr->pos < ext->p) { From 0e5f0e0d9aaf9276c731bd1e27e98c94159334d4 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 29 Sep 2020 12:52:27 +0300 Subject: [PATCH 088/182] [PGPRO-4224] Make compile on v13 (LogicalTapeSetCreate changes). Silence waring in switch(). 
Tags: rum --- src/rumsort.c | 4 ++++ src/rumtsquery.c | 1 + 2 files changed, 5 insertions(+) diff --git a/src/rumsort.c b/src/rumsort.c index e581a62cf6..2bc35d0847 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -40,7 +40,11 @@ #endif #if PG_VERSION_NUM >= 110000 +#if PG_VERSION_NUM >= 130000 +#define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, false, NULL, NULL, 1) +#else #define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, NULL, NULL, 1) +#endif #define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) #endif diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 747f98f2cf..0f10500c55 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -108,6 +108,7 @@ make_query_item_wrap(QueryItem *item, QueryItemWrap *parent, bool not) } case OP_PHRASE: elog(ERROR, "Indexing of phrase tsqueries isn't supported yet"); + break; default: elog(ERROR, "Invalid tsquery operator"); } From d731ce95f309614ff85dad3c7ec713238f7c22da Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 29 Sep 2020 19:28:51 +0400 Subject: [PATCH 089/182] Avoid sanitizer warnings --- src/rum_ts_utils.c | 4 ++-- src/rumscan.c | 9 ++++++--- src/rumsort.c | 4 ++++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 946633b920..d94250def8 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -216,7 +216,7 @@ pre_checkcondition_rum(void *checkval, QueryOperand *val, ExecPhraseData *data) j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item]; /* return presence of current entry in indexed value */ #if PG_VERSION_NUM >= 130000 - return ( *(gcv->need_recheck) ? TS_MAYBE : gcv->check[j] ); + return ( *(gcv->need_recheck) ? TS_MAYBE : (gcv->check[j] ? 
TS_YES : TS_NO) ); #else return gcv->check[j]; #endif @@ -228,7 +228,7 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) bool *check = (bool *) PG_GETARG_POINTER(0); TSQuery query = PG_GETARG_TSQUERY(2); Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); - bool recheck; + bool recheck = false; bool res = false; if (query->size > 0) diff --git a/src/rumscan.c b/src/rumscan.c index 0a29fbf7d7..8048215971 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -703,9 +703,12 @@ rumNewScanKey(IndexScanDesc scan) repalloc(so->entries, so->allocentries * sizeof(RumScanEntry)); } - memcpy(so->entries + so->totalentries, - key->scanEntry, sizeof(*key->scanEntry) * key->nentries); - so->totalentries += key->nentries; + if ( key->scanEntry != NULL ) + { + memcpy(so->entries + so->totalentries, + key->scanEntry, sizeof(*key->scanEntry) * key->nentries); + so->totalentries += key->nentries; + } } /* diff --git a/src/rumsort.c b/src/rumsort.c index 2bc35d0847..e0ccd40854 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -54,7 +54,11 @@ /* GUC variables */ #ifdef TRACE_SORT +#ifndef trace_sort bool trace_sort = false; +#else +trace_sort = false; +#endif #endif typedef struct From c990d3d0c0527cef9b65ba6ae5a0c18e047cc3cf Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 29 Sep 2020 21:04:50 +0400 Subject: [PATCH 090/182] Restore GUC (re) definition --- src/rumsort.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index e0ccd40854..7afd17cecb 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -54,11 +54,7 @@ /* GUC variables */ #ifdef TRACE_SORT -#ifndef trace_sort -bool trace_sort = false; -#else -trace_sort = false; -#endif +bool trace_sort = false; #endif typedef struct From 5abca543f22b377a6597175fa488e04df3a64c2f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 29 Sep 2020 22:10:23 +0400 Subject: [PATCH 091/182] make guc variable external --- src/rumsort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/src/rumsort.c b/src/rumsort.c index 7afd17cecb..c92b360856 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -54,7 +54,7 @@ /* GUC variables */ #ifdef TRACE_SORT -bool trace_sort = false; +extern bool trace_sort; #endif typedef struct From aebcf8a6fc1178d874c7a4c905e13fe488689fe1 Mon Sep 17 00:00:00 2001 From: Victor Wagner Date: Wed, 30 Sep 2020 23:34:34 +0300 Subject: [PATCH 092/182] [PGPRO-4224] Include guc.h instead of declaring guc variable Windows build does some magic to export variables from postgres.exe and import them into the extension. So it is better to include backend include file than to redeclare variable in the extension code --- src/rumsort.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index c92b360856..c6f4bef5ac 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -52,9 +52,13 @@ * Below are copied definitions from src/backend/utils/sort/tuplesort.c. */ -/* GUC variables */ +/* GUC variables shouldn't be declared explicitely. + Rather corresponigng include file should be include because it + contains neccessary Windows export/import magic. 
And part of this + magic should be done during postgres.exe compilation +*/ #ifdef TRACE_SORT -extern bool trace_sort; +#include #endif typedef struct From a79b46b4faa87793118cb913eeadd76fc56aa279 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 1 Oct 2020 16:08:13 +0400 Subject: [PATCH 093/182] Allow trace_sort to be declared local to provide compatibility with Windows builds of non-PGPRO versions and on versions < 13 --- src/rumsort.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index c6f4bef5ac..10defa4073 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -21,7 +21,6 @@ #include "commands/tablespace.h" #include "executor/executor.h" -#include "utils/guc.h" #include "utils/logtape.h" #include "utils/pg_rusage.h" @@ -52,13 +51,18 @@ * Below are copied definitions from src/backend/utils/sort/tuplesort.c. */ -/* GUC variables shouldn't be declared explicitely. - Rather corresponigng include file should be include because it - contains neccessary Windows export/import magic. And part of this - magic should be done during postgres.exe compilation -*/ +/* For PGPRO since v.13 trace_sort is imported from backend by including its + * declaration in guc.h (guc.h contains added Windows export/import magic to be done + * during postgres.exe compilation). + * For older or non-PGPRO versions on Windows platform trace_sort is not exported by + * backend so it is declared local for this case. 
+ */ #ifdef TRACE_SORT -#include <utils/guc.h> +#if PG_VERSION_NUM >= 130000 && defined (PGPRO_VERSION) +#include "utils/guc.h" +#else +bool trace_sort = false; +#endif #endif typedef struct From d9607bfc80128a0e16e54c854eb5bf83102e8d7e Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 1 Oct 2020 16:32:51 +0400 Subject: [PATCH 094/182] One more ifdef for windows compiler --- src/rumsort.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rumsort.c b/src/rumsort.c index 10defa4073..37f9e5760c 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -58,7 +58,7 @@ * backend so it is declared local for this case. */ #ifdef TRACE_SORT -#if PG_VERSION_NUM >= 130000 && defined (PGPRO_VERSION) +#if ( !defined (_MSC_VER) || (PG_VERSION_NUM >= 130000 && defined (PGPRO_VERSION)) ) #include "utils/guc.h" #else bool trace_sort = false; From 063ca03cf40fafb0ef754dce2c4282ee0f30fb82 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Tue, 13 Oct 2020 11:09:13 +0300 Subject: [PATCH 095/182] Revert version bump in 4e2a49f993ea980dc735662c7f30a757cbbb2115 commit: that is only on-disk format version, not an API --- src/rum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rum.h b/src/rum.h index b578907bcd..36f2e21f4b 100644 --- a/src/rum.h +++ b/src/rum.h @@ -103,7 +103,7 @@ typedef struct RumMetaPageData int64 nEntries; } RumMetaPageData; -#define RUM_CURRENT_VERSION (0xC0DE0003) +#define RUM_CURRENT_VERSION (0xC0DE0002) #define RumPageGetMeta(p) \ ((RumMetaPageData *) PageGetContents(p)) From dc8a92f7c3a7b0f77830c1dc500f7dccd36eadde Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 14 Oct 2020 16:56:20 +0300 Subject: [PATCH 096/182] Fix bug in v13 support.
For now it's only observed with clang v11+ with -O0 level --- src/rum_ts_utils.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index d94250def8..617fa202ac 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -1432,7 +1432,16 @@ compareDocR(const void *va, const void *vb) return (a->pos > b->pos) ? 1 : -1; } -static bool +/* + * Be carefull: clang 11+ is very sensitive to casting function + * with different return value. + */ +static +#if PG_VERSION_NUM >= 130000 +TSTernaryValue +#else +bool +#endif checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data) { @@ -1453,7 +1462,11 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, data->allocated = false; } - return qro->operandexist; + return qro->operandexist +#if PG_VERSION_NUM >= 130000 + ? TS_YES : TS_NO +#endif + ; } static bool @@ -1498,13 +1511,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, } } + if (TS_execute(GETQUERY(qr->query), (void *) qr, #if PG_VERSION_NUM >= 130000 - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_SKIP_NOT, - (TSExecuteCallback) checkcondition_QueryOperand)) + TS_EXEC_SKIP_NOT, #else - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, - checkcondition_QueryOperand)) + TS_EXEC_EMPTY, #endif + checkcondition_QueryOperand)) { if (ptr->pos > ext->q) { @@ -1544,13 +1557,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, WEP_SETWEIGHT(qro->pos, ptr->wclass); } } + if (TS_execute(GETQUERY(qr->query), (void *) qr, #if PG_VERSION_NUM >= 130000 - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, - (TSExecuteCallback) checkcondition_QueryOperand)) + TS_EXEC_EMPTY, #else - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_CALC_NOT, - checkcondition_QueryOperand)) + TS_EXEC_CALC_NOT, #endif + checkcondition_QueryOperand)) { if (ptr->pos < ext->p) { From 
19a638170f878271363a22214d4c7c63211e2943 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Wed, 14 Oct 2020 16:56:20 +0300 Subject: [PATCH 097/182] Fix bug in v13 support. For now it's only observed with clang v11+ with -O2 level --- src/rum_ts_utils.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index d94250def8..617fa202ac 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -1432,7 +1432,16 @@ compareDocR(const void *va, const void *vb) return (a->pos > b->pos) ? 1 : -1; } -static bool +/* + * Be carefull: clang 11+ is very sensitive to casting function + * with different return value. + */ +static +#if PG_VERSION_NUM >= 130000 +TSTernaryValue +#else +bool +#endif checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data) { @@ -1453,7 +1462,11 @@ checkcondition_QueryOperand(void *checkval, QueryOperand *val, data->allocated = false; } - return qro->operandexist; + return qro->operandexist +#if PG_VERSION_NUM >= 130000 + ? 
TS_YES : TS_NO +#endif + ; } static bool @@ -1498,13 +1511,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, } } + if (TS_execute(GETQUERY(qr->query), (void *) qr, #if PG_VERSION_NUM >= 130000 - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_SKIP_NOT, - (TSExecuteCallback) checkcondition_QueryOperand)) + TS_EXEC_SKIP_NOT, #else - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, - checkcondition_QueryOperand)) + TS_EXEC_EMPTY, #endif + checkcondition_QueryOperand)) { if (ptr->pos > ext->q) { @@ -1544,13 +1557,13 @@ Cover(DocRepresentation *doc, uint32 len, QueryRepresentation *qr, WEP_SETWEIGHT(qro->pos, ptr->wclass); } } + if (TS_execute(GETQUERY(qr->query), (void *) qr, #if PG_VERSION_NUM >= 130000 - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_EMPTY, - (TSExecuteCallback) checkcondition_QueryOperand)) + TS_EXEC_EMPTY, #else - if (TS_execute(GETQUERY(qr->query), (void *) qr, TS_EXEC_CALC_NOT, - checkcondition_QueryOperand)) + TS_EXEC_CALC_NOT, #endif + checkcondition_QueryOperand)) { if (ptr->pos < ext->p) { From 6ac24ae4101cc3f4aa322b52eff4f43fc6a94994 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 15 Oct 2020 12:03:29 +0400 Subject: [PATCH 098/182] Make ifdefs same style (cosmetic) --- src/rum_ts_utils.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 617fa202ac..f878241d81 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -244,19 +244,15 @@ rum_tsquery_pre_consistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; -#if PG_VERSION_NUM >= 130000 - res = TS_execute(GETQUERY(query), - &gcv, - TS_EXEC_PHRASE_NO_POS | TS_EXEC_SKIP_NOT, - pre_checkcondition_rum); -#else res = TS_execute(GETQUERY(query), &gcv, - TS_EXEC_PHRASE_NO_POS, - pre_checkcondition_rum); + TS_EXEC_PHRASE_NO_POS +#if PG_VERSION_NUM >= 130000 + | TS_EXEC_SKIP_NOT #endif + , + 
pre_checkcondition_rum); } - PG_RETURN_BOOL(res); } From f9fbf6211ded0638a6187c5ca25e181446a8a715 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 30 Oct 2020 13:06:16 +0400 Subject: [PATCH 099/182] CVE-2020-14350 create extension time attack fix --- gen_rum_sql--1.1--1.2.pl | 4 ++-- rum--1.0.sql | 2 +- rum--1.1--1.2.sql | 4 ++-- rum--1.1.sql | 2 +- rum--1.2.sql | 6 +++--- rum--1.3.sql | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gen_rum_sql--1.1--1.2.pl b/gen_rum_sql--1.1--1.2.pl index aeecd71bbe..e8309dc367 100644 --- a/gen_rum_sql--1.1--1.2.pl +++ b/gen_rum_sql--1.1--1.2.pl @@ -82,7 +82,7 @@ LANGUAGE C IMMUTABLE STRICT; -CREATE OR REPLACE FUNCTION rum_anyarray_similar(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; @@ -97,7 +97,7 @@ ); -CREATE OR REPLACE FUNCTION rum_anyarray_distance(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_distance(anyarray,anyarray) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; diff --git a/rum--1.0.sql b/rum--1.0.sql index fc83eed11a..fd2616b204 100644 --- a/rum--1.0.sql +++ b/rum--1.0.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION rumhandler(internal) +CREATE FUNCTION rumhandler(internal) RETURNS index_am_handler AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/rum--1.1--1.2.sql b/rum--1.1--1.2.sql index fad0250c87..f1ea81bc1f 100644 --- a/rum--1.1--1.2.sql +++ b/rum--1.1--1.2.sql @@ -10,7 +10,7 @@ AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; -CREATE OR REPLACE FUNCTION rum_anyarray_similar(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; @@ -25,7 +25,7 @@ CREATE OPERATOR % ( ); -CREATE OR REPLACE FUNCTION rum_anyarray_distance(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_distance(anyarray,anyarray) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; diff --git a/rum--1.1.sql b/rum--1.1.sql 
index 15b8ebae1c..88762a2411 100644 --- a/rum--1.1.sql +++ b/rum--1.1.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION rumhandler(internal) +CREATE FUNCTION rumhandler(internal) RETURNS index_am_handler AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/rum--1.2.sql b/rum--1.2.sql index 74237fc990..313de039b4 100644 --- a/rum--1.2.sql +++ b/rum--1.2.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION rumhandler(internal) +CREATE FUNCTION rumhandler(internal) RETURNS index_am_handler AS 'MODULE_PATHNAME' LANGUAGE C; @@ -1527,7 +1527,7 @@ AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; -CREATE OR REPLACE FUNCTION rum_anyarray_similar(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; @@ -1542,7 +1542,7 @@ CREATE OPERATOR % ( ); -CREATE OR REPLACE FUNCTION rum_anyarray_distance(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_distance(anyarray,anyarray) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; diff --git a/rum--1.3.sql b/rum--1.3.sql index 40d9418c68..621c4d2b9f 100644 --- a/rum--1.3.sql +++ b/rum--1.3.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION rumhandler(internal) +CREATE FUNCTION rumhandler(internal) RETURNS index_am_handler AS 'MODULE_PATHNAME' LANGUAGE C; @@ -1527,7 +1527,7 @@ AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; -CREATE OR REPLACE FUNCTION rum_anyarray_similar(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; @@ -1542,7 +1542,7 @@ CREATE OPERATOR % ( ); -CREATE OR REPLACE FUNCTION rum_anyarray_distance(anyarray,anyarray) +CREATE FUNCTION rum_anyarray_distance(anyarray,anyarray) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C STRICT STABLE; From bef9b008c7490af30c24e045590504ce845eab3a Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 30 Oct 2020 15:08:09 +0400 Subject: [PATCH 100/182] Test for CVE-2020-14350 --- Makefile | 2 +- expected/security.out | 5 +++++ 
sql/security.sql | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 expected/security.out create mode 100644 sql/security.sql diff --git a/Makefile b/Makefile index ad8f6cc9d1..570c44fdba 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,7 @@ RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) LDFLAGS_SL += $(filter -lm, $(LIBS)) -REGRESS = rum rum_validate rum_hash ruminv timestamp orderby orderby_hash \ +REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_hash \ altorder altorder_hash limits \ int2 int4 int8 float4 float8 money oid \ time timetz date interval \ diff --git a/expected/security.out b/expected/security.out new file mode 100644 index 0000000000..86fcbf81da --- /dev/null +++ b/expected/security.out @@ -0,0 +1,5 @@ +-- Check security CVE-2020-14350 +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS $$ SELECT false $$ LANGUAGE SQL; +CREATE EXTENSION rum; +ERROR: function "rum_anyarray_similar" already exists with same argument types +DROP FUNCTION rum_anyarray_similar(anyarray,anyarray); diff --git a/sql/security.sql b/sql/security.sql new file mode 100644 index 0000000000..da7b83957b --- /dev/null +++ b/sql/security.sql @@ -0,0 +1,5 @@ +-- Check security CVE-2020-14350 +CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) RETURNS bool AS $$ SELECT false $$ LANGUAGE SQL; +CREATE EXTENSION rum; +DROP FUNCTION rum_anyarray_similar(anyarray,anyarray); + From 183f32b69d599670629e8f83d889e2926c1cd443 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 9 Nov 2020 10:46:47 +0400 Subject: [PATCH 101/182] Use PG-style type defs to make compatible with pre-C99 (windows) compilers --- src/rumdatapage.c | 8 ++++---- src/rumget.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 484c6137e4..b008d43755 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -736,7 +736,7 @@ RumDataPageAddItem(Page page, void *data, 
OffsetNumber offset) if (offset <= maxoff) memmove(ptr + sizeof(PostingItem), ptr, - ((uint16_t)(maxoff - offset + 1)) * sizeof(PostingItem)); + ((uint16)(maxoff - offset + 1)) * sizeof(PostingItem)); } memcpy(ptr, data, sizeof(PostingItem)); RumPageGetOpaque(page)->maxoff++; @@ -763,7 +763,7 @@ RumPageDeletePostingItem(Page page, OffsetNumber offset) char *dstptr = RumDataPageGetItem(page, offset), *sourceptr = RumDataPageGetItem(page, offset + 1); - memmove(dstptr, sourceptr, sizeof(PostingItem) * (uint16_t)(maxoff - offset)); + memmove(dstptr, sourceptr, sizeof(PostingItem) * (uint16)(maxoff - offset)); } RumPageGetOpaque(page)->maxoff--; @@ -1246,7 +1246,7 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, Assert(!RumPageIsLeaf(newlPage)); ptr = vector + (off - 1) * sizeofitem; if (maxoff + 1 - off != 0) - memmove(ptr + sizeofitem, ptr, (uint16_t)(maxoff - off + 1) * sizeofitem); + memmove(ptr + sizeofitem, ptr, (uint16)(maxoff - off + 1) * sizeofitem); memcpy(ptr, &(btree->pitem), sizeofitem); maxoff++; @@ -1273,7 +1273,7 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, ptr = RumDataPageGetItem(rPage, FirstOffsetNumber); memcpy(ptr, vector + separator * sizeofitem, - (uint16_t)(maxoff - separator) * sizeofitem); + (uint16)(maxoff - separator) * sizeofitem); RumPageGetOpaque(rPage)->maxoff = maxoff - separator; /* Adjust pd_lower */ ((PageHeader) rPage)->pd_lower = (ptr + diff --git a/src/rumget.c b/src/rumget.c index 0672b1c0e6..571b9cf7e5 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -687,7 +687,7 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) else if (RumGetNPosting(itup) > 0) { entry->nlist = RumGetNPosting(itup); - entry->predictNumberResult = (uint32_t)entry->nlist; + entry->predictNumberResult = (uint32)entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); @@ -1104,7 +1104,7 @@ 
entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) else if (RumGetNPosting(itup) > 0) { entry->nlist = RumGetNPosting(itup); - entry->predictNumberResult = (uint32_t)entry->nlist; + entry->predictNumberResult = (uint32)entry->nlist; entry->list = (RumItem *) palloc(sizeof(RumItem) * entry->nlist); rumReadTuple(rumstate, entry->attnum, itup, entry->list, true); From 338c4e177065b8da86619d9a59010fbc75a1e94d Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 7 Dec 2020 11:03:35 +0400 Subject: [PATCH 102/182] Disable switch to bitmap scan seen in certain tests under large artificial slowdown. --- sql/int4.sql | 4 ++-- sql/int8.sql | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/int4.sql b/sql/int4.sql index fa7357b6e6..ed118819fb 100644 --- a/sql/int4.sql +++ b/sql/int4.sql @@ -120,6 +120,8 @@ EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; +SET enable_bitmapscan=OFF; + EXPLAIN (costs off) SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER BY id; SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER BY id; @@ -133,8 +135,6 @@ CREATE INDEX test_int4_h_a_idx ON test_int4_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; - EXPLAIN (costs off) SELECT count(*) FROM test_int4_h_a WHERE id < 400; SELECT count(*) FROM test_int4_h_a WHERE id < 400; diff --git a/sql/int8.sql b/sql/int8.sql index 540f2b7dbb..22489ff661 100644 --- a/sql/int8.sql +++ b/sql/int8.sql @@ -120,6 +120,8 @@ EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; +SET enable_bitmapscan=OFF; + EXPLAIN (costs off) SELECT id FROM test_int8_h_o WHERE t 
@@ 'wr&qh' AND id <= 400::int8 ORDER BY id; SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; @@ -133,8 +135,6 @@ CREATE INDEX test_int8_h_a_idx ON test_int8_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; - EXPLAIN (costs off) SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; From eebf88525a152f5fe54ab3f21593e00dee40f84d Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 8 Dec 2020 00:02:36 +0400 Subject: [PATCH 103/182] Tests fix related to 338c4e177065b8da86619d9a59010fbc75a1e94d --- expected/int4.out | 2 +- expected/int8.out | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/expected/int4.out b/expected/int4.out index 379dd6dea3..30d9840330 100644 --- a/expected/int4.out +++ b/expected/int4.out @@ -510,6 +510,7 @@ SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 458 | 58 (5 rows) +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER BY id; QUERY PLAN @@ -561,7 +562,6 @@ CREATE TABLE test_int4_h_a AS SELECT id::int4, t FROM tsts; CREATE INDEX test_int4_h_a_idx ON test_int4_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int4_h_a WHERE id < 400; QUERY PLAN diff --git a/expected/int8.out b/expected/int8.out index 40b091cdda..2dec31460b 100644 --- a/expected/int8.out +++ b/expected/int8.out @@ -510,6 +510,7 @@ SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 458 | 58 (5 rows) +SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; QUERY PLAN @@ -561,7 +562,6 @@ CREATE TABLE test_int8_h_a AS SELECT id::int8, t FROM tsts; CREATE INDEX 
test_int8_h_a_idx ON test_int8_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; QUERY PLAN From 71ce81a0e792a4cbb6675ab730f0d0446b5e1cbb Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 8 Dec 2020 12:09:07 +0400 Subject: [PATCH 104/182] Further disable bitmapscan in tests to avoid accidental switch of plan ( related to 338c4e177065b8da86619d9a59010fbc75a1e94d ) --- expected/int4.out | 4 ---- expected/int8.out | 4 ---- expected/int8_1.out | 2 -- sql/int4.sql | 6 ------ sql/int8.sql | 5 ----- 5 files changed, 21 deletions(-) diff --git a/expected/int4.out b/expected/int4.out index 30d9840330..00b73e3432 100644 --- a/expected/int4.out +++ b/expected/int4.out @@ -145,7 +145,6 @@ SELECT id FROM test_int4_o WHERE t @@ 'wr&qh' AND id >= 400 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int4_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; @@ -258,7 +257,6 @@ CREATE TABLE test_int4_a AS SELECT id::int4, t FROM tsts; CREATE INDEX test_int4_a_idx ON test_int4_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int4_a WHERE id < 400; QUERY PLAN @@ -448,7 +446,6 @@ SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id >= 400 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; @@ -510,7 +507,6 @@ SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 458 | 58 (5 rows) -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER 
BY id; QUERY PLAN diff --git a/expected/int8.out b/expected/int8.out index 2dec31460b..62e4f80a37 100644 --- a/expected/int8.out +++ b/expected/int8.out @@ -145,7 +145,6 @@ SELECT id FROM test_int8_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; @@ -258,7 +257,6 @@ CREATE TABLE test_int8_a AS SELECT id::int8, t FROM tsts; CREATE INDEX test_int8_a_idx ON test_int8_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_a WHERE id < 400::int8; QUERY PLAN @@ -448,7 +446,6 @@ SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; @@ -510,7 +507,6 @@ SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 458 | 58 (5 rows) -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; QUERY PLAN diff --git a/expected/int8_1.out b/expected/int8_1.out index 473eef3c35..194ab765bc 100644 --- a/expected/int8_1.out +++ b/expected/int8_1.out @@ -130,7 +130,6 @@ SELECT id FROM test_int8_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; @@ -413,7 +412,6 @@ SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET 
enable_seqscan = off; EXPLAIN (costs off) SELECT id, id <=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id <=> 400 LIMIT 5; diff --git a/sql/int4.sql b/sql/int4.sql index ed118819fb..2fa0e8afec 100644 --- a/sql/int4.sql +++ b/sql/int4.sql @@ -40,7 +40,6 @@ SELECT id FROM test_int4_o WHERE t @@ 'wr&qh' AND id >= 400 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) @@ -66,8 +65,6 @@ CREATE INDEX test_int4_a_idx ON test_int4_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; - EXPLAIN (costs off) SELECT count(*) FROM test_int4_a WHERE id < 400; SELECT count(*) FROM test_int4_a WHERE id < 400; @@ -107,7 +104,6 @@ SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id >= 400 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) @@ -120,8 +116,6 @@ EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; SELECT id, id |=> 400 FROM test_int4_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; -SET enable_bitmapscan=OFF; - EXPLAIN (costs off) SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER BY id; SELECT id FROM test_int4_h_o WHERE t @@ 'wr&qh' AND id <= 400 ORDER BY id; diff --git a/sql/int8.sql b/sql/int8.sql index 22489ff661..4ec9bf0abf 100644 --- a/sql/int8.sql +++ b/sql/int8.sql @@ -40,7 +40,6 @@ SELECT id FROM test_int8_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id; RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) @@ -66,8 +65,6 @@ CREATE INDEX test_int8_a_idx ON test_int8_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); -SET enable_bitmapscan=OFF; - EXPLAIN (costs off) SELECT count(*) FROM test_int8_a WHERE id < 400::int8; 
SELECT count(*) FROM test_int8_a WHERE id < 400::int8; @@ -107,7 +104,6 @@ SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id >= 400::int8 ORDER BY id RESET enable_indexscan; RESET enable_indexonlyscan; -RESET enable_bitmapscan; SET enable_seqscan = off; EXPLAIN (costs off) @@ -120,7 +116,6 @@ EXPLAIN (costs off) SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; SELECT id, id |=> 400 FROM test_int8_h_o WHERE t @@ 'wr&qh' ORDER BY id |=> 400 LIMIT 5; -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT id FROM test_int8_h_o WHERE t @@ 'wr&qh' AND id <= 400::int8 ORDER BY id; From 7930600bfc3dcdbec1d01e739473c29129be09dd Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 11 Dec 2020 11:00:58 +0400 Subject: [PATCH 105/182] 32-bit test result variant fix (related to 338c4e177065b8da86619d9a59010fbc75a1e94d) --- expected/int8_1.out | 2 -- 1 file changed, 2 deletions(-) diff --git a/expected/int8_1.out b/expected/int8_1.out index 194ab765bc..cbf68dff13 100644 --- a/expected/int8_1.out +++ b/expected/int8_1.out @@ -219,7 +219,6 @@ CREATE INDEX test_int8_a_idx ON test_int8_a USING rum (t rum_tsvector_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); ERROR: doesn't support order index over pass-by-reference column -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_a WHERE id < 400::int8; QUERY PLAN @@ -501,7 +500,6 @@ CREATE INDEX test_int8_h_a_idx ON test_int8_h_a USING rum (t rum_tsvector_hash_addon_ops, id) WITH (attach = 'id', to = 't', order_by_attach='t'); ERROR: doesn't support order index over pass-by-reference column -SET enable_bitmapscan=OFF; EXPLAIN (costs off) SELECT count(*) FROM test_int8_h_a WHERE id < 400::int8; QUERY PLAN From 9e1b4419d5da50518f87ba35f26165a10af49509 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 11 Dec 2020 14:37:23 +0000 Subject: [PATCH 106/182] Allocate static pages in memory strictly MAXALIGNed to avoid overflow due to adding 
padding bytes by Postgres data access alignment macros. This was the source of a rare but dangerous segfault on 32-bit FreeBSD, but no system was safe, as static alignment is entirely the system's/compiler's free choice. This problem was hidden by the added, completely unrelated variable trace_sort well before the relevant part of the code. It just shifted the alignment of all variables at higher addresses to an acceptable one but hasn't solved the problem at large. --- src/rumdatapage.c | 28 ++++++++++++++++++++++++---- src/rumentrypage.c | 10 ++++++++-- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index b008d43755..999b90e726 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -853,7 +853,14 @@ dataPlaceToPage(RumBtree btree, Page page, OffsetNumber off) ItemPointerData iptr = {{0, 0}, 0}; RumItem copyItem; bool copyItemEmpty = true; - char pageCopy[BLCKSZ]; + /* + * Must have pageCopy MAXALIGNed to use PG macros to access data in + * it. Should not rely on compiler alignment preferences to avoid + * pageCopy overflow related to PG in-memory page items alignment + * inside rumDataPageLeafRead() or elsewhere. + */ + char pageCopyStorage[BLCKSZ + MAXIMUM_ALIGNOF]; + char *pageCopy = (char *) MAXALIGN(pageCopyStorage); int maxoff = RumPageGetOpaque(page)->maxoff; int freespace, insertCount = 0; @@ -1055,7 +1062,14 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, RumItem item; int totalCount = 0; int maxItemIndex = btree->curitem; - static char lpageCopy[BLCKSZ]; + /* + * Must have lpageCopy MAXALIGNed to use PG macros to access data in + * it. Should not rely on compiler alignment preferences to avoid + * lpageCopy overflow related to PG in-memory page items alignment + * inside rumDataPageLeafRead() etc. 
+ */ + static char lpageCopyStorage[BLCKSZ + MAXIMUM_ALIGNOF]; + char *lpageCopy = (char *) MAXALIGN(lpageCopyStorage); memset(&item, 0, sizeof(item)); dataPrepareData(btree, newlPage, off); @@ -1233,8 +1247,14 @@ dataSplitPageInternal(RumBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber maxoff = RumPageGetOpaque(newlPage)->maxoff; Size pageSize = PageGetPageSize(newlPage); Size freeSpace; - - static char vector[2 * BLCKSZ]; + /* + * Must have vector MAXALIGNed to use PG macros to access data in + * it. Should not rely on compiler alignment preferences to avoid + * vector overflow related to PG in-memory page items alignment + * inside rumDataPageLeafRead() etc. + */ + static char vectorStorage[2 * BLCKSZ + MAXIMUM_ALIGNOF]; + char *vector = (char *) MAXALIGN(vectorStorage); RumInitPage(rPage, RumPageGetOpaque(newlPage)->flags, pageSize); freeSpace = RumDataPageGetFreeSpace(rPage); diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 80257c41bd..c07fc3219a 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -428,8 +428,14 @@ entrySplitPage(RumBtree btree, Buffer lbuf, Buffer rbuf, Page page; Page newlPage = PageGetTempPageCopy(lPage); Size pageSize = PageGetPageSize(newlPage); - - static char tupstore[2 * BLCKSZ]; + /* + * Must have tupstore MAXALIGNed to use PG macros to access data in + * it. Should not rely on compiler alignment preferences to avoid + * tupstore overflow related to PG in-memory page items alignment + * inside rumDataPageLeafRead() or elsewhere. 
+ */ + static char tupstoreStorage[2 * BLCKSZ + MAXIMUM_ALIGNOF]; + char *tupstore = (char *) MAXALIGN(tupstoreStorage); entryPreparePage(btree, newlPage, off); From 2c794bcc844397517109cfc61e166ae04e226552 Mon Sep 17 00:00:00 2001 From: 0xflotus <0xflotus@gmail.com> Date: Wed, 28 Apr 2021 10:09:26 +0200 Subject: [PATCH 107/182] fix: small errors --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 98a13bb17f..74871571c0 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ on the `GIN` access methods code. - Slow ranking. It is need position information about lexems to ranking. `GIN` index doesn't store positions of lexems. So after index scan we need additional -heap scan to retreive lexems positions. +heap scan to retrieve lexems positions. - Slow phrase search with `GIN` index. This problem relates with previous problem. It is need position information to perform phrase search. - Slow ordering by timestamp. `GIN` index can't store some related information @@ -239,7 +239,7 @@ SELECT * FROM query For type: `anyarray` -This operator class stores `anyarrray` elements with length of the array. +This operator class stores `anyarray` elements with length of the array. Supports operators `&&`, `@>`, `<@`, `=`, `%` operators. Supports ordering by `<=>` operator. For example we have the table: @@ -278,7 +278,7 @@ SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; For type: `anyarray` -This operator class stores `anyarrray` elements with any supported by module +This operator class stores `anyarray` elements with any supported by module field. ## Todo From 8524bf0e9228982db1dbb9a8890dc0281c3143df Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 28 May 2021 15:36:45 +0400 Subject: [PATCH 108/182] [PGPRO-5136] Fix bug with losing results in phrase search in index without positional information (e.g. addon_ops). 
Per report: https://github.com/postgrespro/rum/issues/93 tags: rum --- src/rum_ts_utils.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index f878241d81..a1c5c357fd 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -600,6 +600,14 @@ rum_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, if (curitem->qoperator.oper == OP_PHRASE) { + /* In case of index where position is not available + * (e.g. addon_ops) output TS_MAYBE even in case both + * lmatch and rmatch are TS_YES. Otherwise we can lose + * results of phrase queries. + */ + if (flags & TS_EXEC_PHRASE_NO_POS) + return TS_MAYBE; + /* * Compute Loffset and Roffset suitable for phrase match, and * compute overall width of whole phrase match. @@ -829,13 +837,10 @@ rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, case OP_PHRASE: /* - * If we get a MAYBE result, and the caller doesn't want that, - * convert it to NO. It would be more consistent, perhaps, to - * return the result of TS_phrase_execute() verbatim and then - * convert MAYBE results at the top of the recursion. But - * converting at the topmost phrase operator gives results that - * are bug-compatible with the old implementation, so do it like - * this for now. + * Checking for TS_EXEC_PHRASE_NO_POS has been moved inside + * rum_phrase_execute, otherwise we can lose results of phrase + * operator when position information is not available in index + * (e.g. index built with addon_ops) */ switch (rum_phrase_execute(curitem, arg, flags, chkcond, NULL)) { @@ -844,7 +849,7 @@ rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, case TS_YES: return TS_YES; case TS_MAYBE: - return (flags & TS_EXEC_PHRASE_NO_POS) ? 
TS_MAYBE : TS_NO; + return TS_MAYBE; } break; From 39ccbd636edae19fb04f69e07844c32470056746 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 28 May 2021 16:04:53 +0400 Subject: [PATCH 109/182] [PGPRO-5136] Revert an unnecessary part of a change 8524bf0e9228982db1dbb9a8890dc0281c3143df tags: rum --- src/rum_ts_utils.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index a1c5c357fd..5a335daaf5 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -837,6 +837,14 @@ rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, case OP_PHRASE: /* + * If we get a MAYBE result, and the caller doesn't want that, + * convert it to NO. It would be more consistent, perhaps, to + * return the result of TS_phrase_execute() verbatim and then + * convert MAYBE results at the top of the recursion. But + * converting at the topmost phrase operator gives results that + * are bug-compatible with the old implementation, so do it like + * this for now. + * * Checking for TS_EXEC_PHRASE_NO_POS has been moved inside * rum_phrase_execute, otherwise we can lose results of phrase * operator when position information is not available in index @@ -849,7 +857,7 @@ rum_TS_execute(QueryItem *curitem, void *arg, uint32 flags, case TS_YES: return TS_YES; case TS_MAYBE: - return TS_MAYBE; + return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO; } break; From dccdc8ecfb0e1a72173e215f8f0efc9e84cb705f Mon Sep 17 00:00:00 2001 From: Alexey Masterov Date: Mon, 31 May 2021 10:46:54 +0300 Subject: [PATCH 110/182] [PGPRO-4864] Fix for errors in rum test causing lag between write and replay on replica. 
Now the test will wait for replay on the target database, not for write to WAL tags: rum --- t/001_wal.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/001_wal.pl b/t/001_wal.pl index 99415afb63..1ee47b76ae 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -23,12 +23,12 @@ sub test_index_replay if ($server_version < 100000) { $caughtup_query = - "SELECT pg_current_xlog_location() <= write_location FROM pg_stat_replication WHERE application_name = '$applname';"; + "SELECT pg_current_xlog_location() <= replay_location FROM pg_stat_replication WHERE application_name = '$applname';"; } else { $caughtup_query = - "SELECT pg_current_wal_lsn() <= write_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; + "SELECT pg_current_wal_lsn() <= replay_lsn FROM pg_stat_replication WHERE application_name = '$applname';"; } $node_master->poll_query_until('postgres', $caughtup_query) or die "Timed out while waiting for standby 1 to catch up"; From 727d692ae784ff2bc7484c59817405f9be368055 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 31 May 2021 15:31:52 +0400 Subject: [PATCH 111/182] Test for correct work of phrase operator in index where position information is not added. --- expected/rum.out | 18 ++++++++++++++++++ sql/rum.sql | 9 +++++++++ 2 files changed, 27 insertions(+) diff --git a/expected/rum.out b/expected/rum.out index db08b158dd..5966d196fe 100644 --- a/expected/rum.out +++ b/expected/rum.out @@ -381,6 +381,24 @@ SELECT (a <=> to_tsquery('pg_catalog.english', 'b:*'))::numeric(10,4) AS distanc 16.4493 | the few that escaped destruction in 1693. It is a beautiful, highly | '1693':7 'beauti':11 'destruct':5 'escap':4 'high':12 (20 rows) +-- Test correct work of phrase operator when position information is not in index. 
+create table test_rum_addon as table test_rum; +alter table test_rum_addon add column id serial; +create index on test_rum_addon using rum (a rum_tsvector_addon_ops, id) with (attach = 'id', to='a'); +select * from test_rum_addon where a @@ to_tsquery('pg_catalog.english', 'half <-> way'); + t | a | id +---------------------------------------------------------------------+---------------------------------------------------------+---- + itself. Put on your "specs" and look at the castle, half way up the | 'castl':10 'half':11 'look':7 'put':2 'spec':5 'way':12 | 9 +(1 row) + +explain (costs off) select * from test_rum_addon where a @@ to_tsquery('pg_catalog.english', 'half <-> way'); + QUERY PLAN +------------------------------------------------------------ + Index Scan using test_rum_addon_a_id_idx on test_rum_addon + Index Cond: (a @@ '''half'' <-> ''way'''::tsquery) +(2 rows) + +-- select ('bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; distance ---------- diff --git a/sql/rum.sql b/sql/rum.sql index de432fde1a..8414bb95c5 100644 --- a/sql/rum.sql +++ b/sql/rum.sql @@ -141,5 +141,14 @@ SELECT (a <=> to_tsquery('pg_catalog.english', 'b:*'))::numeric(10,4) AS distanc WHERE a @@ to_tsquery('pg_catalog.english', 'b:*') ORDER BY a <=> to_tsquery('pg_catalog.english', 'b:*'); +-- Test correct work of phrase operator when position information is not in index. 
+create table test_rum_addon as table test_rum; +alter table test_rum_addon add column id serial; +create index on test_rum_addon using rum (a rum_tsvector_addon_ops, id) with (attach = 'id', to='a'); + +select * from test_rum_addon where a @@ to_tsquery('pg_catalog.english', 'half <-> way'); +explain (costs off) select * from test_rum_addon where a @@ to_tsquery('pg_catalog.english', 'half <-> way'); +-- + select ('bjarn:6237 stroustrup:6238'::tsvector <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; SELECT ('stroustrup:5508B,6233B,6238B bjarn:6235B,6237B' <=> 'bjarn <-> stroustrup'::tsquery)::numeric(10,5) AS distance; From 2dd38404955b2c31195c45c733db06600f7eef9e Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 8 Oct 2021 13:20:34 +0400 Subject: [PATCH 112/182] Compatibility with PG 14 --- src/rum.h | 3 +++ src/ruminsert.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/rum.h b/src/rum.h index 36f2e21f4b..8f54edd5d4 100644 --- a/src/rum.h +++ b/src/rum.h @@ -449,6 +449,9 @@ extern void rumbuildempty(Relation index); extern bool ruminsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique +#if PG_VERSION_NUM >= 140000 + , bool indexUnchanged +#endif #if PG_VERSION_NUM >= 100000 , struct IndexInfo *indexInfo #endif diff --git a/src/ruminsert.c b/src/ruminsert.c index 7315f517f3..f42c8a9526 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -818,6 +818,9 @@ bool ruminsert(Relation index, Datum *values, bool *isnull, ItemPointer ht_ctid, Relation heapRel, IndexUniqueCheck checkUnique +#if PG_VERSION_NUM >= 140000 + , bool indexUnchanged +#endif #if PG_VERSION_NUM >= 100000 , struct IndexInfo *indexInfo #endif From 7b71c50c377ab4476d2ab19c8bb333567676838e Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 10 Nov 2021 14:27:38 +0400 Subject: [PATCH 113/182] Add test for partial order query --- expected/altorder.out | 127 ++++++++++++++++++++++++++++++++++++++++ 
expected/altorder_1.out | 127 ++++++++++++++++++++++++++++++++++++++++ sql/altorder.sql | 3 + 3 files changed, 257 insertions(+) diff --git a/expected/altorder.out b/expected/altorder.out index 9f7178b86a..dcf5a51954 100644 --- a/expected/altorder.out +++ b/expected/altorder.out @@ -432,3 +432,130 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (32 rows) +EXPLAIN (costs off) +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Index Scan using atsts_idx on atsts + Index Cond: ((t @@ '''wr'' & ''q'':*'::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(4 rows) + +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 405 | Wed May 18 
16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 468 | Sat May 21 
07:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 +(112 rows) + diff --git a/expected/altorder_1.out b/expected/altorder_1.out index 5bd8925d7c..2fb7f52326 100644 --- a/expected/altorder_1.out +++ b/expected/altorder_1.out @@ -431,3 +431,130 @@ SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 496 | Sun May 22 11:21:22.326724 2016 (32 rows) +EXPLAIN (costs off) +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------------- + Sort + Sort Key: d + -> Seq Scan on atsts + Filter: ((t @@ '''wr'' & ''q'':*'::tsquery) AND (d >= 'Mon May 16 14:21:25 2016'::timestamp without time zone)) +(4 rows) + +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; + id | d +-----+--------------------------------- + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 361 | Mon May 16 20:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 369 | Tue May 17 04:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 371 | Tue May 17 06:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 372 | Tue May 17 07:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 375 | Tue May 17 10:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 388 | Tue May 17 23:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 405 | Wed May 18 16:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 406 | Wed May 18 17:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 415 | Thu May 19 02:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | Thu May 19 09:21:22.326724 2016 + 422 | 
Thu May 19 09:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 428 | Thu May 19 15:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 441 | Fri May 20 04:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 444 | Fri May 20 07:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 457 | Fri May 20 20:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 458 | Fri May 20 21:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 463 | Sat May 21 02:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 465 | Sat May 21 04:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 466 | Sat May 21 05:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 468 | Sat May 21 07:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 471 | Sat May 21 10:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 475 | Sat May 21 14:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 481 | Sat May 21 20:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | 
Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 484 | Sat May 21 23:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 485 | Sun May 22 00:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 493 | Sun May 22 08:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 496 | Sun May 22 11:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 499 | Sun May 22 14:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 + 506 | Sun May 22 21:21:22.326724 2016 +(112 rows) + diff --git a/sql/altorder.sql b/sql/altorder.sql index 86858d1fbb..bc89f8fc06 100644 --- a/sql/altorder.sql +++ b/sql/altorder.sql @@ -81,3 +81,6 @@ EXPLAIN (costs off) SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; SELECT id, d FROM atsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d; +EXPLAIN (costs off) +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; +SELECT id, d FROM atsts WHERE t @@ 'wr&q:*' AND d >= '2016-05-16 14:21:25' ORDER BY d; From 8b2a4023b5d63d9d0e66965f92fa778229b6114f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 10 Nov 2021 12:54:50 +0400 Subject: [PATCH 114/182] Make rumsort use vanilla tuplesort functions, part 1: -Add vanilla tuplesort.c and qsort_tuple.c files --- src/qsort_tuple.c | 332 ++++ src/tuplesort10.c | 4467 +++++++++++++++++++++++++++++++++++++++++ src/tuplesort11.c | 4593 ++++++++++++++++++++++++++++++++++++++++++ src/tuplesort12.c | 4594 ++++++++++++++++++++++++++++++++++++++++++ src/tuplesort13.c | 4706 
+++++++++++++++++++++++++++++++++++++++++++ src/tuplesort14.c | 4782 ++++++++++++++++++++++++++++++++++++++++++++ src/tuplesort15.c | 4698 +++++++++++++++++++++++++++++++++++++++++++ src/tuplesort96.c | 4836 +++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 33008 insertions(+) create mode 100644 src/qsort_tuple.c create mode 100644 src/tuplesort10.c create mode 100644 src/tuplesort11.c create mode 100644 src/tuplesort12.c create mode 100644 src/tuplesort13.c create mode 100644 src/tuplesort14.c create mode 100644 src/tuplesort15.c create mode 100644 src/tuplesort96.c diff --git a/src/qsort_tuple.c b/src/qsort_tuple.c new file mode 100644 index 0000000000..0cb46e1416 --- /dev/null +++ b/src/qsort_tuple.c @@ -0,0 +1,332 @@ +/* + * autogenerated by src/backend/utils/sort/gen_qsort_tuple.pl, do not edit! + * + * This file is included by tuplesort.c, rather than compiled separately. + */ + +/* $NetBSD: qsort.c,v 1.13 2003/08/07 16:43:42 agc Exp $ */ + +/*- + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Qsort routine based on J. L. Bentley and M. D. McIlroy, + * "Engineering a sort function", + * Software--Practice and Experience 23 (1993) 1249-1265. + * + * We have modified their original by adding a check for already-sorted input, + * which seems to be a win per discussions on pgsql-hackers around 2006-03-21. + * + * Also, we recurse on the smaller partition and iterate on the larger one, + * which ensures we cannot recurse more than log(N) levels (since the + * partition recursed to is surely no more than half of the input). Bentley + * and McIlroy explicitly rejected doing this on the grounds that it's "not + * worth the effort", but we have seen crashes in the field due to stack + * overrun, so that judgment seems wrong. 
+ */ + +static void +swapfunc(SortTuple *a, SortTuple *b, size_t n) +{ + do + { + SortTuple t = *a; + *a++ = *b; + *b++ = t; + } while (--n > 0); +} + +#define swap(a, b) \ + do { \ + SortTuple t = *(a); \ + *(a) = *(b); \ + *(b) = t; \ + } while (0) + +#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n) + +static SortTuple * +med3_tuple(SortTuple *a, SortTuple *b, SortTuple *c, SortTupleComparator cmp_tuple, Tuplesortstate *state) +{ + return cmp_tuple(a, b, state) < 0 ? + (cmp_tuple(b, c, state) < 0 ? b : + (cmp_tuple(a, c, state) < 0 ? c : a)) + : (cmp_tuple(b, c, state) > 0 ? b : + (cmp_tuple(a, c, state) < 0 ? a : c)); +} + +static void +qsort_tuple(SortTuple *a, size_t n, SortTupleComparator cmp_tuple, Tuplesortstate *state) +{ + SortTuple *pa, + *pb, + *pc, + *pd, + *pl, + *pm, + *pn; + size_t d1, + d2; + int r, + presorted; + +loop: + CHECK_FOR_INTERRUPTS(); + if (n < 7) + { + for (pm = a + 1; pm < a + n; pm++) + for (pl = pm; pl > a && cmp_tuple(pl - 1, pl, state) > 0; pl--) + swap(pl, pl - 1); + return; + } + presorted = 1; + for (pm = a + 1; pm < a + n; pm++) + { + CHECK_FOR_INTERRUPTS(); + if (cmp_tuple(pm - 1, pm, state) > 0) + { + presorted = 0; + break; + } + } + if (presorted) + return; + pm = a + (n / 2); + if (n > 7) + { + pl = a; + pn = a + (n - 1); + if (n > 40) + { + size_t d = (n / 8); + + pl = med3_tuple(pl, pl + d, pl + 2 * d, cmp_tuple, state); + pm = med3_tuple(pm - d, pm, pm + d, cmp_tuple, state); + pn = med3_tuple(pn - 2 * d, pn - d, pn, cmp_tuple, state); + } + pm = med3_tuple(pl, pm, pn, cmp_tuple, state); + } + swap(a, pm); + pa = pb = a + 1; + pc = pd = a + (n - 1); + for (;;) + { + while (pb <= pc && (r = cmp_tuple(pb, a, state)) <= 0) + { + if (r == 0) + { + swap(pa, pb); + pa++; + } + pb++; + CHECK_FOR_INTERRUPTS(); + } + while (pb <= pc && (r = cmp_tuple(pc, a, state)) >= 0) + { + if (r == 0) + { + swap(pc, pd); + pd--; + } + pc--; + CHECK_FOR_INTERRUPTS(); + } + if (pb > pc) + break; + swap(pb, pc); + pb++; + pc--; + } + pn 
= a + n; + d1 = Min(pa - a, pb - pa); + vecswap(a, pb - d1, d1); + d1 = Min(pd - pc, pn - pd - 1); + vecswap(pb, pn - d1, d1); + d1 = pb - pa; + d2 = pd - pc; + if (d1 <= d2) + { + /* Recurse on left partition, then iterate on right partition */ + if (d1 > 1) + qsort_tuple(a, d1, cmp_tuple, state); + if (d2 > 1) + { + /* Iterate rather than recurse to save stack space */ + /* qsort_tuple(pn - d2, d2, cmp_tuple, state); */ + a = pn - d2; + n = d2; + goto loop; + } + } + else + { + /* Recurse on right partition, then iterate on left partition */ + if (d2 > 1) + qsort_tuple(pn - d2, d2, cmp_tuple, state); + if (d1 > 1) + { + /* Iterate rather than recurse to save stack space */ + /* qsort_tuple(a, d1, cmp_tuple, state); */ + n = d1; + goto loop; + } + } +} + +#define cmp_ssup(a, b, ssup) \ + ApplySortComparator((a)->datum1, (a)->isnull1, \ + (b)->datum1, (b)->isnull1, ssup) + +static SortTuple * +med3_ssup(SortTuple *a, SortTuple *b, SortTuple *c, SortSupport ssup) +{ + return cmp_ssup(a, b, ssup) < 0 ? + (cmp_ssup(b, c, ssup) < 0 ? b : + (cmp_ssup(a, c, ssup) < 0 ? c : a)) + : (cmp_ssup(b, c, ssup) > 0 ? b : + (cmp_ssup(a, c, ssup) < 0 ? 
a : c)); +} + +static void +qsort_ssup(SortTuple *a, size_t n, SortSupport ssup) +{ + SortTuple *pa, + *pb, + *pc, + *pd, + *pl, + *pm, + *pn; + size_t d1, + d2; + int r, + presorted; + +loop: + CHECK_FOR_INTERRUPTS(); + if (n < 7) + { + for (pm = a + 1; pm < a + n; pm++) + for (pl = pm; pl > a && cmp_ssup(pl - 1, pl, ssup) > 0; pl--) + swap(pl, pl - 1); + return; + } + presorted = 1; + for (pm = a + 1; pm < a + n; pm++) + { + CHECK_FOR_INTERRUPTS(); + if (cmp_ssup(pm - 1, pm, ssup) > 0) + { + presorted = 0; + break; + } + } + if (presorted) + return; + pm = a + (n / 2); + if (n > 7) + { + pl = a; + pn = a + (n - 1); + if (n > 40) + { + size_t d = (n / 8); + + pl = med3_ssup(pl, pl + d, pl + 2 * d, ssup); + pm = med3_ssup(pm - d, pm, pm + d, ssup); + pn = med3_ssup(pn - 2 * d, pn - d, pn, ssup); + } + pm = med3_ssup(pl, pm, pn, ssup); + } + swap(a, pm); + pa = pb = a + 1; + pc = pd = a + (n - 1); + for (;;) + { + while (pb <= pc && (r = cmp_ssup(pb, a, ssup)) <= 0) + { + if (r == 0) + { + swap(pa, pb); + pa++; + } + pb++; + CHECK_FOR_INTERRUPTS(); + } + while (pb <= pc && (r = cmp_ssup(pc, a, ssup)) >= 0) + { + if (r == 0) + { + swap(pc, pd); + pd--; + } + pc--; + CHECK_FOR_INTERRUPTS(); + } + if (pb > pc) + break; + swap(pb, pc); + pb++; + pc--; + } + pn = a + n; + d1 = Min(pa - a, pb - pa); + vecswap(a, pb - d1, d1); + d1 = Min(pd - pc, pn - pd - 1); + vecswap(pb, pn - d1, d1); + d1 = pb - pa; + d2 = pd - pc; + if (d1 <= d2) + { + /* Recurse on left partition, then iterate on right partition */ + if (d1 > 1) + qsort_ssup(a, d1, ssup); + if (d2 > 1) + { + /* Iterate rather than recurse to save stack space */ + /* qsort_ssup(pn - d2, d2, ssup); */ + a = pn - d2; + n = d2; + goto loop; + } + } + else + { + /* Recurse on right partition, then iterate on left partition */ + if (d2 > 1) + qsort_ssup(pn - d2, d2, ssup); + if (d1 > 1) + { + /* Iterate rather than recurse to save stack space */ + /* qsort_ssup(a, d1, ssup); */ + n = d1; + goto loop; + } + } +} diff --git 
a/src/tuplesort10.c b/src/tuplesort10.c new file mode 100644 index 0000000000..80bc67bc9e --- /dev/null +++ b/src/tuplesort10.c @@ -0,0 +1,4467 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we only do that + * for the first run, and only if the run would otherwise end up being very + * short. We merge the runs using polyphase merge, Knuth's Algorithm + * 5.4.2D. The logical "tapes" used by Algorithm D are implemented by + * logtape.c, which avoids space wastage by recycling disk space as soon + * as each block is read from its "tape". + * + * We do not use Knuth's recommended data structure (Algorithm 5.4.1R) for + * the replacement selection, because it uses a fixed number of records + * in memory at all times. Since we are dealing with tuples that may vary + * considerably in size, we want to be able to vary the number of records + * kept in memory to ensure full utilization of the allowed sort memory + * space. So, we keep the tuples in a variable-size heap, with the next + * record to go out at the top of the heap. Like Algorithm 5.4.1R, each + * record is stored with the run number that it must go into, and we use + * (run number, key) as the ordering key for the heap. 
When the run number + * at the top of the heap changes, we know that no more records of the prior + * run are left in the heap. Note that there are in practice only ever two + * distinct run numbers, because since PostgreSQL 9.6, we only use + * replacement selection to form the first run. + * + * In PostgreSQL 9.6, a heap (based on Knuth's Algorithm H, with some small + * customizations) is only used with the aim of producing just one run, + * thereby avoiding all merging. Only the first run can use replacement + * selection, which is why there are now only two possible valid run + * numbers, and why heapification is customized to not distinguish between + * tuples in the second run (those will be quicksorted). We generally + * prefer a simple hybrid sort-merge strategy, where runs are sorted in much + * the same way as the entire input of an internal sort is sorted (using + * qsort()). The replacement_sort_tuples GUC controls the limited remaining + * use of replacement selection for the first run. + * + * There are several reasons to favor a hybrid sort-merge strategy. + * Maintaining a priority tree/heap has poor CPU cache characteristics. + * Furthermore, the growth in main memory sizes has greatly diminished the + * value of having runs that are larger than available memory, even in the + * case where there is partially sorted input and runs can be made far + * larger by using a heap. In most cases, a single-pass merge step is all + * that is required even when runs are no larger than available memory. + * Avoiding multiple merge passes was traditionally considered to be the + * major advantage of using replacement selection. + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. 
If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run + * (or two, when replacement selection is still used), then merge the runs + * using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. 
This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c, we just tell it how + * much memory to use for the buffers. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. In our implementation a "tape drive" + * doesn't cost much more than a few Kb of memory buffers, so we can afford + * to have lots of them. In particular, if we can have as many tape drives + * as sorted runs, we can eliminate any repeated I/O at all. In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes (but not + * too many -- see the comments in tuplesort_merge_order). 
+ * + * + * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/hash.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during merge, when we use a + * simple slab allocator). SortTuples also contain the tuple's first key + * column in Datum/nullflag format, and an index integer. + * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. 
Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values). If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. + * + * While building initial runs, tupindex holds the tuple's run number. + * Historically, the run number could meaningfully distinguish many runs, but + * it now only distinguishes RUN_FIRST and HEAP_RUN_NEXT, since replacement + * selection is always abandoned after the first run; no other run number + * should be represented here. During merge passes, we re-use it to hold the + * input tape number that each tuple in the heap was read from. tupindex goes + * unused if the sort occurs entirely in memory. + */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int tupindex; /* see notes above */ +} SortTuple; + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples. To avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. 
+ * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. + */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 1 blocks + * worth of buffer space. This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + + /* + * Run numbers, used during external sort operations. + * + * HEAP_RUN_NEXT is only used for SortTuple.tupindex, never state.currentRun. + */ +#define RUN_FIRST 0 +#define HEAP_RUN_NEXT INT_MAX +#define RUN_SECOND 1 + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. 
+ */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? */ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a<b, a=b, a>b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. 
Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. 
When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Buffer size to use for reading input tapes, during merge. */ + size_t read_buffer_size; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this indicates if the replacement + * selection strategy is in use. When it isn't, then a simple hybrid + * sort-merge strategy is in use instead (runs are quicksorted). + */ + bool replaceActive; + + /* + * While building initial runs, this is the current output run number + * (starting at RUN_FIRST). Afterwards, it is the number of initial runs + * we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * This variable is only used during merge passes. mergeactive[i] is true + * if we are reading an input run from (actual) tape number i and have not + * yet exhausted that run. + */ + bool *mergeactive; /* active input run source? */ + + /* + * Variables for Algorithm D. 
Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! + */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. + */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. 
+ */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. 
+ */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. 
They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. + * Note that this places a responsibility on readtup and copytup routines + * to use the right memory context for these tuples (and to not use the + * reset context for anything whose lifetime needs to span multiple + * external sort runs). 
+ */ + +/* When using this macro, beware of double evaluation of len */ +#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ + do { \ + if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ + elog(ERROR, "unexpected end of data"); \ + } while(0) + + +static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess); +static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); +static bool consider_abort_common(Tuplesortstate *state); +static bool useselection(Tuplesortstate *state); +static void inittapes(Tuplesortstate *state); +static void selectnewtape(Tuplesortstate *state); +static void init_slab_allocator(Tuplesortstate *state, int numSlots); +static void mergeruns(Tuplesortstate *state); +static void mergeonerun(Tuplesortstate *state); +static void beginmerge(Tuplesortstate *state); +static bool mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup); +static void dumptuples(Tuplesortstate *state, bool alltuples); +static void dumpbatch(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); +static void tuplesort_sort_memtuples(Tuplesortstate *state); +static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, + bool checkIndex); +static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple, + bool checkIndex); +static void tuplesort_heap_delete_top(Tuplesortstate *state, bool checkIndex); +static void reversedirection(Tuplesortstate *state); +static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK); +static void markrunend(Tuplesortstate *state, int tapenum); +static void *readtup_alloc(Tuplesortstate *state, Size tuplen); +static int comparetup_heap(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_heap(Tuplesortstate *state, int tapenum, + 
SortTuple *stup); +static void readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_cluster(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_index(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_datum(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_datum(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); + +/* + * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts + * any variant of SortTuples, using the appropriate comparetup function. + * qsort_ssup() is specialized for the case where the comparetup function + * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts + * and Datum sorts. + */ +#include "qsort_tuple.c" + + +/* + * tuplesort_begin_xxx + * + * Initialize for a tuple sort operation. + * + * After calling tuplesort_begin, the caller should call tuplesort_putXXX + * zero or more times, then call tuplesort_performsort when all the tuples + * have been supplied. 
After performsort, retrieve the tuples in sorted + * order by calling tuplesort_getXXX until it returns false/NULL. (If random + * access was requested, rescan, markpos, and restorepos can also be called.) + * Call tuplesort_end to terminate the operation and release memory/disk space. + * + * Each variant of tuplesort_begin has a workMem parameter specifying the + * maximum number of kilobytes of RAM to use before spilling data to disk. + * (The normal value of this parameter is work_mem, but some callers use + * other values.) Each variant also has a randomAccess parameter specifying + * whether the caller needs non-sequential access to the sort result. + */ + +static Tuplesortstate * +tuplesort_begin_common(int workMem, bool randomAccess) +{ + Tuplesortstate *state; + MemoryContext sortcontext; + MemoryContext tuplecontext; + MemoryContext oldcontext; + + /* + * Create a working memory context for this sort operation. All data + * needed by the sort will live inside this context. + */ + sortcontext = AllocSetContextCreate(CurrentMemoryContext, + "TupleSort main", + ALLOCSET_DEFAULT_SIZES); + + /* + * Caller tuple (e.g. IndexTuple) memory context. + * + * A dedicated child context used exclusively for caller passed tuples + * eases memory management. Resetting at key points reduces + * fragmentation. Note that the memtuples array of SortTuples is allocated + * in the parent context, not this context, because there is no need to + * free memtuples early. + */ + tuplecontext = AllocSetContextCreate(sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + + /* + * Make the Tuplesortstate within the per-sort context. This way, we + * don't need a separate pfree() operation for it at shutdown. 
+ */ + oldcontext = MemoryContextSwitchTo(sortcontext); + + state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate)); + +#ifdef TRACE_SORT + if (trace_sort) + pg_rusage_init(&state->ru_start); +#endif + + state->status = TSS_INITIAL; + state->randomAccess = randomAccess; + state->bounded = false; + state->tuples = true; + state->boundUsed = false; + state->allowedMem = workMem * (int64) 1024; + state->availMem = state->allowedMem; + state->sortcontext = sortcontext; + state->tuplecontext = tuplecontext; + state->tapeset = NULL; + + state->memtupcount = 0; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). + */ + state->memtupsize = Max(1024, + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + + state->growmemtuples = true; + state->slabAllocatorUsed = false; + state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* workMem must be large enough for the minimal memtuples array */ + if (LACKMEM(state)) + elog(ERROR, "insufficient memory allowed for sort"); + + state->currentRun = RUN_FIRST; + + /* + * maxTapes, tapeRange, and Algorithm D variables will be initialized by + * inittapes(), if needed + */ + + state->result_tape = -1; /* flag that result tape has not been formed */ + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_heap(TupleDesc tupDesc, + int nkeys, AttrNumber *attNums, + Oid *sortOperators, Oid *sortCollations, + bool *nullsFirstFlags, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + + AssertArg(nkeys > 0); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + nkeys, workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey_nodata(indexRel); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey_nodata(indexRel); + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? 
+ BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_hash(Relation heapRel, + Relation indexRel, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " + "max_buckets = 0x%x, workMem = %d, randomAccess = %c", + high_mask, + low_mask, + max_buckets, + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the hash code */ + + state->comparetup = comparetup_index_hash; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + state->high_mask = high_mask; + state->low_mask = low_mask; + state->max_buckets = max_buckets; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple is needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each sortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. 
(Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL); + Assert(state->memtupcount == 0); + Assert(!state->bounded); + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX / 2)) + return; + + state->bounded = true; + state->bound = (int) bound; + + /* + * Bounded sorts are not an effective target for abbreviated key + * optimization. Disable by setting state to be consistent with no + * abbreviation support. + */ + state->sortKeys->abbrev_converter = NULL; + if (state->sortKeys->abbrev_full_comparator) + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! + */ +void +tuplesort_end(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. 
+ * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "external sort ended, %ld disk blocks used: %s", + spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "internal sort ended, %ld KB used: %s", + spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. + */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory, + * including the Tuplesortstate struct itself. + */ + MemoryContextDelete(state->sortcontext); +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. Return TRUE if we were able to enlarge the array, FALSE if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. 
The
+ * growmemtuples flag accomplishes that and also prevents useless
+ * recalculations in this function.
+ */
+static bool
+grow_memtuples(Tuplesortstate *state)
+{
+	int			newmemtupsize;
+	int			memtupsize = state->memtupsize;
+	int64		memNowUsed = state->allowedMem - state->availMem;
+
+	/* Forget it if we've already maxed out memtuples, per comment above */
+	if (!state->growmemtuples)
+		return false;
+
+	/*
+	 * Select new value of memtupsize.  The first branch is taken while the
+	 * memory used so far does not exceed what is still available.
+	 */
+	if (memNowUsed <= state->availMem)
+	{
+		/*
+		 * We've used no more than half of allowedMem; double our usage,
+		 * clamping at INT_MAX tuples.
+		 */
+		if (memtupsize < INT_MAX / 2)
+			newmemtupsize = memtupsize * 2;
+		else
+		{
+			newmemtupsize = INT_MAX;
+			state->growmemtuples = false;
+		}
+	}
+	else
+	{
+		/*
+		 * This will be the last increment of memtupsize.  Abandon doubling
+		 * strategy and instead increase as much as we safely can.
+		 *
+		 * To stay within allowedMem, we can't increase memtupsize by more
+		 * than availMem / sizeof(SortTuple) elements.  In practice, we want
+		 * to increase it by considerably less, because we need to leave some
+		 * space for the tuples to which the new array slots will refer.  We
+		 * assume the new tuples will be about the same size as the tuples
+		 * we've already seen, and thus we can extrapolate from the space
+		 * consumption so far to estimate an appropriate new size for the
+		 * memtuples array.  The optimal value might be higher or lower than
+		 * this estimate, but it's hard to know that in advance.  We again
+		 * clamp at INT_MAX tuples.
+		 *
+		 * This calculation is safe against enlarging the array so much that
+		 * LACKMEM becomes true, because the memory currently used includes
+		 * the present array; thus, there would be enough allowedMem for the
+		 * new array elements even if no other memory were currently used.
+		 *
+		 * We do the arithmetic in float8, because otherwise the product of
+		 * memtupsize and allowedMem could overflow.  Any inaccuracy in the
+		 * result should be insignificant; but even if we computed a
+		 * completely insane result, the checks below will prevent anything
+		 * really bad from happening.
+		 */
+		double		grow_ratio;
+
+		grow_ratio = (double) state->allowedMem / (double) memNowUsed;
+		if (memtupsize * grow_ratio < INT_MAX)
+			newmemtupsize = (int) (memtupsize * grow_ratio);
+		else
+			newmemtupsize = INT_MAX;
+
+		/* We won't make any further enlargement attempts */
+		state->growmemtuples = false;
+	}
+
+	/* Must enlarge array by at least one element, else report failure */
+	if (newmemtupsize <= memtupsize)
+		goto noalloc;
+
+	/*
+	 * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize.  Clamp
+	 * to ensure our request won't be rejected.  Note that we can easily
+	 * exhaust address space before facing this outcome.  (This is presently
+	 * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but
+	 * don't rely on that at this distance.)
+	 */
+	if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple))
+	{
+		newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple));
+		state->growmemtuples = false;	/* can't grow any more */
+	}
+
+	/*
+	 * We need to be sure that we do not cause LACKMEM to become true, else
+	 * the space management algorithm will go nuts.  The code above should
+	 * never generate a dangerous request, but to be safe, check explicitly
+	 * that the array growth fits within availMem.  (We could still cause
+	 * LACKMEM if the memory chunk overhead associated with the memtuples
+	 * array were to increase.  That shouldn't happen because we chose the
+	 * initial array size large enough to ensure that palloc will be treating
+	 * both old and new arrays as separate chunks.  But we'll check LACKMEM
+	 * explicitly below just in case.)
+	 */
+	if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple)))
+		goto noalloc;
+
+	/*
+	 * OK, do it.  The old array's chunk space is credited back first and the
+	 * reallocated array's chunk space is charged afterwards, so availMem
+	 * ends up reflecting the size of the new chunk.
+	 */
+	FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
+	state->memtupsize = newmemtupsize;
+	state->memtuples = (SortTuple *)
+		repalloc_huge(state->memtuples,
+					  state->memtupsize * sizeof(SortTuple));
+	USEMEM(state, GetMemoryChunkSpace(state->memtuples));
+	if (LACKMEM(state))
+		elog(ERROR, "unexpected out-of-memory situation in tuplesort");
+	return true;
+
+noalloc:
+	/*
+	 * If for any reason we didn't realloc, shut off future attempts.  The
+	 * array and the memory accounting are left as they were.
+	 */
+	state->growmemtuples = false;
+	return false;
+}
+
+/*
+ * Accept one tuple while collecting input data for sort.
+ *
+ * Note that the input data is always copied; the caller need not save it.
+ *
+ * The slot is handed to the sort's copytup routine and the resulting
+ * SortTuple to puttuple_common, both with sortcontext as the current memory
+ * context; the caller's context is restored before returning.
+ */
+void
+tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	stup;
+
+	/*
+	 * Copy the given tuple into memory we control, and decrease availMem.
+	 * Then call the common code.
+	 */
+	COPYTUP(state, &stup, (void *) slot);
+
+	puttuple_common(state, &stup);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Accept one tuple while collecting input data for sort.
+ *
+ * Note that the input data is always copied; the caller need not save it.
+ *
+ * Same sequence as tuplesort_puttupleslot, except that the HeapTuple itself
+ * is what gets handed to the sort's copytup routine.
+ */
+void
+tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	stup;
+
+	/*
+	 * Copy the given tuple into memory we control, and decrease availMem.
+	 * Then call the common code.
+	 */
+	COPYTUP(state, &stup, (void *) tup);
+
+	puttuple_common(state, &stup);
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Collect one index tuple while collecting input data for sort, building
+ * it from caller-supplied values.
+ */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state); + + /* + * Dump tuples until we are back under the limit. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuple->tupindex = 0; /* not used */ + tuplesort_heap_replace_top(state, tuple, false); + } + break; + + case TSS_BUILDRUNS: + + /* + * Insert the tuple into the heap, with run number currentRun if + * it can go into the current run, else HEAP_RUN_NEXT. The tuple + * can go into the current run if it is >= the first + * not-yet-output tuple. (Actually, it could go into the current + * run if it is >= the most recently output tuple ... but that + * would require keeping around the tuple we last output, and it's + * simplest to let writetup free each tuple as soon as it's + * written.) + * + * Note that this only applies when: + * + * - currentRun is RUN_FIRST + * + * - Replacement selection is in use (typically it is never used). + * + * When these two conditions are not both true, all tuples are + * appended indifferently, much like the TSS_INITIAL case. + * + * There should always be room to store the incoming tuple. + */ + Assert(!state->replaceActive || state->memtupcount > 0); + if (state->replaceActive && + COMPARETUP(state, tuple, &state->memtuples[0]) >= 0) + { + Assert(state->currentRun == RUN_FIRST); + + /* + * Insert tuple into first, fully heapified run. + * + * Unlike classic replacement selection, which this module was + * previously based on, only RUN_FIRST tuples are fully + * heapified. Any second/next run tuples are appended + * indifferently. While HEAP_RUN_NEXT tuples may be sifted + * out of the way of first run tuples, COMPARETUP() will never + * be called for the run's tuples during sifting (only our + * initial COMPARETUP() call is required for the tuple, to + * determine that the tuple does not belong in RUN_FIRST). 
+ */ + tuple->tupindex = state->currentRun; + tuplesort_heap_insert(state, tuple, true); + } + else + { + /* + * Tuple was determined to not belong to heapified RUN_FIRST, + * or replacement selection not in play. Append the tuple to + * memtuples indifferently. + * + * dumptuples() does not trust that the next run's tuples are + * heapified. Anything past the first run will always be + * quicksorted even when replacement selection is initially + * used. (When it's never used, every tuple still takes this + * path.) + */ + tuple->tupindex = HEAP_RUN_NEXT; + state->memtuples[state->memtupcount++] = *tuple; + } + + /* + * If we are over the memory limit, dump tuples till we're under. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. 
+ */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. + */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort starting: %s", + pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory. Just qsort 'em and we're done. + */ + tuplesort_sort_memtuples(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess, one run per tape). Note that + * mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort done (except %d-way final merge): %s", + state->activeTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort done: %s", + pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns FALSE if no more tuples. + * Returned tuple belongs to tuplesort memory context, and must not be freed + * by caller. Note that fetched tuple is stored in memory that may be + * recycled by any future fetch. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup) +{ + unsigned int tuplen; + size_t nmoved; + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->slabAllocatorUsed); + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. 
+ */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(state->slabAllocatorUsed); + + /* + * The slot that held the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + if (forward) + { + if (state->eof_reached) + return false; + + if ((tuplen = getlen(state, state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle + * its memory on next call. (This can be NULL, in the + * !state->tuples case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + 2 * sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != 2 * sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. 
+ */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + tuplen = getlen(state, state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + 2 * sizeof(unsigned int)); + if (nmoved == tuplen + sizeof(unsigned int)) + { + /* + * We backed up over the previous tuple, but there was no + * ending length word before it. That means that the prev + * tuple is the first tuple in the file. It is now the + * next to read in forward direction (not obviously right, + * but that is what in-memory case does). + */ + return false; + } + else if (nmoved != tuplen + 2 * sizeof(unsigned int)) + elog(ERROR, "bogus tuple length in backward scan"); + } + + tuplen = getlen(state, state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen); + if (nmoved != tuplen) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle its memory + * on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + + case TSS_FINALMERGE: + Assert(forward); + /* We are managing memory ourselves, with the slab allocator. */ + Assert(state->slabAllocatorUsed); + + /* + * The slab slot holding the tuple that we returned in previous + * gettuple call can now be reused. 
+ */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + /* + * This code should match the inner loop of mergeonerun(). + */ + if (state->memtupcount > 0) + { + int srcTape = state->memtuples[0].tupindex; + SortTuple newtup; + + *stup = state->memtuples[0]; + + /* + * Remember the tuple we return, so that we can recycle its + * memory on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + /* + * Pull next tuple from tape, and replace the returned tuple + * at top of the heap with it. + */ + if (!mergereadnext(state, srcTape, &newtup)) + { + /* + * If no more data, we've reached end of run on this tape. + * Remove the top node from the heap. + */ + tuplesort_heap_delete_top(state, false); + + /* + * Rewind to free the read buffer. It'd go away at the + * end of the sort anyway, but better to release the + * memory early. + */ + LogicalTapeRewindForWrite(state->tapeset, srcTape); + return true; + } + newtup.tupindex = srcTape; + tuplesort_heap_replace_top(state, &newtup, false); + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return TRUE; else, clear the slot + * and return FALSE. + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. 
+ * + * If copy is true, the slot receives a tuple that's been copied into the + * caller's memory context, so that it will stay valid regardless of future + * manipulations of the tuplesort's state (up to and including deleting the + * tuplesort). If copy is false, the slot will just receive a pointer to a + * tuple held within the tuplesort, which is more efficient, but only safe for + * callers that are prepared to have any subsequent manipulation of the + * tuplesort's state invalidate slot contents. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (copy) + stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); + + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. 
Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns FALSE if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. 
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* use stup.tuple because stup.datum1 may be an abbreviation */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns TRUE if successful, FALSE if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /* + * We need one tape for each merge input, plus another one for the output, + * and each of these tapes needs buffer space. In addition we want + * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't + * count). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. + */ + mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / + (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); + + /* + * Even in minimum memory, use at least a MINORDER merge. On the other + * hand, even when we have lots of memory, do not use more than a MAXORDER + * merge. Tapes are pretty cheap, but they're not entirely free. Each + * additional tape reduces the amount of memory available to build runs, + * which in turn can cause the same sort to need more runs, which makes + * merging slower even if it can still be done in a single pass. 
Also, + * high order merges are quite slow due to CPU cache effects; it can be + * faster to pay the I/O cost of a polyphase merge than to perform a + * single merge pass across many hundreds of tapes. + */ + mOrder = Max(mOrder, MINORDER); + mOrder = Min(mOrder, MAXORDER); + + return mOrder; +} + +/* + * useselection - determine algorithm to use to sort first run. + * + * It can sometimes be useful to use the replacement selection algorithm if it + * results in one large run, and there is little available workMem. See + * remarks on RUN_SECOND optimization within dumptuples(). + */ +static bool +useselection(Tuplesortstate *state) +{ + /* + * memtupsize might be noticeably higher than memtupcount here in atypical + * cases. It seems slightly preferable to not allow recent outliers to + * impact this determination. Note that caller's trace_sort output + * reports memtupcount instead. + */ + if (state->memtupsize <= replacement_sort_tuples) + return true; + + return false; +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we don't have room to sort in memory. + */ +static void +inittapes(Tuplesortstate *state) +{ + int maxTapes, + j; + int64 tapeSpace; + + /* Compute number of tapes to use: merge order plus 1 */ + maxTapes = tuplesort_merge_order(state->allowedMem) + 1; + + state->maxTapes = maxTapes; + state->tapeRange = maxTapes - 1; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to external sort with %d tapes: %s", + maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* + * Decrease availMem to reflect the space needed for tape buffers, when + * writing the initial runs; but don't decrease it to the point that we + * have no room for tuples. (That case is only likely to occur if sorting + * pass-by-value Datums; in all other scenarios the memtuples[] array is + * unlikely to occupy more than half of allowedMem. 
In the pass-by-value + * case it's not important to account for tuple space, so we don't care if + * LACKMEM becomes inaccurate.) + */ + tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. + */ + PrepareTempTablespaces(); + + /* + * Create the tape set and allocate the per-tape data arrays. + */ + state->tapeset = LogicalTapeSetCreate(maxTapes); + + state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); + state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); + + /* + * Give replacement selection a try based on user setting. There will be + * a switch to a simple hybrid sort-merge strategy after the first run + * (iff we could not output one long run). + */ + state->replaceActive = useselection(state); + + if (state->replaceActive) + { + /* + * Convert the unsorted contents of memtuples[] into a heap. Each + * tuple is marked as belonging to run number zero. + * + * NOTE: we pass false for checkIndex since there's no point in + * comparing indexes in this step, even though we do intend the + * indexes to be part of the sort key... 
+ */ + int ntuples = state->memtupcount; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "replacement selection will sort %d first run tuples", + state->memtupcount); +#endif + state->memtupcount = 0; /* make the heap empty */ + + for (j = 0; j < ntuples; j++) + { + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[j]; + + stup.tupindex = RUN_FIRST; + tuplesort_heap_insert(state, &stup, false); + } + Assert(state->memtupcount == ntuples); + } + + state->currentRun = RUN_FIRST; + + /* + * Initialize variables of Algorithm D (step D1). + */ + for (j = 0; j < maxTapes; j++) + { + state->tp_fib[j] = 1; + state->tp_runs[j] = 0; + state->tp_dummy[j] = 1; + state->tp_tapenum[j] = j; + } + state->tp_fib[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 0; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * selectnewtape -- select new tape for new initial run. + * + * This is called after finishing a run when we know another run + * must be started. This implements steps D3, D4 of Algorithm D. + */ +static void +selectnewtape(Tuplesortstate *state) +{ + int j; + int a; + + /* Step D3: advance j (destTape) */ + if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1]) + { + state->destTape++; + return; + } + if (state->tp_dummy[state->destTape] != 0) + { + state->destTape = 0; + return; + } + + /* Step D4: increase level */ + state->Level++; + a = state->tp_fib[0]; + for (j = 0; j < state->tapeRange; j++) + { + state->tp_dummy[j] = a + state->tp_fib[j + 1] - state->tp_fib[j]; + state->tp_fib[j] = a + state->tp_fib[j + 1]; + } + state->destTape = 0; +} + +/* + * Initialize the slab allocation arena, for the given number of slots. 
+ */ +static void +init_slab_allocator(Tuplesortstate *state, int numSlots) +{ + if (numSlots > 0) + { + char *p; + int i; + + state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE); + state->slabMemoryEnd = state->slabMemoryBegin + + numSlots * SLAB_SLOT_SIZE; + state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin; + USEMEM(state, numSlots * SLAB_SLOT_SIZE); + + p = state->slabMemoryBegin; + for (i = 0; i < numSlots - 1; i++) + { + ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE); + p += SLAB_SLOT_SIZE; + } + ((SlabSlot *) p)->nextfree = NULL; + } + else + { + state->slabMemoryBegin = state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; + } + state->slabAllocatorUsed = true; +} + +/* + * mergeruns -- merge all the completed initial runs. + * + * This implements steps D5, D6 of Algorithm D. All input data has + * already been written to initial runs on tape (see dumptuples). + */ +static void +mergeruns(Tuplesortstate *state) +{ + int tapenum, + svTape, + svRuns, + svDummy; + int numTapes; + int numInputTapes; + + Assert(state->status == TSS_BUILDRUNS); + Assert(state->memtupcount == 0); + + if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) + { + /* + * If there are multiple runs to be merged, when we go to read back + * tuples from disk, abbreviated keys will not have been stored, and + * we don't care to regenerate them. Disable abbreviation from this + * point on. + */ + state->sortKeys->abbrev_converter = NULL; + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; + } + + /* + * Reset tuple memory. We've freed all the tuples that we previously + * allocated. We will use the slab allocator from now on. + */ + MemoryContextDelete(state->tuplecontext); + state->tuplecontext = NULL; + + /* + * We no longer need a large memtuples array. 
(We will allocate a smaller + * one for the heap later.) + */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + pfree(state->memtuples); + state->memtuples = NULL; + + /* + * If we had fewer runs than tapes, refund the memory that we imagined we + * would need for the tape buffers of the unused tapes. + * + * numTapes and numInputTapes reflect the actual number of tapes we will + * use. Note that the output tape's tape number is maxTapes - 1, so the + * tape numbers of the used tapes are not consecutive, and you cannot just + * loop from 0 to numTapes to visit all used tapes! + */ + if (state->Level == 1) + { + numInputTapes = state->currentRun; + numTapes = numInputTapes + 1; + FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD); + } + else + { + numInputTapes = state->tapeRange; + numTapes = state->maxTapes; + } + + /* + * Initialize the slab allocator. We need one slab slot per input tape, + * for the tuples in the heap, plus one to hold the tuple last returned + * from tuplesort_gettuple. (If we're sorting pass-by-val Datums, + * however, we don't need to do allocate anything.) + * + * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism + * to track memory usage of individual tuples. + */ + if (state->tuples) + init_slab_allocator(state, numInputTapes + 1); + else + init_slab_allocator(state, 0); + + /* + * If we produced only one initial run (quite likely if the total data + * volume is between 1X and 2X workMem when replacement selection is used, + * but something we particularly count on when input is presorted), we can + * just use that tape as the finished output, rather than doing a useless + * merge. (This obvious optimization is not in Knuth's algorithm.) 
+ */ + if (state->currentRun == RUN_SECOND) + { + state->result_tape = state->tp_tapenum[state->destTape]; + /* must freeze and rewind the finished output tape */ + LogicalTapeFreeze(state->tapeset, state->result_tape); + state->status = TSS_SORTEDONTAPE; + return; + } + + /* + * Allocate a new 'memtuples' array, for the heap. It will hold one tuple + * from each input tape. + */ + state->memtupsize = numInputTapes; + state->memtuples = (SortTuple *) palloc(numInputTapes * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* + * Use all the remaining memory we have available for read buffers among + * the input tapes. + * + * We do this only after checking for the case that we produced only one + * initial run, because there is no need to use a large read buffer when + * we're reading from a single tape. With one tape, the I/O pattern will + * be the same regardless of the buffer size. + * + * We don't try to "rebalance" the memory among tapes, when we start a new + * merge phase, even if some tapes are inactive in the new phase. That + * would be hard, because logtape.c doesn't know where one run ends and + * another begins. When a new merge phase begins, and a tape doesn't + * participate in it, its buffer nevertheless already contains tuples from + * the next run on same tape, so we cannot release the buffer. That's OK + * in practice, merge performance isn't that sensitive to the amount of + * buffers used, and most merge phases use all or almost all tapes, + * anyway. 
+ */ +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "using " INT64_FORMAT " KB of memory for read buffers among %d input tapes", + (state->availMem) / 1024, numInputTapes); +#endif + + state->read_buffer_size = Max(state->availMem / numInputTapes, 0); + USEMEM(state, state->read_buffer_size * numInputTapes); + + /* End of step D2: rewind all output tapes to prepare for merging */ + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size); + + for (;;) + { + /* + * At this point we know that tape[T] is empty. If there's just one + * (real or dummy) run left on each input tape, then only one merge + * pass remains. If we don't have to produce a materialized sorted + * tape, we can stop at this point and do the final merge on-the-fly. + */ + if (!state->randomAccess) + { + bool allOneRun = true; + + Assert(state->tp_runs[state->tapeRange] == 0); + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) + { + allOneRun = false; + break; + } + } + if (allOneRun) + { + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); + /* Initialize for the final merge pass */ + beginmerge(state); + state->status = TSS_FINALMERGE; + return; + } + } + + /* Step D5: merge runs onto tape[T] until tape[P] is empty */ + while (state->tp_runs[state->tapeRange - 1] || + state->tp_dummy[state->tapeRange - 1]) + { + bool allDummy = true; + + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] == 0) + { + allDummy = false; + break; + } + } + + if (allDummy) + { + state->tp_dummy[state->tapeRange]++; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + state->tp_dummy[tapenum]--; + } + else + mergeonerun(state); + } + + /* Step D6: decrease level */ + if (--state->Level == 0) + break; + /* rewind output tape T to use as new input */ + 
LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange], + state->read_buffer_size); + /* rewind used-up input tape P, and prepare it for write pass */ + LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]); + state->tp_runs[state->tapeRange - 1] = 0; + + /* + * reassign tape units per step D6; note we no longer care about A[] + */ + svTape = state->tp_tapenum[state->tapeRange]; + svDummy = state->tp_dummy[state->tapeRange]; + svRuns = state->tp_runs[state->tapeRange]; + for (tapenum = state->tapeRange; tapenum > 0; tapenum--) + { + state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1]; + state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1]; + state->tp_runs[tapenum] = state->tp_runs[tapenum - 1]; + } + state->tp_tapenum[0] = svTape; + state->tp_dummy[0] = svDummy; + state->tp_runs[0] = svRuns; + } + + /* + * Done. Knuth says that the result is on TAPE[1], but since we exited + * the loop without performing the last iteration of step D6, we have not + * rearranged the tape unit assignment, and therefore the result is on + * TAPE[T]. We need to do it this way so that we can freeze the final + * output tape while rewinding it. The last iteration of step D6 would be + * a waste of cycles anyway... + */ + state->result_tape = state->tp_tapenum[state->tapeRange]; + LogicalTapeFreeze(state->tapeset, state->result_tape); + state->status = TSS_SORTEDONTAPE; + + /* Release the read buffers of all the other tapes, by rewinding them. */ + for (tapenum = 0; tapenum < state->maxTapes; tapenum++) + { + if (tapenum != state->result_tape) + LogicalTapeRewindForWrite(state->tapeset, tapenum); + } +} + +/* + * Merge one run from each input tape, except ones with dummy runs. + * + * This is the inner loop of Algorithm D step D5. We know that the + * output tape is TAPE[T]. 
+ */ +static void +mergeonerun(Tuplesortstate *state) +{ + int destTape = state->tp_tapenum[state->tapeRange]; + int srcTape; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. We can also decrease the input run/dummy run counts. + */ + beginmerge(state); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). + */ + while (state->memtupcount > 0) + { + SortTuple stup; + + /* write the tuple to destTape */ + srcTape = state->memtuples[0].tupindex; + WRITETUP(state, destTape, &state->memtuples[0]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[0].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple); + + /* + * pull next tuple from the tape, and replace the written-out tuple in + * the heap with it. + */ + if (mergereadnext(state, srcTape, &stup)) + { + stup.tupindex = srcTape; + tuplesort_heap_replace_top(state, &stup, false); + + } + else + tuplesort_heap_delete_top(state, false); + } + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape, and increment its count of real runs. + */ + markrunend(state, destTape); + state->tp_runs[state->tapeRange]++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished %d-way merge step: %s", state->activeTapes, + pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * beginmerge - initialize for a merge pass + * + * We decrease the counts of real and dummy runs for each tape, and mark + * which tapes contain active input runs in mergeactive[]. Then, fill the + * merge heap with the first tuple from each active tape. 
+ */ +static void +beginmerge(Tuplesortstate *state) +{ + int activeTapes; + int tapenum; + int srcTape; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + /* Adjust run counts and mark the active tapes */ + memset(state->mergeactive, 0, + state->maxTapes * sizeof(*state->mergeactive)); + activeTapes = 0; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] > 0) + state->tp_dummy[tapenum]--; + else + { + Assert(state->tp_runs[tapenum] > 0); + state->tp_runs[tapenum]--; + srcTape = state->tp_tapenum[tapenum]; + state->mergeactive[srcTape] = true; + activeTapes++; + } + } + Assert(activeTapes > 0); + state->activeTapes = activeTapes; + + /* Load the merge heap with the first tuple from each input tape */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + SortTuple tup; + + if (mergereadnext(state, srcTape, &tup)) + { + tup.tupindex = srcTape; + tuplesort_heap_insert(state, &tup, false); + } + } +} + +/* + * mergereadnext - read next tuple from one merge input tape + * + * Returns false on EOF. + */ +static bool +mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup) +{ + unsigned int tuplen; + + if (!state->mergeactive[srcTape]) + return false; /* tape's run is already exhausted */ + + /* read next tuple, if any */ + if ((tuplen = getlen(state, srcTape, true)) == 0) + { + state->mergeactive[srcTape] = false; + return false; + } + READTUP(state, stup, srcTape, tuplen); + + return true; +} + +/* + * dumptuples - remove tuples from memtuples and write to tape + * + * This is used during initial-run building, but not during merging. + * + * When alltuples = false and replacement selection is still active, dump + * only enough tuples to get under the availMem limit (and leave at least + * one tuple in memtuples, since puttuple will then assume it is a heap that + * has a tuple to compare to). We always insist there be at least one free + * slot in the memtuples[] array. 
+ * + * When alltuples = true, dump everything currently in memory. (This + * case is only used at end of input data, although in practice only the + * first run could fail to dump all tuples when we LACKMEM(), and only + * when replacement selection is active.) + * + * If, when replacement selection is active, we see that the tuple run + * number at the top of the heap has changed, start a new run. This must be + * the first run, because replacement selection is always abandoned for all + * further runs. + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + while (alltuples || + (LACKMEM(state) && state->memtupcount > 1) || + state->memtupcount >= state->memtupsize) + { + if (state->replaceActive) + { + /* + * Still holding out for a case favorable to replacement + * selection. Still incrementally spilling using heap. + * + * Dump the heap's frontmost entry, and remove it from the heap. + */ + Assert(state->memtupcount > 0); + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[0]); + tuplesort_heap_delete_top(state, true); + } + else + { + /* + * Once committed to quicksorting runs, never incrementally spill + */ + dumpbatch(state, alltuples); + break; + } + + /* + * If top run number has changed, we've finished the current run (this + * can only be the first run), and will no longer spill incrementally. + */ + if (state->memtupcount == 0 || + state->memtuples[0].tupindex == HEAP_RUN_NEXT) + { + markrunend(state, state->tp_tapenum[state->destTape]); + Assert(state->currentRun == RUN_FIRST); + state->currentRun++; + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished incrementally writing %s run %d to tape %d: %s", + (state->memtupcount == 0) ? 
"only" : "first", + state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Done if heap is empty, which is possible when there is only one + * long run. + */ + Assert(state->currentRun == RUN_SECOND); + if (state->memtupcount == 0) + { + /* + * Replacement selection best case; no final merge required, + * because there was only one initial run (second run has no + * tuples). See RUN_SECOND case in mergeruns(). + */ + break; + } + + /* + * Abandon replacement selection for second run (as well as any + * subsequent runs). + */ + state->replaceActive = false; + + /* + * First tuple of next run should not be heapified, and so will + * bear placeholder run number. In practice this must actually be + * the second run, which just became the currentRun, so we're + * clear to quicksort and dump the tuples in batch next time + * memtuples becomes full. + */ + Assert(state->memtuples[0].tupindex == HEAP_RUN_NEXT); + selectnewtape(state); + } + } +} + +/* + * dumpbatch - sort and dump all memtuples, forming one run on tape + * + * Second or subsequent runs are never heapified by this module (although + * heapification still respects run number differences between the first and + * second runs), and a heap (replacement selection priority queue) is often + * avoided in the first place. + */ +static void +dumpbatch(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. + * + * In general, short final runs are quite possible. Rather than allowing + * a special case where there was a superfluous selectnewtape() call (i.e. + * a call with no subsequent run actually written to destTape), we prefer + * to write out a 0 tuple run. 
+ * + * mergereadnext() is prepared for 0 tuple runs, and will reliably mark + * the tape inactive for the merge when called from beginmerge(). This + * case is therefore similar to the case where mergeonerun() finds a dummy + * run for the tape, and so doesn't need to merge a run from the tape (or + * conceptually "merges" the dummy run, if you prefer). According to + * Knuth, Algorithm D "isn't strictly optimal" in its method of + * distribution and dummy run assignment; this edge case seems very + * unlikely to make that appreciably worse. + */ + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "starting quicksort of run %d: %s", + state->currentRun, pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished quicksort of run %d: %s", + state->currentRun, pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in the sizes of incoming tuples. Fragmentation due to + * AllocSetFree's bucketing by size class might be particularly bad if + * this step wasn't taken. 
+ */ + MemoryContextReset(state->tuplecontext); + + markrunend(state, state->tp_tapenum[state->destTape]); + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished writing run %d to tape %d: %s", + state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + if (!alltuples) + selectnewtape(state); +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewindForRead(state->tapeset, + state->result_tape, + 0); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->tapeset, + state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void +tuplesort_restorepos(Tuplesortstate *state) +{ + 
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + LogicalTapeSeek(state->tapeset, + state->result_tape, + state->markpos_block, + state->markpos_offset); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + * spaceUsed is measured in kilobytes. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + const char **sortMethod, + const char **spaceType, + long *spaceUsed) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + if (state->tapeset) + { + *spaceType = "Disk"; + *spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024); + } + else + { + *spaceType = "Memory"; + *spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; + } + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + *sortMethod = "top-N heapsort"; + else + *sortMethod = "quicksort"; + break; + case TSS_SORTEDONTAPE: + *sortMethod = "external sort"; + break; + case TSS_FINALMERGE: + *sortMethod = "external merge"; + break; + default: + *sortMethod = "still in progress"; + break; + } +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + * + * Compare two SortTuples. If checkIndex is true, use the tuple index + * as the front of the sort key; otherwise, no. + * + * Note that for checkIndex callers, the heap invariant is never + * maintained beyond the first run, and so there are no COMPARETUP() + * calls needed to distinguish tuples in HEAP_RUN_NEXT. + */ + +#define HEAPCOMPARE(tup1,tup2) \ + (checkIndex && ((tup1)->tupindex != (tup2)->tupindex || \ + (tup1)->tupindex == HEAP_RUN_NEXT) ? \ + ((tup1)->tupindex) - ((tup2)->tupindex) : \ + COMPARETUP(state, tup1, tup2)) + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. + * + * We assume that all entries in a bounded heap will always have tupindex + * zero; it therefore doesn't matter that HEAPCOMPARE() doesn't reverse + * the direction of comparison for tupindexes. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount < state->bound) + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + stup.tupindex = 0; /* not used */ + tuplesort_heap_insert(state, &stup, false); + } + else + { + /* + * The heap is full. Replace the largest entry with the new + * tuple, or just discard it, if it's larger than anything already + * in the heap. + */ + if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + tuplesort_heap_replace_top(state, &state->memtuples[i], false); + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + + /* + * We can unheapify in place because each delete-top call will remove the + * largest entry, which we can promptly store in the newly freed slot at + * the end. Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_delete_top(state, false); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. 
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts. Quicksort is also generally + * preferred to replacement selection for generating runs during external sort + * operations, although replacement selection is sometimes used for the first + * run. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: For some callers, tuple points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, + bool checkIndex) +{ + SortTuple *memtuples; + int j; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + Assert(!checkIndex || tuple->tupindex == RUN_FIRST); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (HEAPCOMPARE(tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * Remove the tuple at state->memtuples[0] from the heap. 
Decrement + * memtupcount, and sift up to maintain the heap invariant. + * + * The caller has already free'd the tuple the top node points to, + * if necessary. + */ +static void +tuplesort_heap_delete_top(Tuplesortstate *state, bool checkIndex) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + + Assert(!checkIndex || state->currentRun == RUN_FIRST); + if (--state->memtupcount <= 0) + return; + + /* + * Remove the last tuple in the heap, and re-insert it, by replacing the + * current top node with it. + */ + tuple = &memtuples[state->memtupcount]; + tuplesort_heap_replace_top(state, tuple, checkIndex); +} + +/* + * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to + * maintain the heap invariant. + * + * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H, + * Heapsort, steps H3-H8). + */ +static void +tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple, + bool checkIndex) +{ + SortTuple *memtuples = state->memtuples; + unsigned int i, + n; + + Assert(!checkIndex || state->currentRun == RUN_FIRST); + Assert(state->memtupcount >= 1); + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + HEAPCOMPARE(&memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (HEAPCOMPARE(tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(Tuplesortstate *state, int tapenum, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(state->tapeset, tapenum, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(Tuplesortstate *state, int tapenum) +{ + unsigned int len = 0; + + LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. + * + * We use next free slot from the slab allocator, or palloc() if the tuple + * is too large for that. + */ +static void * +readtup_alloc(Tuplesortstate *state, Size tuplen) +{ + SlabSlot *buf; + + /* + * We pre-allocate enough slots in the slab arena that we should never run + * out. 
+ */ + Assert(state->slabFreeHead); + + if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead) + return MemoryContextAlloc(state->sortcontext, tuplen); + else + { + buf = state->slabFreeHead; + /* Reuse this slot */ + state->slabFreeHead = buf->nextfree; + + return buf; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key using the cached datum1/isnull1 values */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Leading keys tie: set up HeapTupleData views to fetch further columns */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) /* abbreviated keys tied; recheck full key */ + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed 
"tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); + } +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(state->tapeset, tapenum, + tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_KeyAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, 
attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreTuple(ltup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreTuple(rtup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_KeyAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + 
state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = 
index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. + */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? 
-1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. + */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? 
-1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + IndexTuple tuple = (IndexTuple) tup; + unsigned int tuplen = IndexTupleSize(tuple); + IndexTuple newtuple; + Datum original; + + /* copy the tuple into sort storage */ + newtuple = (IndexTuple) MemoryContextAlloc(state->tuplecontext, tuplen); + memcpy(newtuple, tuple, tuplen); + USEMEM(state, GetMemoryChunkSpace(newtuple)); + stup->tuple = (void *) newtuple; + /* set up first-column key value */ + original = index_getattr(newtuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (IndexTuple) mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort11.c b/src/tuplesort11.c new file mode 100644 index 0000000000..60223929e5 --- /dev/null +++ b/src/tuplesort11.c @@ -0,0 +1,4593 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). 
Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we always use + * quicksort for run generation. We merge the runs using polyphase merge, + * Knuth's Algorithm 5.4.2D. The logical "tapes" used by Algorithm D are + * implemented by logtape.c, which avoids space wastage by recycling disk + * space as soon as each block is read from its "tape". + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run, + * then merge the runs using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. 
However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c, we just tell it how + * much memory to use for the buffers. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. 
In our implementation a "tape drive" + * doesn't cost much more than a few Kb of memory buffers, so we can afford + * to have lots of them. In particular, if we can have as many tape drives + * as sorted runs, we can eliminate any repeated I/O at all. In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes (but not + * too many -- see the comments in tuplesort_merge_order). + * + * This module supports parallel sorting. Parallel sorts involve coordination + * among one or more worker processes, and a leader process, each with its own + * tuplesort state. The leader process (or, more accurately, the + * Tuplesortstate associated with a leader process) creates a full tapeset + * consisting of worker tapes with one run to merge; a run for every + * worker process. This is then merged. Worker processes are guaranteed to + * produce exactly one output run from their partial input. 
+ * + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/hash.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* Sort parallel code from state for sort__start probes */ +#define PARALLEL_SORT(state) ((state)->shared == NULL ? 0 : \ + (state)->worker >= 0 ? 1 : 2) + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during merge, when we use a + * simple slab allocator). SortTuples also contain the tuple's first key + * column in Datum/nullflag format, and an index integer. + * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. 
We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values). If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. + * + * tupindex holds the input tape number that each tuple in the heap was read + * from during merge passes. + */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int tupindex; /* see notes above */ +} SortTuple; + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples. To avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. + * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. 
+ */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 1 blocks + * worth of buffer space. This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? 
*/ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a<b, a=b, a>b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). 
+ */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. 
+ */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Buffer size to use for reading input tapes, during merge. */ + size_t read_buffer_size; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * This variable is only used during merge passes. mergeactive[i] is true + * if we are reading an input run from (actual) tape number i and have not + * yet exhausted that run. + */ + bool *mergeactive; /* active input run source? */ + + /* + * Variables for Algorithm D. Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! + */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) 
+ */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run leader can merge. Typically includes a worker state held + * by the leader process itself. Set in the leader Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. + */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. 
+ */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. + */ +struct Sharedsort +{ + /* mutex protects all fields prior to tapes */ + slock_t mutex; + + /* + * currentWorker generates ordinal identifier numbers for parallel sort + * workers. These start from 0, and are always gapless. + * + * Workers increment workersFinished to indicate having finished. If this + * is equal to state.nParticipants within the leader, leader is ready to + * merge worker runs. 
+ */ + int currentWorker; + int workersFinished; + + /* Temporary file space */ + SharedFileSet fileset; + + /* Size of tapes flexible array */ + int nTapes; + + /* + * Tapes array used by workers to report back information needed by the + * leader to concatenate all worker tapes into one for merging + */ + TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. + */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) +#define SERIAL(state) ((state)->shared == NULL) +#define WORKER(state) ((state)->shared && (state)->worker != -1) +#define LEADER(state) ((state)->shared && (state)->worker == -1) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). 
The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. 
+ * Note that this places a responsibility on readtup and copytup routines + * to use the right memory context for these tuples (and to not use the + * reset context for anything whose lifetime needs to span multiple + * external sort runs). + */ + +/* When using this macro, beware of double evaluation of len */ +#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ + do { \ + if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ + elog(ERROR, "unexpected end of data"); \ + } while(0) + + +static Tuplesortstate *tuplesort_begin_common(int workMem, + SortCoordinate coordinate, + bool randomAccess); +static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); +static bool consider_abort_common(Tuplesortstate *state); +static void inittapes(Tuplesortstate *state, bool mergeruns); +static void inittapestate(Tuplesortstate *state, int maxTapes); +static void selectnewtape(Tuplesortstate *state); +static void init_slab_allocator(Tuplesortstate *state, int numSlots); +static void mergeruns(Tuplesortstate *state); +static void mergeonerun(Tuplesortstate *state); +static void beginmerge(Tuplesortstate *state); +static bool mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup); +static void dumptuples(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); +static void tuplesort_sort_memtuples(Tuplesortstate *state); +static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_delete_top(Tuplesortstate *state); +static void reversedirection(Tuplesortstate *state); +static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK); +static void markrunend(Tuplesortstate *state, int tapenum); +static void *readtup_alloc(Tuplesortstate *state, Size tuplen); +static int comparetup_heap(const SortTuple *a, const 
SortTuple *b, + Tuplesortstate *state); +static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_heap(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_cluster(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_index(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_datum(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_datum(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int worker_get_identifier(Tuplesortstate *state); +static void worker_freeze_result_tape(Tuplesortstate *state); +static void worker_nomergeruns(Tuplesortstate *state); +static void leader_takeover_tapes(Tuplesortstate *state); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); + +/* + * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts + * any variant of SortTuples, using the appropriate comparetup function. 
+ * qsort_ssup() is specialized for the case where the comparetup function + * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts + * and Datum sorts. + */ +#include "qsort_tuple.c" + + +/* + * tuplesort_begin_xxx + * + * Initialize for a tuple sort operation. + * + * After calling tuplesort_begin, the caller should call tuplesort_putXXX + * zero or more times, then call tuplesort_performsort when all the tuples + * have been supplied. After performsort, retrieve the tuples in sorted + * order by calling tuplesort_getXXX until it returns false/NULL. (If random + * access was requested, rescan, markpos, and restorepos can also be called.) + * Call tuplesort_end to terminate the operation and release memory/disk space. + * + * Each variant of tuplesort_begin has a workMem parameter specifying the + * maximum number of kilobytes of RAM to use before spilling data to disk. + * (The normal value of this parameter is work_mem, but some callers use + * other values.) Each variant also has a randomAccess parameter specifying + * whether the caller needs non-sequential access to the sort result. + */ + +static Tuplesortstate * +tuplesort_begin_common(int workMem, SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state; + MemoryContext sortcontext; + MemoryContext tuplecontext; + MemoryContext oldcontext; + + /* See leader_takeover_tapes() remarks on randomAccess support */ + if (coordinate && randomAccess) + elog(ERROR, "random access disallowed under parallel sort"); + + /* + * Create a working memory context for this sort operation. All data + * needed by the sort will live inside this context. + */ + sortcontext = AllocSetContextCreate(CurrentMemoryContext, + "TupleSort main", + ALLOCSET_DEFAULT_SIZES); + + /* + * Caller tuple (e.g. IndexTuple) memory context. + * + * A dedicated child context used exclusively for caller passed tuples + * eases memory management. Resetting at key points reduces + * fragmentation. 
Note that the memtuples array of SortTuples is allocated + * in the parent context, not this context, because there is no need to + * free memtuples early. + */ + tuplecontext = AllocSetContextCreate(sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + + /* + * Make the Tuplesortstate within the per-sort context. This way, we + * don't need a separate pfree() operation for it at shutdown. + */ + oldcontext = MemoryContextSwitchTo(sortcontext); + + state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate)); + +#ifdef TRACE_SORT + if (trace_sort) + pg_rusage_init(&state->ru_start); +#endif + + state->status = TSS_INITIAL; + state->randomAccess = randomAccess; + state->bounded = false; + state->tuples = true; + state->boundUsed = false; + + /* + * workMem is forced to be at least 64KB, the current minimum valid value + * for the work_mem GUC. This is a defense against parallel sort callers + * that divide out memory among many workers in a way that leaves each + * with very little memory. + */ + state->allowedMem = Max(workMem, 64) * (int64) 1024; + state->availMem = state->allowedMem; + state->sortcontext = sortcontext; + state->tuplecontext = tuplecontext; + state->tapeset = NULL; + + state->memtupcount = 0; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). 
+ */ + state->memtupsize = Max(1024, + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + + state->growmemtuples = true; + state->slabAllocatorUsed = false; + state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* workMem must be large enough for the minimal memtuples array */ + if (LACKMEM(state)) + elog(ERROR, "insufficient memory allowed for sort"); + + state->currentRun = 0; + + /* + * maxTapes, tapeRange, and Algorithm D variables will be initialized by + * inittapes(), if needed + */ + + state->result_tape = -1; /* flag that result tape has not been formed */ + + /* + * Initialize parallel-related state based on coordination information + * from caller + */ + if (!coordinate) + { + /* Serial sort */ + state->shared = NULL; + state->worker = -1; + state->nParticipants = -1; + } + else if (coordinate->isWorker) + { + /* Parallel worker produces exactly one final run from all input */ + state->shared = coordinate->sharedsort; + state->worker = worker_get_identifier(state); + state->nParticipants = -1; + } + else + { + /* Parallel leader state only used for final merge */ + state->shared = coordinate->sharedsort; + state->worker = -1; + state->nParticipants = coordinate->nParticipants; + Assert(state->nParticipants >= 1); + } + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_heap(TupleDesc tupDesc, + int nkeys, AttrNumber *attNums, + Oid *sortOperators, Oid *sortCollations, + bool *nullsFirstFlags, + int workMem, SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + + AssertArg(nkeys > 0); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + nkeys, workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey_nodata(indexRel); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey_nodata(indexRel); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? 
+ BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_hash(Relation heapRel, + Relation indexRel, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " + "max_buckets = 0x%x, workMem = %d, randomAccess = %c", + high_mask, + low_mask, + max_buckets, + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the hash code */ + + state->comparetup = comparetup_index_hash; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + state->high_mask = high_mask; + state->low_mask = low_mask; + state->max_buckets = max_buckets; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple is needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each sortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. 
(Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. Parallel leader tuplesorts will always ignore the hint. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL); + Assert(state->memtupcount == 0); + Assert(!state->bounded); + Assert(!WORKER(state)); + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* Parallel leader ignores hint */ + if (LEADER(state)) + return; + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX / 2)) + return; + + state->bounded = true; + state->bound = (int) bound; + + /* + * Bounded sorts are not an effective target for abbreviated key + * optimization. Disable by setting state to be consistent with no + * abbreviation support. + */ + state->sortKeys->abbrev_converter = NULL; + if (state->sortKeys->abbrev_full_comparator) + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! 
+ */ +void +tuplesort_end(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", + SERIAL(state) ? "external sort" : "parallel external sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "%s of worker %d ended, %ld KB used: %s", + SERIAL(state) ? "internal sort" : "unperformed parallel sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. + */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory, + * including the Tuplesortstate struct itself. + */ + MemoryContextDelete(state->sortcontext); +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. 
Return true if we were able to enlarge the array, false if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. The + * growmemtuples flag accomplishes that and also prevents useless + * recalculations in this function. + */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples. + */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. 
The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. + */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. 
(We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) + */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. 
+ */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. + */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + Assert(!LEADER(state)); + + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state, true); + + /* + * Dump all tuples. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_replace_top(state, tuple); + } + break; + + case TSS_BUILDRUNS: + + /* + * Save the tuple into the unsorted array (there must be space) + */ + state->memtuples[state->memtupcount++] = *tuple; + + /* + * If we are over the memory limit, dump all tuples. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. + */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. 
+ */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort of worker %d starting: %s", + state->worker, pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory, or leader to take over worker tapes + */ + if (SERIAL(state)) + { + /* Just qsort 'em and we're done */ + tuplesort_sort_memtuples(state); + state->status = TSS_SORTEDINMEM; + } + else if (WORKER(state)) + { + /* + * Parallel workers must still dump out tuples to tape. No + * merge is required to produce single output run, though. + */ + inittapes(state, false); + dumptuples(state, true); + worker_nomergeruns(state); + state->status = TSS_SORTEDONTAPE; + } + else + { + /* + * Leader will take over worker tapes and merge worker runs. + * Note that mergeruns sets the correct state->status. + */ + leader_takeover_tapes(state); + mergeruns(state); + } + state->current = 0; + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess and !WORKER(), one run per tape). + * Note that mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort of worker %d done (except %d-way final merge): %s", + state->worker, state->activeTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort of worker %d done: %s", + state->worker, pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns false if no more tuples. + * Returned tuple belongs to tuplesort memory context, and must not be freed + * by caller. Note that fetched tuple is stored in memory that may be + * recycled by any future fetch. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup) +{ + unsigned int tuplen; + size_t nmoved; + + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->slabAllocatorUsed); + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. 
+ */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(state->slabAllocatorUsed); + + /* + * The slot that held the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + if (forward) + { + if (state->eof_reached) + return false; + + if ((tuplen = getlen(state, state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle + * its memory on next call. (This can be NULL, in the + * !state->tuples case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + 2 * sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != 2 * sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. 
+ */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + tuplen = getlen(state, state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + 2 * sizeof(unsigned int)); + if (nmoved == tuplen + sizeof(unsigned int)) + { + /* + * We backed up over the previous tuple, but there was no + * ending length word before it. That means that the prev + * tuple is the first tuple in the file. It is now the + * next to read in forward direction (not obviously right, + * but that is what in-memory case does). + */ + return false; + } + else if (nmoved != tuplen + 2 * sizeof(unsigned int)) + elog(ERROR, "bogus tuple length in backward scan"); + } + + tuplen = getlen(state, state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen); + if (nmoved != tuplen) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle its memory + * on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + + case TSS_FINALMERGE: + Assert(forward); + /* We are managing memory ourselves, with the slab allocator. */ + Assert(state->slabAllocatorUsed); + + /* + * The slab slot holding the tuple that we returned in previous + * gettuple call can now be reused. 
+ */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + /* + * This code should match the inner loop of mergeonerun(). + */ + if (state->memtupcount > 0) + { + int srcTape = state->memtuples[0].tupindex; + SortTuple newtup; + + *stup = state->memtuples[0]; + + /* + * Remember the tuple we return, so that we can recycle its + * memory on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + /* + * Pull next tuple from tape, and replace the returned tuple + * at top of the heap with it. + */ + if (!mergereadnext(state, srcTape, &newtup)) + { + /* + * If no more data, we've reached end of run on this tape. + * Remove the top node from the heap. + */ + tuplesort_heap_delete_top(state); + + /* + * Rewind to free the read buffer. It'd go away at the + * end of the sort anyway, but better to release the + * memory early. + */ + LogicalTapeRewindForWrite(state->tapeset, srcTape); + return true; + } + newtup.tupindex = srcTape; + tuplesort_heap_replace_top(state, &newtup); + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return true; else, clear the slot + * and return false. + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. 
+ * + * If copy is true, the slot receives a tuple that's been copied into the + * caller's memory context, so that it will stay valid regardless of future + * manipulations of the tuplesort's state (up to and including deleting the + * tuplesort). If copy is false, the slot will just receive a pointer to a + * tuple held within the tuplesort, which is more efficient, but only safe for + * callers that are prepared to have any subsequent manipulation of the + * tuplesort's state invalidate slot contents. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (copy) + stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); + + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. 
Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns false if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. 
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* use stup.tuple because stup.datum1 may be an abbreviation */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns true if successful, false if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /* + * We need one tape for each merge input, plus another one for the output, + * and each of these tapes needs buffer space. In addition we want + * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't + * count). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. + */ + mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / + (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); + + /* + * Even in minimum memory, use at least a MINORDER merge. On the other + * hand, even when we have lots of memory, do not use more than a MAXORDER + * merge. Tapes are pretty cheap, but they're not entirely free. Each + * additional tape reduces the amount of memory available to build runs, + * which in turn can cause the same sort to need more runs, which makes + * merging slower even if it can still be done in a single pass. 
Also, + * high order merges are quite slow due to CPU cache effects; it can be + * faster to pay the I/O cost of a polyphase merge than to perform a + * single merge pass across many hundreds of tapes. + */ + mOrder = Max(mOrder, MINORDER); + mOrder = Min(mOrder, MAXORDER); + + return mOrder; +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we won't sort in memory. + */ +static void +inittapes(Tuplesortstate *state, bool mergeruns) +{ + int maxTapes, + j; + + Assert(!LEADER(state)); + + if (mergeruns) + { + /* Compute number of tapes to use: merge order plus 1 */ + maxTapes = tuplesort_merge_order(state->allowedMem) + 1; + } + else + { + /* Workers can sometimes produce single run, output without merge */ + Assert(WORKER(state)); + maxTapes = MINORDER + 1; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d switching to external sort with %d tapes: %s", + state->worker, maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* Create the tape set and allocate the per-tape data arrays */ + inittapestate(state, maxTapes); + state->tapeset = + LogicalTapeSetCreate(maxTapes, NULL, + state->shared ? &state->shared->fileset : NULL, + state->worker); + + state->currentRun = 0; + + /* + * Initialize variables of Algorithm D (step D1). + */ + for (j = 0; j < maxTapes; j++) + { + state->tp_fib[j] = 1; + state->tp_runs[j] = 0; + state->tp_dummy[j] = 1; + state->tp_tapenum[j] = j; + } + state->tp_fib[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 0; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * inittapestate - initialize generic tape management state + */ +static void +inittapestate(Tuplesortstate *state, int maxTapes) +{ + int64 tapeSpace; + + /* + * Decrease availMem to reflect the space needed for tape buffers; but + * don't decrease it to the point that we have no room for tuples. 
(That + * case is only likely to occur if sorting pass-by-value Datums; in all + * other scenarios the memtuples[] array is unlikely to occupy more than + * half of allowedMem. In the pass-by-value case it's not important to + * account for tuple space, so we don't care if LACKMEM becomes + * inaccurate.) + */ + tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. For parallel sorts, this should have been + * called already, but it doesn't matter if it is called a second time. + */ + PrepareTempTablespaces(); + + state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); + state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); + + /* Record # of tapes allocated (for duration of sort) */ + state->maxTapes = maxTapes; + /* Record maximum # of tapes usable as inputs when merging */ + state->tapeRange = maxTapes - 1; +} + +/* + * selectnewtape -- select new tape for new initial run. + * + * This is called after finishing a run when we know another run + * must be started. This implements steps D3, D4 of Algorithm D. 
+ */
+static void
+selectnewtape(Tuplesortstate *state)
+{
+	int			dest = state->destTape;
+	int			tape;
+	int			fib0;
+
+	/* Step D3: move on to the next tape if it owes more dummy runs */
+	if (state->tp_dummy[dest] < state->tp_dummy[dest + 1])
+	{
+		state->destTape = dest + 1;
+		return;
+	}
+
+	/* Dummy runs remain on this tape: start over from the first tape */
+	if (state->tp_dummy[dest] != 0)
+	{
+		state->destTape = 0;
+		return;
+	}
+
+	/* Step D4: go up one level and recompute the per-tape targets */
+	state->Level++;
+	fib0 = state->tp_fib[0];
+	for (tape = 0; tape < state->tapeRange; tape++)
+	{
+		int			nextFib = state->tp_fib[tape + 1];
+
+		state->tp_dummy[tape] = fib0 + nextFib - state->tp_fib[tape];
+		state->tp_fib[tape] = fib0 + nextFib;
+	}
+	state->destTape = 0;
+}
+
+/*
+ * Set up the slab arena with "numSlots" slots, chained into a free list.
+ *
+ * With numSlots <= 0 no memory is allocated and the free list is empty.  In
+ * both cases the sort is flagged as using the slab allocator afterwards.
+ */
+static void
+init_slab_allocator(Tuplesortstate *state, int numSlots)
+{
+	if (numSlots <= 0)
+	{
+		/* No arena at all */
+		state->slabMemoryBegin = NULL;
+		state->slabMemoryEnd = NULL;
+		state->slabFreeHead = NULL;
+	}
+	else
+	{
+		char	   *cursor;
+		int			linked;
+
+		state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE);
+		state->slabMemoryEnd = state->slabMemoryBegin +
+			numSlots * SLAB_SLOT_SIZE;
+		state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin;
+		USEMEM(state, numSlots * SLAB_SLOT_SIZE);
+
+		/* Point every slot but the last at its successor in the arena */
+		cursor = state->slabMemoryBegin;
+		linked = 1;
+		while (linked < numSlots)
+		{
+			((SlabSlot *) cursor)->nextfree =
+				(SlabSlot *) (cursor + SLAB_SLOT_SIZE);
+			cursor += SLAB_SLOT_SIZE;
+			linked++;
+		}
+		/* The last slot terminates the free list */
+		((SlabSlot *) cursor)->nextfree = NULL;
+	}
+	state->slabAllocatorUsed = true;
+}
+
+/*
+ * mergeruns -- merge all the completed initial runs.
+ *
+ * This implements steps D5, D6 of Algorithm D. All input data has
+ * already been written to initial runs on tape (see dumptuples).
+ */
+static void
+mergeruns(Tuplesortstate *state)
+{
+	int			tapenum,
+				svTape,
+				svRuns,
+				svDummy;
+	int			numTapes;
+	int			numInputTapes;
+
+	Assert(state->status == TSS_BUILDRUNS);
+	Assert(state->memtupcount == 0);
+
+	if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL)
+	{
+		/*
+		 * If there are multiple runs to be merged, when we go to read back
+		 * tuples from disk, abbreviated keys will not have been stored, and
+		 * we don't care to regenerate them. Disable abbreviation from this
+		 * point on.
+		 */
+		state->sortKeys->abbrev_converter = NULL;
+		state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
+
+		/* Not strictly necessary, but be tidy */
+		state->sortKeys->abbrev_abort = NULL;
+		state->sortKeys->abbrev_full_comparator = NULL;
+	}
+
+	/*
+	 * Release tuple memory by deleting the tuple context outright. We've
+	 * freed all the tuples that we previously allocated. We will use the
+	 * slab allocator from now on.
+	 */
+	MemoryContextDelete(state->tuplecontext);
+	state->tuplecontext = NULL;
+
+	/*
+	 * We no longer need a large memtuples array. (We will allocate a smaller
+	 * one for the heap later.)
+	 */
+	FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
+	pfree(state->memtuples);
+	state->memtuples = NULL;
+
+	/*
+	 * If we had fewer runs than tapes, refund the memory that we imagined we
+	 * would need for the tape buffers of the unused tapes.
+	 *
+	 * numTapes and numInputTapes reflect the actual number of tapes we will
+	 * use. Note that the output tape's tape number is maxTapes - 1, so the
+	 * tape numbers of the used tapes are not consecutive, and you cannot just
+	 * loop from 0 to numTapes to visit all used tapes!
+	 */
+	if (state->Level == 1)
+	{
+		numInputTapes = state->currentRun;
+		numTapes = numInputTapes + 1;
+		FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD);
+	}
+	else
+	{
+		numInputTapes = state->tapeRange;
+		numTapes = state->maxTapes;
+	}
+
+	/*
+	 * Initialize the slab allocator. We need one slab slot per input tape,
+	 * for the tuples in the heap, plus one to hold the tuple last returned
+	 * from tuplesort_gettuple. (If we're sorting pass-by-val Datums,
+	 * however, we don't need to allocate anything.)
+	 *
+	 * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism
+	 * to track memory usage of individual tuples.
+	 */
+	if (state->tuples)
+		init_slab_allocator(state, numInputTapes + 1);
+	else
+		init_slab_allocator(state, 0);
+
+	/*
+	 * Allocate a new 'memtuples' array, for the heap. It will hold one tuple
+	 * from each input tape.
+	 */
+	state->memtupsize = numInputTapes;
+	state->memtuples = (SortTuple *) palloc(numInputTapes * sizeof(SortTuple));
+	USEMEM(state, GetMemoryChunkSpace(state->memtuples));
+
+	/*
+	 * Use all the remaining memory we have available for read buffers among
+	 * the input tapes.
+	 *
+	 * We don't try to "rebalance" the memory among tapes, when we start a new
+	 * merge phase, even if some tapes are inactive in the new phase. That
+	 * would be hard, because logtape.c doesn't know where one run ends and
+	 * another begins. When a new merge phase begins, and a tape doesn't
+	 * participate in it, its buffer nevertheless already contains tuples from
+	 * the next run on same tape, so we cannot release the buffer. That's OK
+	 * in practice, merge performance isn't that sensitive to the amount of
+	 * buffers used, and most merge phases use all or almost all tapes,
+	 * anyway.
+	 */
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d using " INT64_FORMAT " KB of memory for read buffers among %d input tapes",
+			 state->worker, state->availMem / 1024, numInputTapes);
+#endif
+
+	state->read_buffer_size = Max(state->availMem / numInputTapes, 0);	/* never negative */
+	USEMEM(state, state->read_buffer_size * numInputTapes);
+
+	/* End of step D2: rewind all output tapes to prepare for merging */
+	for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+		LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size);
+
+	for (;;)
+	{
+		/*
+		 * At this point we know that tape[T] is empty. If there's just one
+		 * (real or dummy) run left on each input tape, then only one merge
+		 * pass remains. If we don't have to produce a materialized sorted
+		 * tape, we can stop at this point and do the final merge on-the-fly.
+		 */
+		if (!state->randomAccess && !WORKER(state))
+		{
+			bool		allOneRun = true;
+
+			Assert(state->tp_runs[state->tapeRange] == 0);
+			for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+			{
+				if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1)
+				{
+					allOneRun = false;
+					break;
+				}
+			}
+			if (allOneRun)
+			{
+				/* Tell logtape.c we won't be writing anymore */
+				LogicalTapeSetForgetFreeSpace(state->tapeset);
+				/* Initialize for the final merge pass */
+				beginmerge(state);
+				state->status = TSS_FINALMERGE;
+				return;
+			}
+		}
+
+		/* Step D5: merge runs onto tape[T] until tape[P] is empty */
+		while (state->tp_runs[state->tapeRange - 1] ||
+			   state->tp_dummy[state->tapeRange - 1])
+		{
+			bool		allDummy = true;
+
+			for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+			{
+				if (state->tp_dummy[tapenum] == 0)
+				{
+					allDummy = false;
+					break;
+				}
+			}
+
+			if (allDummy)
+			{
+				state->tp_dummy[state->tapeRange]++;	/* one dummy run per input becomes one output dummy */
+				for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+					state->tp_dummy[tapenum]--;
+			}
+			else
+				mergeonerun(state);
+		}
+
+		/* Step D6: decrease level */
+		if (--state->Level == 0)
+			break;
+		/* rewind output tape T to use as new input */
+		LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange],
+								 state->read_buffer_size);
+		/* rewind used-up input tape P, and prepare it for write pass */
+		LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]);
+		state->tp_runs[state->tapeRange - 1] = 0;
+
+		/*
+		 * reassign tape units per step D6; note we no longer care about A[]
+		 *
+		 * The tp_tapenum/tp_dummy/tp_runs entries are rotated one position
+		 * towards higher indexes, and the entry that was at index tapeRange
+		 * is moved to index 0.
+		 */
+		svTape = state->tp_tapenum[state->tapeRange];
+		svDummy = state->tp_dummy[state->tapeRange];
+		svRuns = state->tp_runs[state->tapeRange];
+		for (tapenum = state->tapeRange; tapenum > 0; tapenum--)
+		{
+			state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1];
+			state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1];
+			state->tp_runs[tapenum] = state->tp_runs[tapenum - 1];
+		}
+		state->tp_tapenum[0] = svTape;
+		state->tp_dummy[0] = svDummy;
+		state->tp_runs[0] = svRuns;
+	}
+
+	/*
+	 * Done. Knuth says that the result is on TAPE[1], but since we exited
+	 * the loop without performing the last iteration of step D6, we have not
+	 * rearranged the tape unit assignment, and therefore the result is on
+	 * TAPE[T]. We need to do it this way so that we can freeze the final
+	 * output tape while rewinding it. The last iteration of step D6 would be
+	 * a waste of cycles anyway...
+	 */
+	state->result_tape = state->tp_tapenum[state->tapeRange];
+	if (!WORKER(state))
+		LogicalTapeFreeze(state->tapeset, state->result_tape, NULL);
+	else
+		worker_freeze_result_tape(state);
+	state->status = TSS_SORTEDONTAPE;
+
+	/* Release the read buffers of all the other tapes, by rewinding them. */
+	for (tapenum = 0; tapenum < state->maxTapes; tapenum++)
+	{
+		if (tapenum != state->result_tape)
+			LogicalTapeRewindForWrite(state->tapeset, tapenum);
+	}
+}
+
+/*
+ * Merge one run from each input tape, except ones with dummy runs.
+ *
+ * This is the inner loop of Algorithm D step D5. We know that the
+ * output tape is TAPE[T].
+ */
+static void
+mergeonerun(Tuplesortstate *state)
+{
+	int			destTape = state->tp_tapenum[state->tapeRange];
+	int			srcTape;
+
+	/*
+	 * Start the merge by loading one tuple from each active source tape into
+	 * the heap. We can also decrease the input run/dummy run counts.
+	 */
+	beginmerge(state);
+
+	/*
+	 * Execute merge by repeatedly extracting lowest tuple in heap, writing it
+	 * out, and replacing it with next tuple from same tape (if there is
+	 * another one).
+	 */
+	while (state->memtupcount > 0)
+	{
+		SortTuple	stup;
+
+		/* write the tuple to destTape */
+		srcTape = state->memtuples[0].tupindex;
+		WRITETUP(state, destTape, &state->memtuples[0]);
+
+		/* recycle the slot of the tuple we just wrote out, for the next read */
+		if (state->memtuples[0].tuple)
+			RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple);
+
+		/*
+		 * pull next tuple from the tape, and replace the written-out tuple in
+		 * the heap with it. If that tape has nothing more to give for this
+		 * run, just remove the top entry so the heap shrinks by one.
+		 */
+		if (mergereadnext(state, srcTape, &stup))
+		{
+			stup.tupindex = srcTape;
+			tuplesort_heap_replace_top(state, &stup);
+
+		}
+		else
+			tuplesort_heap_delete_top(state);
+	}
+
+	/*
+	 * When the heap empties, we're done. Write an end-of-run marker on the
+	 * output tape, and increment its count of real runs.
+	 */
+	markrunend(state, destTape);
+	state->tp_runs[state->tapeRange]++;
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d finished %d-way merge step: %s", state->worker,
+			 state->activeTapes, pg_rusage_show(&state->ru_start));
+#endif
+}
+
+/*
+ * beginmerge - initialize for a merge pass
+ *
+ * We decrease the counts of real and dummy runs for each tape, and mark
+ * which tapes contain active input runs in mergeactive[]. Then, fill the
+ * merge heap with the first tuple from each active tape.
+ */
+static void
+beginmerge(Tuplesortstate *state)
+{
+	int			nactive = 0;
+	int			pos;
+	int			tape;
+
+	/* Nothing may be left in the heap from an earlier pass */
+	Assert(state->memtupcount == 0);
+
+	/* Start with every tape inactive, then consume one run per input */
+	memset(state->mergeactive, 0,
+		   state->maxTapes * sizeof(*state->mergeactive));
+	for (pos = 0; pos < state->tapeRange; pos++)
+	{
+		if (state->tp_dummy[pos] > 0)
+		{
+			/* A dummy run is used up; the tape itself stays inactive */
+			state->tp_dummy[pos]--;
+			continue;
+		}
+
+		Assert(state->tp_runs[pos] > 0);
+		state->tp_runs[pos]--;
+		state->mergeactive[state->tp_tapenum[pos]] = true;
+		nactive++;
+	}
+	Assert(nactive > 0);
+	state->activeTapes = nactive;
+
+	/* Seed the heap; inactive tapes yield nothing from mergereadnext() */
+	for (tape = 0; tape < state->maxTapes; tape++)
+	{
+		SortTuple	first;
+
+		if (!mergereadnext(state, tape, &first))
+			continue;
+
+		first.tupindex = tape;
+		tuplesort_heap_insert(state, &first);
+	}
+}
+
+/*
+ * mergereadnext - fetch the next tuple of the current run on "srcTape"
+ *
+ * Returns true with *stup filled in, or false if the tape is not active or
+ * its run has ended (a zero length word); in the latter case the tape is
+ * marked inactive.
+ */
+static bool
+mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup)
+{
+	unsigned int len;
+
+	if (state->mergeactive[srcTape])
+	{
+		len = getlen(state, srcTape, true);
+		if (len != 0)
+		{
+			READTUP(state, stup, srcTape, len);
+			return true;
+		}
+
+		/* end of run: stop reading from this tape */
+		state->mergeactive[srcTape] = false;
+	}
+
+	return false;
+}
+
+/*
+ * dumptuples - remove tuples from memtuples and write initial run to tape
+ *
+ * When alltuples = true, dump everything currently in memory. (This case is
+ * only used at end of input data.)
+ */
+static void
+dumptuples(Tuplesortstate *state, bool alltuples)
+{
+	int			memtupwrite;
+	int			i;
+
+	/*
+	 * Nothing to do if we still fit in available memory and have array slots,
+	 * unless this is the final call during initial run generation.
+	 */
+	if (state->memtupcount < state->memtupsize && !LACKMEM(state) &&
+		!alltuples)
+		return;
+
+	/*
+	 * Final call might require no sorting, in rare cases where we just so
+	 * happen to have previously LACKMEM()'d at the point where exactly all
+	 * remaining tuples are loaded into memory, just before input was
+	 * exhausted.
+	 *
+	 * In general, short final runs are quite possible. Rather than allowing
+	 * a special case where there was a superfluous selectnewtape() call (i.e.
+	 * a call with no subsequent run actually written to destTape), we prefer
+	 * to write out a 0 tuple run.
+	 *
+	 * mergereadnext() is prepared for 0 tuple runs, and will reliably mark
+	 * the tape inactive for the merge when called from beginmerge(). This
+	 * case is therefore similar to the case where mergeonerun() finds a dummy
+	 * run for the tape, and so doesn't need to merge a run from the tape (or
+	 * conceptually "merges" the dummy run, if you prefer). According to
+	 * Knuth, Algorithm D "isn't strictly optimal" in its method of
+	 * distribution and dummy run assignment; this edge case seems very
+	 * unlikely to make that appreciably worse.
+	 */
+	Assert(state->status == TSS_BUILDRUNS);
+
+	/*
+	 * currentRun is about to be incremented, so refuse to go past INT_MAX.
+	 * It seems unlikely that this limit will ever be exceeded, but take no
+	 * chances
+	 */
+	if (state->currentRun == INT_MAX)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot have more than %d runs for an external sort",
+						INT_MAX)));
+
+	state->currentRun++;
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d starting quicksort of run %d: %s",
+			 state->worker, state->currentRun,
+			 pg_rusage_show(&state->ru_start));
+#endif
+
+	/*
+	 * Sort all tuples accumulated within the allowed amount of memory for
+	 * this run using quicksort
+	 */
+	tuplesort_sort_memtuples(state);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d finished quicksort of run %d: %s",
+			 state->worker, state->currentRun,
+			 pg_rusage_show(&state->ru_start));
+#endif
+
+	memtupwrite = state->memtupcount;
+	for (i = 0; i < memtupwrite; i++)
+	{
+		WRITETUP(state, state->tp_tapenum[state->destTape],
+				 &state->memtuples[i]);
+		state->memtupcount--;	/* ends at zero once every tuple is written */
+	}
+
+	/*
+	 * Reset tuple memory. We've freed all of the tuples that we previously
+	 * allocated. It's important to avoid fragmentation when there is a stark
+	 * change in the sizes of incoming tuples. Fragmentation due to
+	 * AllocSetFree's bucketing by size class might be particularly bad if
+	 * this step wasn't taken.
+	 */
+	MemoryContextReset(state->tuplecontext);
+
+	markrunend(state, state->tp_tapenum[state->destTape]);
+	state->tp_runs[state->destTape]++;
+	state->tp_dummy[state->destTape]--; /* per Alg D step D2 */
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d finished writing run %d to tape %d: %s",
+			 state->worker, state->currentRun, state->destTape,
+			 pg_rusage_show(&state->ru_start));
+#endif
+
+	if (!alltuples)
+		selectnewtape(state);
+}
+
+/*
+ * tuplesort_rescan - go back to the start of the sorted output
+ *
+ * Requires a random-access sort whose result is fully in memory or on the
+ * result tape; any other state raises an error.  The saved mark position is
+ * reset to the start as well.
+ */
+void
+tuplesort_rescan(Tuplesortstate *state)
+{
+	MemoryContext callerContext;
+
+	callerContext = MemoryContextSwitchTo(state->sortcontext);
+
+	Assert(state->randomAccess);
+
+	if (state->status == TSS_SORTEDINMEM)
+	{
+		state->current = 0;
+		state->eof_reached = false;
+		state->markpos_offset = 0;
+		state->markpos_eof = false;
+	}
+	else if (state->status == TSS_SORTEDONTAPE)
+	{
+		LogicalTapeRewindForRead(state->tapeset,
+								 state->result_tape,
+								 0);
+		state->eof_reached = false;
+		state->markpos_block = 0L;
+		state->markpos_offset = 0;
+		state->markpos_eof = false;
+	}
+	else
+		elog(ERROR, "invalid tuplesort state");
+
+	MemoryContextSwitchTo(callerContext);
+}
+
+/*
+ * tuplesort_markpos - remember the current read position
+ *
+ * For an in-memory result the position is the array index; for a result on
+ * tape it is the block/offset reported by LogicalTapeTell().  The EOF flag
+ * is saved alongside.
+ */
+void
+tuplesort_markpos(Tuplesortstate *state)
+{
+	MemoryContext callerContext;
+
+	callerContext = MemoryContextSwitchTo(state->sortcontext);
+
+	Assert(state->randomAccess);
+
+	if (state->status == TSS_SORTEDINMEM)
+	{
+		state->markpos_offset = state->current;
+		state->markpos_eof = state->eof_reached;
+	}
+	else if (state->status == TSS_SORTEDONTAPE)
+	{
+		LogicalTapeTell(state->tapeset,
+						state->result_tape,
+						&state->markpos_block,
+						&state->markpos_offset);
+		state->markpos_eof = state->eof_reached;
+	}
+	else
+		elog(ERROR, "invalid tuplesort state");
+
+	MemoryContextSwitchTo(callerContext);
+}
+
+/*
+ * tuplesort_restorepos - restores current position in merged sort file to
+ *						  last saved position
+ */
+void
+tuplesort_restorepos(Tuplesortstate *state)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+	Assert(state->randomAccess);
+
+	switch (state->status)
+	{
+		case TSS_SORTEDINMEM:
+			state->current = state->markpos_offset;	/* the mark is an array index here */
+			state->eof_reached = state->markpos_eof;
+			break;
+		case TSS_SORTEDONTAPE:
+			LogicalTapeSeek(state->tapeset,
+							state->result_tape,
+							state->markpos_block,
+							state->markpos_offset);
+			state->eof_reached = state->markpos_eof;
+			break;
+		default:
+			elog(ERROR, "invalid tuplesort state");
+			break;
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * tuplesort_get_stats - extract summary statistics
+ *
+ * This can be called after tuplesort_performsort() finishes to obtain
+ * printable summary information about how the sort was performed.
+ *
+ * Fills in stats->spaceType, stats->spaceUsed and stats->sortMethod.  If a
+ * tape set exists the space reported is disk space, otherwise memory.
+ */
+void
+tuplesort_get_stats(Tuplesortstate *state,
+					TuplesortInstrumentation *stats)
+{
+	/*
+	 * Note: it might seem we should provide both memory and disk usage for a
+	 * disk-based sort. However, the current code doesn't track memory space
+	 * accurately once we have begun to return tuples to the caller (since we
+	 * don't account for pfree's the caller is expected to do), so we cannot
+	 * rely on availMem in a disk sort. This does not seem worth the overhead
+	 * to fix. Is it worth creating an API for the memory context code to
+	 * tell us how much is actually used in sortcontext?
+	 */
+	if (state->tapeset != NULL)
+	{
+		stats->spaceType = SORT_SPACE_TYPE_DISK;
+		stats->spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024);
+	}
+	else
+	{
+		stats->spaceType = SORT_SPACE_TYPE_MEMORY;
+		stats->spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
+	}
+
+	/* Derive the reported method from the state the sort ended up in */
+	if (state->status == TSS_SORTEDINMEM)
+		stats->sortMethod = state->boundUsed ?
+			SORT_TYPE_TOP_N_HEAPSORT : SORT_TYPE_QUICKSORT;
+	else if (state->status == TSS_SORTEDONTAPE)
+		stats->sortMethod = SORT_TYPE_EXTERNAL_SORT;
+	else if (state->status == TSS_FINALMERGE)
+		stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE;
+	else
+		stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS;
+}
+
+/*
+ * Return a printable name for a TuplesortMethod value; values not listed
+ * below yield "unknown".
+ */
+const char *
+tuplesort_method_name(TuplesortMethod m)
+{
+	const char *name = "unknown";
+
+	switch (m)
+	{
+		case SORT_TYPE_STILL_IN_PROGRESS:
+			name = "still in progress";
+			break;
+		case SORT_TYPE_TOP_N_HEAPSORT:
+			name = "top-N heapsort";
+			break;
+		case SORT_TYPE_QUICKSORT:
+			name = "quicksort";
+			break;
+		case SORT_TYPE_EXTERNAL_SORT:
+			name = "external sort";
+			break;
+		case SORT_TYPE_EXTERNAL_MERGE:
+			name = "external merge";
+			break;
+	}
+
+	return name;
+}
+
+/*
+ * Return a printable name for a TuplesortSpaceType value.
+ */
+const char *
+tuplesort_space_type_name(TuplesortSpaceType t)
+{
+	Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY);
+
+	if (t == SORT_SPACE_TYPE_DISK)
+		return "Disk";
+	return "Memory";
+}
+
+
+/*
+ * Heap manipulation routines, per Knuth's Algorithm 5.2.3H.
+ */
+
+/*
+ * Convert the existing unordered array of SortTuples to a bounded heap,
+ * discarding all but the smallest "state->bound" tuples.
+ *
+ * When working with a bounded heap, we want to keep the largest entry
+ * at the root (array entry zero), instead of the smallest as in the normal
+ * sort case. This allows us to discard the largest entry cheaply.
+ * Therefore, we temporarily reverse the sort direction.
+ */
+static void
+make_bounded_heap(Tuplesortstate *state)
+{
+	int			tupcount = state->memtupcount;
+	int			i;
+
+	Assert(state->status == TSS_INITIAL);
+	Assert(state->bounded);
+	Assert(tupcount >= state->bound);
+	Assert(SERIAL(state));
+
+	/* Reverse sort direction so largest entry will be at root */
+	reversedirection(state);
+
+	state->memtupcount = 0;		/* make the heap empty */
+	for (i = 0; i < tupcount; i++)
+	{
+		if (state->memtupcount < state->bound)
+		{
+			/* Insert next tuple into heap */
+			/* Must copy source tuple to avoid possible overwrite */
+			SortTuple	stup = state->memtuples[i];
+
+			tuplesort_heap_insert(state, &stup);
+		}
+		else
+		{
+			/*
+			 * The heap is full. Replace the largest entry with the new
+			 * tuple, or just discard it, if it's larger than anything already
+			 * in the heap. (The comparison runs with the sort direction
+			 * reversed, so a result <= 0 against the root is the "discard"
+			 * case.)
+			 */
+			if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0)
+			{
+				free_sort_tuple(state, &state->memtuples[i]);
+				CHECK_FOR_INTERRUPTS();
+			}
+			else
+				tuplesort_heap_replace_top(state, &state->memtuples[i]);
+		}
+	}
+
+	Assert(state->memtupcount == state->bound);
+	state->status = TSS_BOUNDED;
+}
+
+/*
+ * Convert the bounded heap to a properly-sorted array
+ *
+ * On return the sort is in TSS_SORTEDINMEM state and boundUsed is set.
+ */
+static void
+sort_bounded_heap(Tuplesortstate *state)
+{
+	int			tupcount = state->memtupcount;
+
+	Assert(state->status == TSS_BOUNDED);
+	Assert(state->bounded);
+	Assert(tupcount == state->bound);
+	Assert(SERIAL(state));
+
+	/*
+	 * We can unheapify in place because each delete-top call will remove the
+	 * largest entry, which we can promptly store in the newly freed slot at
+	 * the end. Once we're down to a single-entry heap, we're done.
+	 */
+	while (state->memtupcount > 1)
+	{
+		SortTuple	stup = state->memtuples[0];
+
+		/* this sifts-up the next-largest entry and decreases memtupcount */
+		tuplesort_heap_delete_top(state);
+		state->memtuples[state->memtupcount] = stup;
+	}
+	state->memtupcount = tupcount;
+
+	/*
+	 * Reverse sort direction back to the original state. This is not
+	 * actually necessary but seems like a good idea for tidiness.
+	 */
+	reversedirection(state);
+
+	state->status = TSS_SORTEDINMEM;
+	state->boundUsed = true;
+}
+
+/*
+ * Sort all memtuples using specialized qsort() routines.
+ *
+ * Quicksort is used for small in-memory sorts, and external sort runs.
+ * Nothing is done when there are fewer than two tuples.
+ */
+static void
+tuplesort_sort_memtuples(Tuplesortstate *state)
+{
+	Assert(!LEADER(state));
+
+	if (state->memtupcount > 1)
+	{
+		/* Can we use the single-key sort function? */
+		if (state->onlyKey != NULL)
+			qsort_ssup(state->memtuples, state->memtupcount,
+					   state->onlyKey);
+		else
+			qsort_tuple(state->memtuples,
+						state->memtupcount,
+						state->comparetup,
+						state);
+	}
+}
+
+/*
+ * Insert a new tuple into an empty or existing heap, maintaining the
+ * heap invariant. Caller is responsible for ensuring there's room.
+ *
+ * Note: For some callers, tuple points to a memtuples[] entry above the
+ * end of the heap. This is safe as long as it's not immediately adjacent
+ * to the end of the heap (ie, in the [memtupcount] array entry) --- if it
+ * is, it might get overwritten before being moved into the heap!
+ */
+static void
+tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple)
+{
+	SortTuple  *memtuples;
+	int			j;
+
+	memtuples = state->memtuples;
+	Assert(state->memtupcount < state->memtupsize);
+
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is
+	 * using 1-based array indexes, not 0-based.
+	 */
+	j = state->memtupcount++;
+	while (j > 0)
+	{
+		int			i = (j - 1) >> 1;	/* parent of slot j */
+
+		if (COMPARETUP(state, tuple, &memtuples[i]) >= 0)
+			break;
+		memtuples[j] = memtuples[i];
+		j = i;
+	}
+	memtuples[j] = *tuple;
+}
+
+/*
+ * Remove the tuple at state->memtuples[0] from the heap. Decrement
+ * memtupcount, and sift up to maintain the heap invariant.
+ *
+ * The caller has already free'd the tuple the top node points to,
+ * if necessary.
+ */
+static void
+tuplesort_heap_delete_top(Tuplesortstate *state)
+{
+	SortTuple  *memtuples = state->memtuples;
+	SortTuple  *tuple;
+
+	if (--state->memtupcount <= 0)
+		return;					/* heap is now empty, nothing to re-insert */
+
+	/*
+	 * Remove the last tuple in the heap, and re-insert it, by replacing the
+	 * current top node with it.
+	 */
+	tuple = &memtuples[state->memtupcount];
+	tuplesort_heap_replace_top(state, tuple);
+}
+
+/*
+ * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to
+ * maintain the heap invariant.
+ *
+ * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H,
+ * Heapsort, steps H3-H8).
+ */
+static void
+tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple)
+{
+	SortTuple  *memtuples = state->memtuples;
+	unsigned int i,
+				n;
+
+	Assert(state->memtupcount >= 1);
+
+	CHECK_FOR_INTERRUPTS();
+
+	/*
+	 * state->memtupcount is "int", but we use "unsigned int" for i, j, n.
+	 * This prevents overflow in the "2 * i + 1" calculation, since at the top
+	 * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2.
+	 */
+	n = state->memtupcount;
+	i = 0;						/* i is where the "hole" is */
+	for (;;)
+	{
+		unsigned int j = 2 * i + 1;	/* first child of the hole */
+
+		if (j >= n)
+			break;
+		if (j + 1 < n &&
+			COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0)
+			j++;
+		if (COMPARETUP(state, tuple, &memtuples[j]) <= 0)
+			break;
+		memtuples[i] = memtuples[j];
+		i = j;
+	}
+	memtuples[i] = *tuple;
+}
+
+/*
+ * Function to reverse the sort direction from its current state
+ *
+ * It is not safe to call this when performing hash tuplesorts
+ *
+ * Both ssup_reverse and ssup_nulls_first are flipped for each of the nKeys
+ * sort keys, so calling this twice restores the original direction.
+ */
+static void
+reversedirection(Tuplesortstate *state)
+{
+	SortSupport sortKey = state->sortKeys;
+	int			nkey;
+
+	for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++)
+	{
+		sortKey->ssup_reverse = !sortKey->ssup_reverse;
+		sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first;
+	}
+}
+
+
+/*
+ * Tape interface routines
+ *
+ * getlen reads the next length word from a tape. It raises an error if the
+ * word cannot be read in full, or if it is zero while eofOK is false.
+ * markrunend writes a zero length word to a tape.
+ */
+
+static unsigned int
+getlen(Tuplesortstate *state, int tapenum, bool eofOK)
+{
+	unsigned int len;
+
+	if (LogicalTapeRead(state->tapeset, tapenum,
+						&len, sizeof(len)) != sizeof(len))
+		elog(ERROR, "unexpected end of tape");
+	if (len == 0 && !eofOK)
+		elog(ERROR, "unexpected end of data");
+	return len;
+}
+
+static void
+markrunend(Tuplesortstate *state, int tapenum)
+{
+	unsigned int len = 0;
+
+	LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len));
+}
+
+/*
+ * Get memory for tuple from within READTUP() routine.
+ *
+ * We use next free slot from the slab allocator, or palloc() if the tuple
+ * is too large for that.
+ */
+static void *
+readtup_alloc(Tuplesortstate *state, Size tuplen)
+{
+	SlabSlot   *buf;
+
+	/*
+	 * We pre-allocate enough slots in the slab arena that we should never run
+	 * out.
+	 */
+	Assert(state->slabFreeHead);
+
+	if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead)
+		return MemoryContextAlloc(state->sortcontext, tuplen);
+	else
+	{
+		buf = state->slabFreeHead;
+		/* Reuse this slot */
+		state->slabFreeHead = buf->nextfree;
+
+		return buf;
+	}
+}
+
+
+/*
+ * Routines specialized for HeapTuple (actually MinimalTuple) case
+ */
+
+/*
+ * comparetup_heap - compare two MinimalTuples on all sort keys
+ *
+ * The cached leading key (datum1/isnull1) is compared first.  Only if that
+ * ties are the tuples wrapped in temporary HeapTupleData headers so that
+ * heap_getattr() can fetch the remaining key columns.  When the leading key
+ * is abbreviated, a tie is re-checked with the full comparator before the
+ * later keys are considered.  Returns <0, 0 or >0.
+ */
+static int
+comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
+{
+	SortSupport sortKey = state->sortKeys;
+	HeapTupleData ltup;
+	HeapTupleData rtup;
+	TupleDesc	tupDesc;
+	int			nkey;
+	int32		compare;
+	AttrNumber	attno;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+
+
+	/* Compare the leading sort key */
+	compare = ApplySortComparator(a->datum1, a->isnull1,
+								  b->datum1, b->isnull1,
+								  sortKey);
+	if (compare != 0)
+		return compare;
+
+	/* Compare additional sort keys */
+	ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+	ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET);
+	rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
+	rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
+	tupDesc = state->tupDesc;
+
+	if (sortKey->abbrev_converter)
+	{
+		attno = sortKey->ssup_attno;
+
+		/* ltup/rtup are local structs, so their addresses are passed */
+		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
+		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+		compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+												datum2, isnull2,
+												sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
+	sortKey++;
+	for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
+	{
+		attno = sortKey->ssup_attno;
+
+		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
+		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+		compare = ApplySortComparator(datum1, isnull1,
+									  datum2, isnull2,
+									  sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
+	return 0;
+}
+
+/*
+ * copytup_heap - copy a tuple from a slot into sort storage
+ *
+ * The MinimalTuple copy is made in tuplecontext and charged via USEMEM.
+ * stup->datum1/isnull1 are set from the first sort key column, abbreviated
+ * when a converter is in use and abbreviation has not been aborted.
+ */
+static void
+copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
+{
+	/*
+	 * We expect the passed "tup" to be a TupleTableSlot, and form a
+	 * MinimalTuple using the exported interface for that.
+	 */
+	TupleTableSlot *slot = (TupleTableSlot *) tup;
+	Datum		original;
+	MinimalTuple tuple;
+	HeapTupleData htup;
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext);
+
+	/* copy the tuple into sort storage */
+	tuple = ExecCopySlotMinimalTuple(slot);
+	stup->tuple = (void *) tuple;
+	USEMEM(state, GetMemoryChunkSpace(tuple));
+	/* set up first-column key value */
+	htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
+	htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
+	original = heap_getattr(&htup,
+							state->sortKeys[0].ssup_attno,
+							state->tupDesc,
+							&stup->isnull1);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value. If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to zeroed representation (to be
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int			i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just
+		 * handled). It does not matter if some dumped tuples are already
+		 * sorted on tape, since serialized tuples lack abbreviated keys
+		 * (TSS_BUILDRUNS state prevents control reaching here in any case).
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple  *mtup = &state->memtuples[i];
+
+			htup.t_len = ((MinimalTuple) mtup->tuple)->t_len +
+				MINIMAL_TUPLE_OFFSET;
+			htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple -
+											 MINIMAL_TUPLE_OFFSET);
+
+			mtup->datum1 = heap_getattr(&htup,
+										state->sortKeys[0].ssup_attno,
+										state->tupDesc,
+										&mtup->isnull1);
+		}
+	}
+}
+
+/*
+ * writetup_heap - write one MinimalTuple to a tape
+ *
+ * What is written is a length word, then the tuple body starting at
+ * MINIMAL_TUPLE_DATA_OFFSET, then (for random-access sorts only) the length
+ * word again.  Unless the slab allocator is in use, the tuple's memory is
+ * released afterwards.
+ */
+static void
+writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup)
+{
+	MinimalTuple tuple = (MinimalTuple) stup->tuple;
+
+	/* the part of the MinimalTuple we'll write: */
+	char	   *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
+	unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET;
+
+	/* total on-disk footprint: */
+	unsigned int tuplen = tupbodylen + sizeof(int);
+
+	LogicalTapeWrite(state->tapeset, tapenum,
+					 (void *) &tuplen, sizeof(tuplen));
+	LogicalTapeWrite(state->tapeset, tapenum,
+					 (void *) tupbody, tupbodylen);
+	if (state->randomAccess)	/* need trailing length word? */
+		LogicalTapeWrite(state->tapeset, tapenum,
+						 (void *) &tuplen, sizeof(tuplen));
+
+	if (!state->slabAllocatorUsed)
+	{
+		FREEMEM(state, GetMemoryChunkSpace(tuple));
+		heap_free_minimal_tuple(tuple);
+	}
+}
+
+static void
+readtup_heap(Tuplesortstate *state, SortTuple *stup,
+			 int tapenum, unsigned int len)
+{
+	unsigned int tupbodylen = len - sizeof(int);
+	unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET;
+	MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen);
+	char	   *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET;
+	HeapTupleData htup;
+
+	/* read in the tuple proper */
+	tuple->t_len = tuplen;
+	LogicalTapeReadExact(state->tapeset, tapenum,
+						 tupbody, tupbodylen);
+	if (state->randomAccess)	/* need trailing length word?
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = 
heap_getattr(rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreTuple(ltup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreTuple(rtup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + return; + + original = 
heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = 
index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. + */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? 
-1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. + */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? 
-1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + IndexTuple tuple = (IndexTuple) tup; + unsigned int tuplen = IndexTupleSize(tuple); + IndexTuple newtuple; + Datum original; + + /* copy the tuple into sort storage */ + newtuple = (IndexTuple) MemoryContextAlloc(state->tuplecontext, tuplen); + memcpy(newtuple, tuple, tuplen); + USEMEM(state, GetMemoryChunkSpace(newtuple)); + stup->tuple = (void *) newtuple; + /* set up first-column key value */ + original = index_getattr(newtuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (IndexTuple) mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +/* + * Parallel sort routines + */ + +/* + * tuplesort_estimate_shared - estimate required shared memory allocation + * + * nWorkers is an estimate of the number of workers (it's the number that + * will be requested). + */ +Size +tuplesort_estimate_shared(int nWorkers) +{ + Size tapesSize; + + Assert(nWorkers > 0); + + /* Make sure that BufFile shared state is MAXALIGN'd */ + tapesSize = mul_size(sizeof(TapeShare), nWorkers); + tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes))); + + return tapesSize; +} + +/* + * tuplesort_initialize_shared - initialize shared tuplesort state + * + * Must be called from leader process before workers are launched, to + * establish state needed up-front for worker tuplesortstates. nWorkers + * should match the argument passed to tuplesort_estimate_shared(). 
+ */ +void +tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg) +{ + int i; + + Assert(nWorkers > 0); + + SpinLockInit(&shared->mutex); + shared->currentWorker = 0; + shared->workersFinished = 0; + SharedFileSetInit(&shared->fileset, seg); + shared->nTapes = nWorkers; + for (i = 0; i < nWorkers; i++) + { + shared->tapes[i].firstblocknumber = 0L; + } +} + +/* + * tuplesort_attach_shared - attach to shared tuplesort state + * + * Must be called by all worker processes. + */ +void +tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) +{ + /* Attach to SharedFileSet */ + SharedFileSetAttach(&shared->fileset, seg); +} + +/* + * worker_get_identifier - Assign and return ordinal identifier for worker + * + * The order in which these are assigned is not well defined, and should not + * matter; worker numbers across parallel sort participants need only be + * distinct and gapless. logtape.c requires this. + * + * Note that the identifiers assigned from here have no relation to + * ParallelWorkerNumber number, to avoid making any assumption about + * caller's requirements. However, we do follow the ParallelWorkerNumber + * convention of representing a non-worker with worker number -1. This + * includes the leader, as well as serial Tuplesort processes. + */ +static int +worker_get_identifier(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int worker; + + Assert(WORKER(state)); + + SpinLockAcquire(&shared->mutex); + worker = shared->currentWorker++; + SpinLockRelease(&shared->mutex); + + return worker; +} + +/* + * worker_freeze_result_tape - freeze worker's result tape for leader + * + * This is called by workers just after the result tape has been determined, + * instead of calling LogicalTapeFreeze() directly. They do so because + * workers require a few additional steps over similar serial + * TSS_SORTEDONTAPE external sort cases, which also happen here. 
The extra + * steps are around freeing now unneeded resources, and representing to + * leader that worker's input run is available for its merge. + * + * There should only be one final output run for each worker, which consists + * of all tuples that were originally input into worker. + */ +static void +worker_freeze_result_tape(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + TapeShare output; + + Assert(WORKER(state)); + Assert(state->result_tape != -1); + Assert(state->memtupcount == 0); + + /* + * Free most remaining memory, in case caller is sensitive to our holding + * on to it. memtuples may not be a tiny merge heap at this point. + */ + pfree(state->memtuples); + /* Be tidy */ + state->memtuples = NULL; + state->memtupsize = 0; + + /* + * Parallel worker requires result tape metadata, which is to be stored in + * shared memory for leader + */ + LogicalTapeFreeze(state->tapeset, state->result_tape, &output); + + /* Store properties of output tape, and update finished worker count */ + SpinLockAcquire(&shared->mutex); + shared->tapes[state->worker] = output; + shared->workersFinished++; + SpinLockRelease(&shared->mutex); +} + +/* + * worker_nomergeruns - dump memtuples in worker, without merging + * + * This is called as an alternative to mergeruns() with a worker when no + * merging is required. + */ +static void +worker_nomergeruns(Tuplesortstate *state) +{ + Assert(WORKER(state)); + Assert(state->result_tape == -1); + + state->result_tape = state->tp_tapenum[state->destTape]; + worker_freeze_result_tape(state); +} + +/* + * leader_takeover_tapes - create tapeset for leader from worker tapes + * + * So far, leader Tuplesortstate has performed no actual sorting. By now, all + * sorting has occurred in workers, all of which must have already returned + * from tuplesort_performsort().
+ * + * When this returns, leader process is left in a state that is virtually + * indistinguishable from it having generated runs as a serial external sort + * might have. + */ +static void +leader_takeover_tapes(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int nParticipants = state->nParticipants; + int workersFinished; + int j; + + Assert(LEADER(state)); + Assert(nParticipants >= 1); + + SpinLockAcquire(&shared->mutex); + workersFinished = shared->workersFinished; + SpinLockRelease(&shared->mutex); + + if (nParticipants != workersFinished) + elog(ERROR, "cannot take over tapes before all workers finish"); + + /* + * Create the tapeset from worker tapes, including a leader-owned tape at + * the end. Parallel workers are far more expensive than logical tapes, + * so the number of tapes allocated here should never be excessive. + * + * We still have a leader tape, though it's not possible to write to it + * due to restrictions in the shared fileset infrastructure used by + * logtape.c. It will never be written to in practice because + * randomAccess is disallowed for parallel sorts. + */ + inittapestate(state, nParticipants + 1); + state->tapeset = LogicalTapeSetCreate(nParticipants + 1, shared->tapes, + &shared->fileset, state->worker); + + /* mergeruns() relies on currentRun for # of runs (in one-pass cases) */ + state->currentRun = nParticipants; + + /* + * Initialize variables of Algorithm D to be consistent with runs from + * workers having been generated in the leader. + * + * There will always be exactly 1 run per worker, and exactly one input + * tape per run, because workers always output exactly 1 run, even when + * there were no input tuples for workers to sort. 
+ */ + for (j = 0; j < state->maxTapes; j++) + { + /* One real run; no dummy runs for worker tapes */ + state->tp_fib[j] = 1; + state->tp_runs[j] = 1; + state->tp_dummy[j] = 0; + state->tp_tapenum[j] = j; + } + /* Leader tape gets one dummy run, and no real runs */ + state->tp_fib[state->tapeRange] = 0; + state->tp_runs[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 1; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort12.c b/src/tuplesort12.c new file mode 100644 index 0000000000..f975d24a98 --- /dev/null +++ b/src/tuplesort12.c @@ -0,0 +1,4594 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we always use + * quicksort for run generation. We merge the runs using polyphase merge, + * Knuth's Algorithm 5.4.2D. 
The logical "tapes" used by Algorithm D are + * implemented by logtape.c, which avoids space wastage by recycling disk + * space as soon as each block is read from its "tape". + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run, + * then merge the runs using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. 
Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c; we just tell it how + * much memory to use for the buffers. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. In our implementation a "tape drive" + * doesn't cost much more than a few kB of memory buffers, so we can afford + * to have lots of them. In particular, if we can have as many tape drives + * as sorted runs, we can eliminate repeated I/O altogether.
In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes (but not + * too many -- see the comments in tuplesort_merge_order). + * + * This module supports parallel sorting. Parallel sorts involve coordination + * among one or more worker processes, and a leader process, each with its own + * tuplesort state. The leader process (or, more accurately, the + * Tuplesortstate associated with a leader process) creates a full tapeset + * consisting of worker tapes with one run to merge; a run for every + * worker process. This is then merged. Worker processes are guaranteed to + * produce exactly one output run from their partial input. + * + * + * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* Sort parallel code from state for sort__start probes */ +#define PARALLEL_SORT(state) ((state)->shared == NULL ? 
0 : \ + (state)->worker >= 0 ? 1 : 2) + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during merge, when we use a + * simple slab allocator). SortTuples also contain the tuple's first key + * column in Datum/nullflag format, and an index integer. + * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values). If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. 
+ * + * tupindex holds the input tape number that each tuple in the heap was read + * from during merge passes. + */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int tupindex; /* see notes above */ +} SortTuple; + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples, to avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. + * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. + */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about one block's + * worth of buffer space. This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). 
+ */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? */ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a<b, a=b, a>b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). 
+ */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. 
The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Buffer size to use for reading input tapes, during merge. */ + size_t read_buffer_size; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * This variable is only used during merge passes. mergeactive[i] is true + * if we are reading an input run from (actual) tape number i and have not + * yet exhausted that run. + */ + bool *mergeactive; /* active input run source? 
*/ + + /* + * Variables for Algorithm D. Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! + */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run leader can merge. Typically includes a worker state held + * by the leader process itself. Set in the leader Tuplesortstate only. 
+ */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. + */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. 
+ */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. + */ +struct Sharedsort +{ + /* mutex protects all fields prior to tapes */ + slock_t mutex; + + /* + * currentWorker generates ordinal identifier numbers for parallel sort + * workers. These start from 0, and are always gapless. + * + * Workers increment workersFinished to indicate having finished. If this + * is equal to state.nParticipants within the leader, leader is ready to + * merge worker runs. + */ + int currentWorker; + int workersFinished; + + /* Temporary file space */ + SharedFileSet fileset; + + /* Size of tapes flexible array */ + int nTapes; + + /* + * Tapes array used by workers to report back information needed by the + * leader to concatenate all worker tapes into one for merging + */ + TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. 
+ */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) +#define SERIAL(state) ((state)->shared == NULL) +#define WORKER(state) ((state)->shared && (state)->worker != -1) +#define LEADER(state) ((state)->shared && (state)->worker == -1) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. 
When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. + * Note that this places a responsibility on readtup and copytup routines + * to use the right memory context for these tuples (and to not use the + * reset context for anything whose lifetime needs to span multiple + * external sort runs). 
+ */ + +/* When using this macro, beware of double evaluation of len */ +#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ + do { \ + if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ + elog(ERROR, "unexpected end of data"); \ + } while(0) + + +static Tuplesortstate *tuplesort_begin_common(int workMem, + SortCoordinate coordinate, + bool randomAccess); +static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); +static bool consider_abort_common(Tuplesortstate *state); +static void inittapes(Tuplesortstate *state, bool mergeruns); +static void inittapestate(Tuplesortstate *state, int maxTapes); +static void selectnewtape(Tuplesortstate *state); +static void init_slab_allocator(Tuplesortstate *state, int numSlots); +static void mergeruns(Tuplesortstate *state); +static void mergeonerun(Tuplesortstate *state); +static void beginmerge(Tuplesortstate *state); +static bool mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup); +static void dumptuples(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); +static void tuplesort_sort_memtuples(Tuplesortstate *state); +static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_delete_top(Tuplesortstate *state); +static void reversedirection(Tuplesortstate *state); +static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK); +static void markrunend(Tuplesortstate *state, int tapenum); +static void *readtup_alloc(Tuplesortstate *state, Size tuplen); +static int comparetup_heap(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_heap(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_heap(Tuplesortstate 
*state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_cluster(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_index(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_datum(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_datum(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int worker_get_identifier(Tuplesortstate *state); +static void worker_freeze_result_tape(Tuplesortstate *state); +static void worker_nomergeruns(Tuplesortstate *state); +static void leader_takeover_tapes(Tuplesortstate *state); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); + +/* + * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts + * any variant of SortTuples, using the appropriate comparetup function. + * qsort_ssup() is specialized for the case where the comparetup function + * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts + * and Datum sorts. + */ +#include "qsort_tuple.c" + + +/* + * tuplesort_begin_xxx + * + * Initialize for a tuple sort operation. 
+ * + * After calling tuplesort_begin, the caller should call tuplesort_putXXX + * zero or more times, then call tuplesort_performsort when all the tuples + * have been supplied. After performsort, retrieve the tuples in sorted + * order by calling tuplesort_getXXX until it returns false/NULL. (If random + * access was requested, rescan, markpos, and restorepos can also be called.) + * Call tuplesort_end to terminate the operation and release memory/disk space. + * + * Each variant of tuplesort_begin has a workMem parameter specifying the + * maximum number of kilobytes of RAM to use before spilling data to disk. + * (The normal value of this parameter is work_mem, but some callers use + * other values.) Each variant also has a randomAccess parameter specifying + * whether the caller needs non-sequential access to the sort result. + */ + +static Tuplesortstate * +tuplesort_begin_common(int workMem, SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state; + MemoryContext sortcontext; + MemoryContext tuplecontext; + MemoryContext oldcontext; + + /* See leader_takeover_tapes() remarks on randomAccess support */ + if (coordinate && randomAccess) + elog(ERROR, "random access disallowed under parallel sort"); + + /* + * Create a working memory context for this sort operation. All data + * needed by the sort will live inside this context. + */ + sortcontext = AllocSetContextCreate(CurrentMemoryContext, + "TupleSort main", + ALLOCSET_DEFAULT_SIZES); + + /* + * Caller tuple (e.g. IndexTuple) memory context. + * + * A dedicated child context used exclusively for caller passed tuples + * eases memory management. Resetting at key points reduces + * fragmentation. Note that the memtuples array of SortTuples is allocated + * in the parent context, not this context, because there is no need to + * free memtuples early. 
+ */ + tuplecontext = AllocSetContextCreate(sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + + /* + * Make the Tuplesortstate within the per-sort context. This way, we + * don't need a separate pfree() operation for it at shutdown. + */ + oldcontext = MemoryContextSwitchTo(sortcontext); + + state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate)); + +#ifdef TRACE_SORT + if (trace_sort) + pg_rusage_init(&state->ru_start); +#endif + + state->status = TSS_INITIAL; + state->randomAccess = randomAccess; + state->bounded = false; + state->tuples = true; + state->boundUsed = false; + + /* + * workMem is forced to be at least 64KB, the current minimum valid value + * for the work_mem GUC. This is a defense against parallel sort callers + * that divide out memory among many workers in a way that leaves each + * with very little memory. + */ + state->allowedMem = Max(workMem, 64) * (int64) 1024; + state->availMem = state->allowedMem; + state->sortcontext = sortcontext; + state->tuplecontext = tuplecontext; + state->tapeset = NULL; + + state->memtupcount = 0; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). 
+ */ + state->memtupsize = Max(1024, + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + + state->growmemtuples = true; + state->slabAllocatorUsed = false; + state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* workMem must be large enough for the minimal memtuples array */ + if (LACKMEM(state)) + elog(ERROR, "insufficient memory allowed for sort"); + + state->currentRun = 0; + + /* + * maxTapes, tapeRange, and Algorithm D variables will be initialized by + * inittapes(), if needed + */ + + state->result_tape = -1; /* flag that result tape has not been formed */ + + /* + * Initialize parallel-related state based on coordination information + * from caller + */ + if (!coordinate) + { + /* Serial sort */ + state->shared = NULL; + state->worker = -1; + state->nParticipants = -1; + } + else if (coordinate->isWorker) + { + /* Parallel worker produces exactly one final run from all input */ + state->shared = coordinate->sharedsort; + state->worker = worker_get_identifier(state); + state->nParticipants = -1; + } + else + { + /* Parallel leader state only used for final merge */ + state->shared = coordinate->sharedsort; + state->worker = -1; + state->nParticipants = coordinate->nParticipants; + Assert(state->nParticipants >= 1); + } + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_heap(TupleDesc tupDesc, + int nkeys, AttrNumber *attNums, + Oid *sortOperators, Oid *sortCollations, + bool *nullsFirstFlags, + int workMem, SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + + AssertArg(nkeys > 0); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + nkeys, workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + pfree(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? 
+ BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + pfree(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_hash(Relation heapRel, + Relation indexRel, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " + "max_buckets = 0x%x, workMem = %d, randomAccess = %c", + high_mask, + low_mask, + max_buckets, + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the hash code */ + + state->comparetup = comparetup_index_hash; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + state->high_mask = high_mask; + state->low_mask = low_mask; + state->max_buckets = max_buckets; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple if needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each sortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. 
(Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. Parallel leader tuplesorts will always ignore the hint. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL); + Assert(state->memtupcount == 0); + Assert(!state->bounded); + Assert(!WORKER(state)); + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* Parallel leader ignores hint */ + if (LEADER(state)) + return; + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX / 2)) + return; + + state->bounded = true; + state->bound = (int) bound; + + /* + * Bounded sorts are not an effective target for abbreviated key + * optimization. Disable by setting state to be consistent with no + * abbreviation support. + */ + state->sortKeys->abbrev_converter = NULL; + if (state->sortKeys->abbrev_full_comparator) + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! 
+ */ +void +tuplesort_end(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", + SERIAL(state) ? "external sort" : "parallel external sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "%s of worker %d ended, %ld KB used: %s", + SERIAL(state) ? "internal sort" : "unperformed parallel sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. + */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory, + * including the Tuplesortstate struct itself. + */ + MemoryContextDelete(state->sortcontext); +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. 
Return true if we were able to enlarge the array, false if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. The + * growmemtuples flag accomplishes that and also prevents useless + * recalculations in this function. + */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples. + */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. 
The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. + */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. 
(We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) + */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. 
+ */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. + */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + Assert(!LEADER(state)); + + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state, true); + + /* + * Dump all tuples. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_replace_top(state, tuple); + } + break; + + case TSS_BUILDRUNS: + + /* + * Save the tuple into the unsorted array (there must be space) + */ + state->memtuples[state->memtupcount++] = *tuple; + + /* + * If we are over the memory limit, dump all tuples. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. + */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. 
+ */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort of worker %d starting: %s", + state->worker, pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory, or we are the leader taking over worker tapes. + */ + if (SERIAL(state)) + { + /* Just qsort 'em and we're done */ + tuplesort_sort_memtuples(state); + state->status = TSS_SORTEDINMEM; + } + else if (WORKER(state)) + { + /* + * Parallel workers must still dump out tuples to tape. No + * merge is required to produce single output run, though. + */ + inittapes(state, false); + dumptuples(state, true); + worker_nomergeruns(state); + state->status = TSS_SORTEDONTAPE; + } + else + { + /* + * Leader will take over worker tapes and merge worker runs. + * Note that mergeruns sets the correct state->status. + */ + leader_takeover_tapes(state); + mergeruns(state); + } + state->current = 0; + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess and !WORKER(), one run per tape). + * Note that mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort of worker %d done (except %d-way final merge): %s", + state->worker, state->activeTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort of worker %d done: %s", + state->worker, pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns false if no more tuples. + * Returned tuple belongs to tuplesort memory context, and must not be freed + * by caller. Note that fetched tuple is stored in memory that may be + * recycled by any future fetch. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup) +{ + unsigned int tuplen; + size_t nmoved; + + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->slabAllocatorUsed); + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. 
+ */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(state->slabAllocatorUsed); + + /* + * The slot that held the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + if (forward) + { + if (state->eof_reached) + return false; + + if ((tuplen = getlen(state, state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle + * its memory on next call. (This can be NULL, in the + * !state->tuples case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + 2 * sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != 2 * sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. 
+ */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + tuplen = getlen(state, state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + 2 * sizeof(unsigned int)); + if (nmoved == tuplen + sizeof(unsigned int)) + { + /* + * We backed up over the previous tuple, but there was no + * ending length word before it. That means that the prev + * tuple is the first tuple in the file. It is now the + * next to read in forward direction (not obviously right, + * but that is what in-memory case does). + */ + return false; + } + else if (nmoved != tuplen + 2 * sizeof(unsigned int)) + elog(ERROR, "bogus tuple length in backward scan"); + } + + tuplen = getlen(state, state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen); + if (nmoved != tuplen) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle its memory + * on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + + case TSS_FINALMERGE: + Assert(forward); + /* We are managing memory ourselves, with the slab allocator. */ + Assert(state->slabAllocatorUsed); + + /* + * The slab slot holding the tuple that we returned in previous + * gettuple call can now be reused. 
+ */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + /* + * This code should match the inner loop of mergeonerun(). + */ + if (state->memtupcount > 0) + { + int srcTape = state->memtuples[0].tupindex; + SortTuple newtup; + + *stup = state->memtuples[0]; + + /* + * Remember the tuple we return, so that we can recycle its + * memory on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + /* + * Pull next tuple from tape, and replace the returned tuple + * at top of the heap with it. + */ + if (!mergereadnext(state, srcTape, &newtup)) + { + /* + * If no more data, we've reached end of run on this tape. + * Remove the top node from the heap. + */ + tuplesort_heap_delete_top(state); + + /* + * Rewind to free the read buffer. It'd go away at the + * end of the sort anyway, but better to release the + * memory early. + */ + LogicalTapeRewindForWrite(state->tapeset, srcTape); + return true; + } + newtup.tupindex = srcTape; + tuplesort_heap_replace_top(state, &newtup); + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return true; else, clear the slot + * and return false. + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. 
+ * + * If copy is true, the slot receives a tuple that's been copied into the + * caller's memory context, so that it will stay valid regardless of future + * manipulations of the tuplesort's state (up to and including deleting the + * tuplesort). If copy is false, the slot will just receive a pointer to a + * tuple held within the tuplesort, which is more efficient, but only safe for + * callers that are prepared to have any subsequent manipulation of the + * tuplesort's state invalidate slot contents. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller, if abbreviation is in use and an abbrev pointer was supplied */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (copy) + stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); + + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next heap tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. The fetch runs in state->sortcontext; the previous memory context is restored before returning. + */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples.
Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. The fetch runs in state->sortcontext; the previous memory context is restored before returning. + */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns false if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check.
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* use stup.tuple because stup.datum1 may be an abbreviation */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns true if successful, false if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /* + * We need one tape for each merge input, plus another one for the output, + * and each of these tapes needs buffer space. In addition we want + * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't + * count). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. + */ + mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / + (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); + + /* + * Even in minimum memory, use at least a MINORDER merge. On the other + * hand, even when we have lots of memory, do not use more than a MAXORDER + * merge. Tapes are pretty cheap, but they're not entirely free. Each + * additional tape reduces the amount of memory available to build runs, + * which in turn can cause the same sort to need more runs, which makes + * merging slower even if it can still be done in a single pass. 
Also, + * high order merges are quite slow due to CPU cache effects; it can be + * faster to pay the I/O cost of a polyphase merge than to perform a + * single merge pass across many hundreds of tapes. + */ + mOrder = Max(mOrder, MINORDER); + mOrder = Min(mOrder, MAXORDER); + + return mOrder; +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we won't sort in memory. + */ +static void +inittapes(Tuplesortstate *state, bool mergeruns) +{ + int maxTapes, + j; + + Assert(!LEADER(state)); + + if (mergeruns) + { + /* Compute number of tapes to use: merge order plus 1 */ + maxTapes = tuplesort_merge_order(state->allowedMem) + 1; + } + else + { + /* Workers can sometimes produce single run, output without merge */ + Assert(WORKER(state)); + maxTapes = MINORDER + 1; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d switching to external sort with %d tapes: %s", + state->worker, maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* Create the tape set and allocate the per-tape data arrays */ + inittapestate(state, maxTapes); + state->tapeset = + LogicalTapeSetCreate(maxTapes, NULL, + state->shared ? &state->shared->fileset : NULL, + state->worker); + + state->currentRun = 0; + + /* + * Initialize variables of Algorithm D (step D1). + */ + for (j = 0; j < maxTapes; j++) + { + state->tp_fib[j] = 1; + state->tp_runs[j] = 0; + state->tp_dummy[j] = 1; + state->tp_tapenum[j] = j; + } + state->tp_fib[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 0; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * inittapestate - initialize generic tape management state + */ +static void +inittapestate(Tuplesortstate *state, int maxTapes) +{ + int64 tapeSpace; + + /* + * Decrease availMem to reflect the space needed for tape buffers; but + * don't decrease it to the point that we have no room for tuples. 
(That + * case is only likely to occur if sorting pass-by-value Datums; in all + * other scenarios the memtuples[] array is unlikely to occupy more than + * half of allowedMem. In the pass-by-value case it's not important to + * account for tuple space, so we don't care if LACKMEM becomes + * inaccurate.) + */ + tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. For parallel sorts, this should have been + * called already, but it doesn't matter if it is called a second time. + */ + PrepareTempTablespaces(); + + state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); + state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); + + /* Record # of tapes allocated (for duration of sort) */ + state->maxTapes = maxTapes; + /* Record maximum # of tapes usable as inputs when merging */ + state->tapeRange = maxTapes - 1; +} + +/* + * selectnewtape -- select new tape for new initial run. + * + * This is called after finishing a run when we know another run + * must be started. This implements steps D3, D4 of Algorithm D. 
+ */ +static void +selectnewtape(Tuplesortstate *state) +{ + int j; + int a; + + /* Step D3: advance j (destTape), or wrap back to tape 0 while the current tape still has dummy runs */ + if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1]) + { + state->destTape++; + return; + } + if (state->tp_dummy[state->destTape] != 0) + { + state->destTape = 0; + return; + } + + /* Step D4: increase level, recomputing tp_dummy[] and tp_fib[] for every input tape */ + state->Level++; + a = state->tp_fib[0]; + for (j = 0; j < state->tapeRange; j++) + { + state->tp_dummy[j] = a + state->tp_fib[j + 1] - state->tp_fib[j]; + state->tp_fib[j] = a + state->tp_fib[j + 1]; + } + state->destTape = 0; +} + +/* + * Initialize the slab allocation arena, for the given number of slots. The slots are chained into a free list headed by slabFreeHead; if numSlots is not positive, no arena is allocated and the free list is empty. slabAllocatorUsed is set in both cases. + */ +static void +init_slab_allocator(Tuplesortstate *state, int numSlots) +{ + if (numSlots > 0) + { + char *p; + int i; + + state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE); + state->slabMemoryEnd = state->slabMemoryBegin + + numSlots * SLAB_SLOT_SIZE; + state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin; + USEMEM(state, numSlots * SLAB_SLOT_SIZE); + + p = state->slabMemoryBegin; + for (i = 0; i < numSlots - 1; i++) + { + ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE); + p += SLAB_SLOT_SIZE; + } + ((SlabSlot *) p)->nextfree = NULL; + } + else + { + state->slabMemoryBegin = state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; + } + state->slabAllocatorUsed = true; +} + +/* + * mergeruns -- merge all the completed initial runs. + * + * This implements steps D5, D6 of Algorithm D. All input data has + * already been written to initial runs on tape (see dumptuples).
+ */ +static void +mergeruns(Tuplesortstate *state) +{ + int tapenum, + svTape, + svRuns, + svDummy; + int numTapes; + int numInputTapes; + + Assert(state->status == TSS_BUILDRUNS); + Assert(state->memtupcount == 0); + + if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) + { + /* + * If there are multiple runs to be merged, when we go to read back + * tuples from disk, abbreviated keys will not have been stored, and + * we don't care to regenerate them. Disable abbreviation from this + * point on. + */ + state->sortKeys->abbrev_converter = NULL; + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; + } + + /* + * Reset tuple memory. We've freed all the tuples that we previously + * allocated. We will use the slab allocator from now on. + */ + MemoryContextDelete(state->tuplecontext); + state->tuplecontext = NULL; + + /* + * We no longer need a large memtuples array. (We will allocate a smaller + * one for the heap later.) + */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + pfree(state->memtuples); + state->memtuples = NULL; + + /* + * If we had fewer runs than tapes, refund the memory that we imagined we + * would need for the tape buffers of the unused tapes. + * + * numTapes and numInputTapes reflect the actual number of tapes we will + * use. Note that the output tape's tape number is maxTapes - 1, so the + * tape numbers of the used tapes are not consecutive, and you cannot just + * loop from 0 to numTapes to visit all used tapes! + */ + if (state->Level == 1) + { + numInputTapes = state->currentRun; + numTapes = numInputTapes + 1; + FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD); + } + else + { + numInputTapes = state->tapeRange; + numTapes = state->maxTapes; + } + + /* + * Initialize the slab allocator.
We need one slab slot per input tape, + * for the tuples in the heap, plus one to hold the tuple last returned + * from tuplesort_gettuple. (If we're sorting pass-by-val Datums, + * however, we don't need to allocate anything.) + * + * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism + * to track memory usage of individual tuples. + */ + if (state->tuples) + init_slab_allocator(state, numInputTapes + 1); + else + init_slab_allocator(state, 0); + + /* + * Allocate a new 'memtuples' array, for the heap. It will hold one tuple + * from each input tape. + */ + state->memtupsize = numInputTapes; + state->memtuples = (SortTuple *) palloc(numInputTapes * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* + * Use all the remaining memory we have available for read buffers among + * the input tapes. + * + * We don't try to "rebalance" the memory among tapes, when we start a new + * merge phase, even if some tapes are inactive in the new phase. That + * would be hard, because logtape.c doesn't know where one run ends and + * another begins. When a new merge phase begins, and a tape doesn't + * participate in it, its buffer nevertheless already contains tuples from + * the next run on same tape, so we cannot release the buffer. That's OK + * in practice, merge performance isn't that sensitive to the amount of + * buffers used, and most merge phases use all or almost all tapes, + * anyway.
+ */ +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d using " INT64_FORMAT " KB of memory for read buffers among %d input tapes", + state->worker, state->availMem / 1024, numInputTapes); +#endif + + state->read_buffer_size = Max(state->availMem / numInputTapes, 0); + USEMEM(state, state->read_buffer_size * numInputTapes); + + /* End of step D2: rewind all output tapes to prepare for merging */ + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size); + + for (;;) + { + /* + * At this point we know that tape[T] is empty. If there's just one + * (real or dummy) run left on each input tape, then only one merge + * pass remains. If we don't have to produce a materialized sorted + * tape, we can stop at this point and do the final merge on-the-fly. + */ + if (!state->randomAccess && !WORKER(state)) + { + bool allOneRun = true; + + Assert(state->tp_runs[state->tapeRange] == 0); + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) + { + allOneRun = false; + break; + } + } + if (allOneRun) + { + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); + /* Initialize for the final merge pass */ + beginmerge(state); + state->status = TSS_FINALMERGE; + return; + } + } + + /* Step D5: merge runs onto tape[T] until tape[P] is empty */ + while (state->tp_runs[state->tapeRange - 1] || + state->tp_dummy[state->tapeRange - 1]) + { + bool allDummy = true; + + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] == 0) + { + allDummy = false; + break; + } + } + + if (allDummy) + { + state->tp_dummy[state->tapeRange]++; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + state->tp_dummy[tapenum]--; + } + else + mergeonerun(state); + } + + /* Step D6: decrease level */ + if (--state->Level == 0) + break; + /* rewind output tape T to use
as new input */ + LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange], + state->read_buffer_size); + /* rewind used-up input tape P, and prepare it for write pass */ + LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]); + state->tp_runs[state->tapeRange - 1] = 0; + + /* + * reassign tape units per step D6; note we no longer care about A[] + */ + svTape = state->tp_tapenum[state->tapeRange]; + svDummy = state->tp_dummy[state->tapeRange]; + svRuns = state->tp_runs[state->tapeRange]; + for (tapenum = state->tapeRange; tapenum > 0; tapenum--) + { + state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1]; + state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1]; + state->tp_runs[tapenum] = state->tp_runs[tapenum - 1]; + } + state->tp_tapenum[0] = svTape; + state->tp_dummy[0] = svDummy; + state->tp_runs[0] = svRuns; + } + + /* + * Done. Knuth says that the result is on TAPE[1], but since we exited + * the loop without performing the last iteration of step D6, we have not + * rearranged the tape unit assignment, and therefore the result is on + * TAPE[T]. We need to do it this way so that we can freeze the final + * output tape while rewinding it. The last iteration of step D6 would be + * a waste of cycles anyway... + */ + state->result_tape = state->tp_tapenum[state->tapeRange]; + if (!WORKER(state)) + LogicalTapeFreeze(state->tapeset, state->result_tape, NULL); + else + worker_freeze_result_tape(state); + state->status = TSS_SORTEDONTAPE; + + /* Release the read buffers of all the other tapes, by rewinding them. */ + for (tapenum = 0; tapenum < state->maxTapes; tapenum++) + { + if (tapenum != state->result_tape) + LogicalTapeRewindForWrite(state->tapeset, tapenum); + } +} + +/* + * Merge one run from each input tape, except ones with dummy runs. + * + * This is the inner loop of Algorithm D step D5. We know that the + * output tape is TAPE[T].
+ */ +static void +mergeonerun(Tuplesortstate *state) +{ + int destTape = state->tp_tapenum[state->tapeRange]; + int srcTape; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. We can also decrease the input run/dummy run counts. + */ + beginmerge(state); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). + */ + while (state->memtupcount > 0) + { + SortTuple stup; + + /* remember which tape the top tuple came from, then write it to destTape */ + srcTape = state->memtuples[0].tupindex; + WRITETUP(state, destTape, &state->memtuples[0]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[0].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple); + + /* + * pull next tuple from the tape, and replace the written-out tuple in + * the heap with it. If the read yields nothing, just remove the top entry instead. + */ + if (mergereadnext(state, srcTape, &stup)) + { + stup.tupindex = srcTape; + tuplesort_heap_replace_top(state, &stup); + + } + else + tuplesort_heap_delete_top(state); + } + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape, and increment its count of real runs. + */ + markrunend(state, destTape); + state->tp_runs[state->tapeRange]++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished %d-way merge step: %s", state->worker, + state->activeTapes, pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * beginmerge - initialize for a merge pass + * + * We decrease the counts of real and dummy runs for each tape, and mark + * which tapes contain active input runs in mergeactive[]. Then, fill the + * merge heap with the first tuple from each active tape.
+ */ +static void +beginmerge(Tuplesortstate *state) +{ + int activeTapes; + int tapenum; + int srcTape; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + /* Adjust run counts and mark the active tapes */ + memset(state->mergeactive, 0, + state->maxTapes * sizeof(*state->mergeactive)); + activeTapes = 0; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] > 0) + state->tp_dummy[tapenum]--; + else + { + Assert(state->tp_runs[tapenum] > 0); + state->tp_runs[tapenum]--; + srcTape = state->tp_tapenum[tapenum]; + state->mergeactive[srcTape] = true; + activeTapes++; + } + } + Assert(activeTapes > 0); + state->activeTapes = activeTapes; + + /* Load the merge heap with the first tuple from each input tape */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + SortTuple tup; + + if (mergereadnext(state, srcTape, &tup)) + { + tup.tupindex = srcTape; + tuplesort_heap_insert(state, &tup); + } + } +} + +/* + * mergereadnext - read next tuple from one merge input tape + * + * Returns false on EOF. + */ +static bool +mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup) +{ + unsigned int tuplen; + + if (!state->mergeactive[srcTape]) + return false; /* tape's run is already exhausted */ + + /* read next tuple, if any */ + if ((tuplen = getlen(state, srcTape, true)) == 0) + { + state->mergeactive[srcTape] = false; + return false; + } + READTUP(state, stup, srcTape, tuplen); + + return true; +} + +/* + * dumptuples - remove tuples from memtuples and write initial run to tape + * + * When alltuples = true, dump everything currently in memory. (This case is + * only used at end of input data.) + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Nothing to do if we still fit in available memory and have array slots, + * unless this is the final call during initial run generation. 
+ */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state) && + !alltuples) + return; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. + * + * In general, short final runs are quite possible. Rather than allowing + * a special case where there was a superfluous selectnewtape() call (i.e. + * a call with no subsequent run actually written to destTape), we prefer + * to write out a 0 tuple run. + * + * mergereadnext() is prepared for 0 tuple runs, and will reliably mark + * the tape inactive for the merge when called from beginmerge(). This + * case is therefore similar to the case where mergeonerun() finds a dummy + * run for the tape, and so doesn't need to merge a run from the tape (or + * conceptually "merges" the dummy run, if you prefer). According to + * Knuth, Algorithm D "isn't strictly optimal" in its method of + * distribution and dummy run assignment; this edge case seems very + * unlikely to make that appreciably worse. 
+ */ + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d starting quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in the sizes of incoming tuples. Fragmentation due to + * AllocSetFree's bucketing by size class might be particularly bad if + * this step wasn't taken. 
+ */ + MemoryContextReset(state->tuplecontext); + + markrunend(state, state->tp_tapenum[state->destTape]); + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished writing run %d to tape %d: %s", + state->worker, state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + if (!alltuples) + selectnewtape(state); +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewindForRead(state->tapeset, + state->result_tape, + 0); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->tapeset, + state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void 
+tuplesort_restorepos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + LogicalTapeSeek(state->tapeset, + state->result_tape, + state->markpos_block, + state->markpos_offset); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + TuplesortInstrumentation *stats) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + if (state->tapeset) + { + stats->spaceType = SORT_SPACE_TYPE_DISK; + stats->spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024); + } + else + { + stats->spaceType = SORT_SPACE_TYPE_MEMORY; + stats->spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; + } + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT; + else + stats->sortMethod = SORT_TYPE_QUICKSORT; + break; + case TSS_SORTEDONTAPE: + stats->sortMethod = SORT_TYPE_EXTERNAL_SORT; + break; + case TSS_FINALMERGE: + stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE; + break; + default: + stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS; + break; + } +} + +/* + * Convert TuplesortMethod to a string. + */ +const char * +tuplesort_method_name(TuplesortMethod m) +{ + switch (m) + { + case SORT_TYPE_STILL_IN_PROGRESS: + return "still in progress"; + case SORT_TYPE_TOP_N_HEAPSORT: + return "top-N heapsort"; + case SORT_TYPE_QUICKSORT: + return "quicksort"; + case SORT_TYPE_EXTERNAL_SORT: + return "external sort"; + case SORT_TYPE_EXTERNAL_MERGE: + return "external merge"; + } + + return "unknown"; +} + +/* + * Convert TuplesortSpaceType to a string. + */ +const char * +tuplesort_space_type_name(TuplesortSpaceType t) +{ + Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY); + return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory"; +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + */ + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + Assert(SERIAL(state)); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount < state->bound) + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup); + } + else + { + /* + * The heap is full. Replace the largest entry with the new + * tuple, or just discard the new tuple, if it's larger than anything already + * in the heap. + */ + if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + tuplesort_heap_replace_top(state, &state->memtuples[i]); + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array, leaving the sort in TSS_SORTEDINMEM status with boundUsed set + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + Assert(SERIAL(state)); + + /* + * We can unheapify in place because each delete-top call will remove the + * largest entry, which we can promptly store in the newly freed slot at + * the end. Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_delete_top(state); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state.
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts, and external sort runs. Nothing is done when there are fewer than two tuples. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + Assert(!LEADER(state)); + + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: For some callers, tuple points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples; + int j; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (COMPARETUP(state, tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * Remove the tuple at state->memtuples[0] from the heap. Decrement + * memtupcount, and sift up to maintain the heap invariant. + * + * The caller has already free'd the tuple the top node points to, + * if necessary.
+ */ +static void +tuplesort_heap_delete_top(Tuplesortstate *state) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + + if (--state->memtupcount <= 0) + return; + + /* + * Remove the last tuple in the heap, and re-insert it, by replacing the + * current top node with it. + */ + tuple = &memtuples[state->memtupcount]; + tuplesort_heap_replace_top(state, tuple); +} + +/* + * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to + * maintain the heap invariant. + * + * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H, + * Heapsort, steps H3-H8). + */ +static void +tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples = state->memtuples; + unsigned int i, + n; + + Assert(state->memtupcount >= 1); + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (COMPARETUP(state, tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(Tuplesortstate *state, int tapenum, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(state->tapeset, tapenum, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(Tuplesortstate *state, int tapenum) +{ + unsigned int len = 0; + + LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. + * + * We use next free slot from the slab allocator, or palloc() if the tuple + * is too large for that. + */ +static void * +readtup_alloc(Tuplesortstate *state, Size tuplen) +{ + SlabSlot *buf; + + /* + * We pre-allocate enough slots in the slab arena that we should never run + * out. 
+ */ + Assert(state->slabFreeHead); + + if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead) + return MemoryContextAlloc(state->sortcontext, tuplen); + else + { + buf = state->slabFreeHead; + /* Reuse this slot */ + state->slabFreeHead = buf->nextfree; + + return buf; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed 
"tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); + } +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(state->tapeset, tapenum, + tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = 
heap_getattr(rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreHeapTuple(ltup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreHeapTuple(rtup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + 
state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = 
index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. + */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This is required for + * btree indexes, since heap TID is treated as an implicit last key + * attribute in order to ensure that all keys in the index are physically + * unique. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? 
-1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. + */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? 
-1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + IndexTuple tuple = (IndexTuple) tup; + unsigned int tuplen = IndexTupleSize(tuple); + IndexTuple newtuple; + Datum original; + + /* copy the tuple into sort storage */ + newtuple = (IndexTuple) MemoryContextAlloc(state->tuplecontext, tuplen); + memcpy(newtuple, tuple, tuplen); + USEMEM(state, GetMemoryChunkSpace(newtuple)); + stup->tuple = (void *) newtuple; + /* set up first-column key value */ + original = index_getattr(newtuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (IndexTuple) mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +/* + * Parallel sort routines + */ + +/* + * tuplesort_estimate_shared - estimate required shared memory allocation + * + * nWorkers is an estimate of the number of workers (it's the number that + * will be requested). + */ +Size +tuplesort_estimate_shared(int nWorkers) +{ + Size tapesSize; + + Assert(nWorkers > 0); + + /* Make sure that BufFile shared state is MAXALIGN'd */ + tapesSize = mul_size(sizeof(TapeShare), nWorkers); + tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes))); + + return tapesSize; +} + +/* + * tuplesort_initialize_shared - initialize shared tuplesort state + * + * Must be called from leader process before workers are launched, to + * establish state needed up-front for worker tuplesortstates. nWorkers + * should match the argument passed to tuplesort_estimate_shared(). 
+ */ +void +tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg) +{ + int i; + + Assert(nWorkers > 0); + + SpinLockInit(&shared->mutex); + shared->currentWorker = 0; + shared->workersFinished = 0; + SharedFileSetInit(&shared->fileset, seg); + shared->nTapes = nWorkers; + for (i = 0; i < nWorkers; i++) + { + shared->tapes[i].firstblocknumber = 0L; + } +} + +/* + * tuplesort_attach_shared - attach to shared tuplesort state + * + * Must be called by all worker processes. + */ +void +tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) +{ + /* Attach to SharedFileSet */ + SharedFileSetAttach(&shared->fileset, seg); +} + +/* + * worker_get_identifier - Assign and return ordinal identifier for worker + * + * The order in which these are assigned is not well defined, and should not + * matter; worker numbers across parallel sort participants need only be + * distinct and gapless. logtape.c requires this. + * + * Note that the identifiers assigned from here have no relation to + * ParallelWorkerNumber number, to avoid making any assumption about + * caller's requirements. However, we do follow the ParallelWorkerNumber + * convention of representing a non-worker with worker number -1. This + * includes the leader, as well as serial Tuplesort processes. + */ +static int +worker_get_identifier(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int worker; + + Assert(WORKER(state)); + + SpinLockAcquire(&shared->mutex); + worker = shared->currentWorker++; + SpinLockRelease(&shared->mutex); + + return worker; +} + +/* + * worker_freeze_result_tape - freeze worker's result tape for leader + * + * This is called by workers just after the result tape has been determined, + * instead of calling LogicalTapeFreeze() directly. They do so because + * workers require a few additional steps over similar serial + * TSS_SORTEDONTAPE external sort cases, which also happen here. 
The extra + * steps are around freeing now unneeded resources, and representing to + * leader that worker's input run is available for its merge. + * + * There should only be one final output run for each worker, which consists + * of all tuples that were originally input into worker. + */ +static void +worker_freeze_result_tape(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + TapeShare output; + + Assert(WORKER(state)); + Assert(state->result_tape != -1); + Assert(state->memtupcount == 0); + + /* + * Free most remaining memory, in case caller is sensitive to our holding + * on to it. memtuples may not be a tiny merge heap at this point. + */ + pfree(state->memtuples); + /* Be tidy */ + state->memtuples = NULL; + state->memtupsize = 0; + + /* + * Parallel worker requires result tape metadata, which is to be stored in + * shared memory for leader + */ + LogicalTapeFreeze(state->tapeset, state->result_tape, &output); + + /* Store properties of output tape, and update finished worker count */ + SpinLockAcquire(&shared->mutex); + shared->tapes[state->worker] = output; + shared->workersFinished++; + SpinLockRelease(&shared->mutex); +} + +/* + * worker_nomergeruns - dump memtuples in worker, without merging + * + * This is called as an alternative to mergeruns() with a worker when no + * merging is required. + */ +static void +worker_nomergeruns(Tuplesortstate *state) +{ + Assert(WORKER(state)); + Assert(state->result_tape == -1); + + state->result_tape = state->tp_tapenum[state->destTape]; + worker_freeze_result_tape(state); +} + +/* + * leader_takeover_tapes - create tapeset for leader from worker tapes + * + * So far, leader Tuplesortstate has performed no actual sorting. By now, all + * sorting has occurred in workers, all of which must have already returned + * from tuplesort_performsort(). 
+ * + * When this returns, leader process is left in a state that is virtually + * indistinguishable from it having generated runs as a serial external sort + * might have. + */ +static void +leader_takeover_tapes(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int nParticipants = state->nParticipants; + int workersFinished; + int j; + + Assert(LEADER(state)); + Assert(nParticipants >= 1); + + SpinLockAcquire(&shared->mutex); + workersFinished = shared->workersFinished; + SpinLockRelease(&shared->mutex); + + if (nParticipants != workersFinished) + elog(ERROR, "cannot take over tapes before all workers finish"); + + /* + * Create the tapeset from worker tapes, including a leader-owned tape at + * the end. Parallel workers are far more expensive than logical tapes, + * so the number of tapes allocated here should never be excessive. + * + * We still have a leader tape, though it's not possible to write to it + * due to restrictions in the shared fileset infrastructure used by + * logtape.c. It will never be written to in practice because + * randomAccess is disallowed for parallel sorts. + */ + inittapestate(state, nParticipants + 1); + state->tapeset = LogicalTapeSetCreate(nParticipants + 1, shared->tapes, + &shared->fileset, state->worker); + + /* mergeruns() relies on currentRun for # of runs (in one-pass cases) */ + state->currentRun = nParticipants; + + /* + * Initialize variables of Algorithm D to be consistent with runs from + * workers having been generated in the leader. + * + * There will always be exactly 1 run per worker, and exactly one input + * tape per run, because workers always output exactly 1 run, even when + * there were no input tuples for workers to sort.
+ */ + for (j = 0; j < state->maxTapes; j++) + { + /* + * One real run; no dummy runs for worker tapes. The loop bound is + * maxTapes, so it also fills the slot at index tapeRange, which is + * overridden for the leader tape right after the loop (assumes + * tapeRange < maxTapes -- TODO confirm against inittapestate()). + */ + state->tp_fib[j] = 1; + state->tp_runs[j] = 1; + state->tp_dummy[j] = 0; + state->tp_tapenum[j] = j; + } + /* Leader tape gets one dummy run, and no real runs */ + state->tp_fib[state->tapeRange] = 0; + state->tp_runs[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 1; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory. + * + * If stup->tuple is set, its chunk space is credited back via FREEMEM, the + * chunk is pfree'd, and the pointer is reset to NULL; a NULL tuple pointer + * makes this a no-op. + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort13.c b/src/tuplesort13.c new file mode 100644 index 0000000000..98d68a143d --- /dev/null +++ b/src/tuplesort13.c @@ -0,0 +1,4706 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we always use + * quicksort for run generation. We merge the runs using polyphase merge, + * Knuth's Algorithm 5.4.2D.
The logical "tapes" used by Algorithm D are + * implemented by logtape.c, which avoids space wastage by recycling disk + * space as soon as each block is read from its "tape". + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run, + * then merge the runs using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. 
Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c, we just tell it how + * much memory to use for the buffers. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. In our implementation a "tape drive" + * doesn't cost much more than a few Kb of memory buffers, so we can afford + * to have lots of them. In particular, if we can have as many tape drives + * as sorted runs, we can eliminate any repeated I/O at all. 
In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes (but not + * too many -- see the comments in tuplesort_merge_order). + * + * This module supports parallel sorting. Parallel sorts involve coordination + * among one or more worker processes, and a leader process, each with its own + * tuplesort state. The leader process (or, more accurately, the + * Tuplesortstate associated with a leader process) creates a full tapeset + * consisting of worker tapes with one run to merge; a run for every + * worker process. This is then merged. Worker processes are guaranteed to + * produce exactly one output run from their partial input. + * + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* Sort parallel code from state for sort__start probes */ +#define PARALLEL_SORT(state) ((state)->shared == NULL ?
0 : \ + (state)->worker >= 0 ? 1 : 2) + +/* + * Initial size of memtuples array. We're trying to select this size so that + * array doesn't exceed ALLOCSET_SEPARATE_THRESHOLD and so that the overhead of + * allocation might possibly be lowered. However, we don't consider array sizes + * less than 1024. + * + * NOTE(review): the second Max() argument below is one element more than + * fits in ALLOCSET_SEPARATE_THRESHOLD bytes, so the array always ends up + * over the threshold rather than under it; the wording above looks + * inverted -- confirm. + */ +#define INITIAL_MEMTUPSIZE Max(1024, \ + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1) + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during merge, when we use a + * simple slab allocator). SortTuples also contain the tuple's first key + * column in Datum/nullflag format, and a source/input tape number that + * tracks which tape each heap element/slot belongs to during merging. + * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values).
If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. + */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int srctape; /* source tape number */ +} SortTuple; + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples, to avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. + * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. + */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 1 block's + * worth of buffer space.
This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? 
*/ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + int64 maxSpace; /* maximum amount of space occupied among sort + * of groups, either in-memory or on-disk */ + bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk + * space, false when it's value for in-memory + * space */ + TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ + MemoryContext maincontext; /* memory context for tuple sort metadata that + * persists across multiple batches */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a<b, a=b, a>b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below.
Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. 
When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Buffer size to use for reading input tapes, during merge. */ + size_t read_buffer_size; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * This variable is only used during merge passes. mergeactive[i] is true + * if we are reading an input run from (actual) tape number i and have not + * yet exhausted that run. + */ + bool *mergeactive; /* active input run source? */ + + /* + * Variables for Algorithm D. Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! 
+ */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run leader can merge. Typically includes a worker state held + * by the leader process itself. Set in the leader Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. 
+ */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. 
+ */ +struct Sharedsort +{ + /* mutex protects all fields prior to tapes */ + slock_t mutex; + + /* + * currentWorker generates ordinal identifier numbers for parallel sort + * workers. These start from 0, and are always gapless. + * + * Workers increment workersFinished to indicate having finished. If this + * is equal to state.nParticipants within the leader, leader is ready to + * merge worker runs. + */ + int currentWorker; + int workersFinished; + + /* Temporary file space */ + SharedFileSet fileset; + + /* Size of tapes flexible array */ + int nTapes; + + /* + * Tapes array used by workers to report back information needed by the + * leader to concatenate all worker tapes into one for merging + */ + TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. 
+ */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) +#define SERIAL(state) ((state)->shared == NULL) +#define WORKER(state) ((state)->shared && (state)->worker != -1) +#define LEADER(state) ((state)->shared && (state)->worker == -1) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. 
When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. + * Note that this places a responsibility on copytup routines to use the + * correct memory context for these tuples (and to not use the reset + * context for anything whose lifetime needs to span multiple external + * sort runs). readtup routines use the slab allocator (they cannot use + * the reset context because it gets deleted at the point that merging + * begins). 
+ */ + +/* When using this macro, beware of double evaluation of len */ +#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ + do { \ + if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ + elog(ERROR, "unexpected end of data"); \ + } while(0) + + +static Tuplesortstate *tuplesort_begin_common(int workMem, + SortCoordinate coordinate, + bool randomAccess); +static void tuplesort_begin_batch(Tuplesortstate *state); +static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); +static bool consider_abort_common(Tuplesortstate *state); +static void inittapes(Tuplesortstate *state, bool mergeruns); +static void inittapestate(Tuplesortstate *state, int maxTapes); +static void selectnewtape(Tuplesortstate *state); +static void init_slab_allocator(Tuplesortstate *state, int numSlots); +static void mergeruns(Tuplesortstate *state); +static void mergeonerun(Tuplesortstate *state); +static void beginmerge(Tuplesortstate *state); +static bool mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup); +static void dumptuples(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); +static void tuplesort_sort_memtuples(Tuplesortstate *state); +static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple); +static void tuplesort_heap_delete_top(Tuplesortstate *state); +static void reversedirection(Tuplesortstate *state); +static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK); +static void markrunend(Tuplesortstate *state, int tapenum); +static void *readtup_alloc(Tuplesortstate *state, Size tuplen); +static int comparetup_heap(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_heap(Tuplesortstate *state, int tapenum, + 
SortTuple *stup); +static void readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_cluster(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_index(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int comparetup_datum(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_datum(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static int worker_get_identifier(Tuplesortstate *state); +static void worker_freeze_result_tape(Tuplesortstate *state); +static void worker_nomergeruns(Tuplesortstate *state); +static void leader_takeover_tapes(Tuplesortstate *state); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); +static void tuplesort_free(Tuplesortstate *state); +static void tuplesort_updatemax(Tuplesortstate *state); + +/* + * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts + * any variant of SortTuples, using the appropriate comparetup function. 
+ * qsort_ssup() is specialized for the case where the comparetup function + * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts + * and Datum sorts. + */ +#include "qsort_tuple.c" + + +/* + * tuplesort_begin_xxx + * + * Initialize for a tuple sort operation. + * + * After calling tuplesort_begin, the caller should call tuplesort_putXXX + * zero or more times, then call tuplesort_performsort when all the tuples + * have been supplied. After performsort, retrieve the tuples in sorted + * order by calling tuplesort_getXXX until it returns false/NULL. (If random + * access was requested, rescan, markpos, and restorepos can also be called.) + * Call tuplesort_end to terminate the operation and release memory/disk space. + * + * Each variant of tuplesort_begin has a workMem parameter specifying the + * maximum number of kilobytes of RAM to use before spilling data to disk. + * (The normal value of this parameter is work_mem, but some callers use + * other values.) Each variant also has a randomAccess parameter specifying + * whether the caller needs non-sequential access to the sort result. + */ + +static Tuplesortstate * +tuplesort_begin_common(int workMem, SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state; + MemoryContext maincontext; + MemoryContext sortcontext; + MemoryContext oldcontext; + + /* See leader_takeover_tapes() remarks on randomAccess support */ + if (coordinate && randomAccess) + elog(ERROR, "random access disallowed under parallel sort"); + + /* + * Memory context surviving tuplesort_reset. This memory context holds + * data which is useful to keep while sorting multiple similar batches. + */ + maincontext = AllocSetContextCreate(CurrentMemoryContext, + "TupleSort main", + ALLOCSET_DEFAULT_SIZES); + + /* + * Create a working memory context for one sort operation. The content of + * this context is deleted by tuplesort_reset. 
+ */ + sortcontext = AllocSetContextCreate(maincontext, + "TupleSort sort", + ALLOCSET_DEFAULT_SIZES); + + /* + * Additionally a working memory context for tuples is setup in + * tuplesort_begin_batch. + */ + + /* + * Make the Tuplesortstate within the per-sortstate context. This way, we + * don't need a separate pfree() operation for it at shutdown. + */ + oldcontext = MemoryContextSwitchTo(maincontext); + + state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate)); + +#ifdef TRACE_SORT + if (trace_sort) + pg_rusage_init(&state->ru_start); +#endif + + state->randomAccess = randomAccess; + state->tuples = true; + + /* + * workMem is forced to be at least 64KB, the current minimum valid value + * for the work_mem GUC. This is a defense against parallel sort callers + * that divide out memory among many workers in a way that leaves each + * with very little memory. + */ + state->allowedMem = Max(workMem, 64) * (int64) 1024; + state->sortcontext = sortcontext; + state->maincontext = maincontext; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). + */ + state->memtupsize = INITIAL_MEMTUPSIZE; + state->memtuples = NULL; + + /* + * After all of the other non-parallel-related state, we setup all of the + * state needed for each batch. 
+ */ + tuplesort_begin_batch(state); + + /* + * Initialize parallel-related state based on coordination information + * from caller + */ + if (!coordinate) + { + /* Serial sort */ + state->shared = NULL; + state->worker = -1; + state->nParticipants = -1; + } + else if (coordinate->isWorker) + { + /* Parallel worker produces exactly one final run from all input */ + state->shared = coordinate->sharedsort; + state->worker = worker_get_identifier(state); + state->nParticipants = -1; + } + else + { + /* Parallel leader state only used for final merge */ + state->shared = coordinate->sharedsort; + state->worker = -1; + state->nParticipants = coordinate->nParticipants; + Assert(state->nParticipants >= 1); + } + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_begin_batch + * + * Setup, or reset, all state need for processing a new set of tuples with this + * sort state. Called both from tuplesort_begin_common (the first time sorting + * with this sort state) and tuplesort_reset (for subsequent usages). + */ +static void +tuplesort_begin_batch(Tuplesortstate *state) +{ + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + + /* + * Caller tuple (e.g. IndexTuple) memory context. + * + * A dedicated child context used exclusively for caller passed tuples + * eases memory management. Resetting at key points reduces + * fragmentation. Note that the memtuples array of SortTuples is allocated + * in the parent context, not this context, because there is no need to + * free memtuples early. + */ + state->tuplecontext = AllocSetContextCreate(state->sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + + state->status = TSS_INITIAL; + state->bounded = false; + state->boundUsed = false; + + state->availMem = state->allowedMem; + + state->tapeset = NULL; + + state->memtupcount = 0; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). 
+ */ + state->growmemtuples = true; + state->slabAllocatorUsed = false; + if (state->memtuples != NULL && state->memtupsize != INITIAL_MEMTUPSIZE) + { + pfree(state->memtuples); + state->memtuples = NULL; + state->memtupsize = INITIAL_MEMTUPSIZE; + } + if (state->memtuples == NULL) + { + state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + } + + /* workMem must be large enough for the minimal memtuples array */ + if (LACKMEM(state)) + elog(ERROR, "insufficient memory allowed for sort"); + + state->currentRun = 0; + + /* + * maxTapes, tapeRange, and Algorithm D variables will be initialized by + * inittapes(), if needed + */ + + state->result_tape = -1; /* flag that result tape has not been formed */ + + MemoryContextSwitchTo(oldcontext); +} + +Tuplesortstate * +tuplesort_begin_heap(TupleDesc tupDesc, + int nkeys, AttrNumber *attNums, + Oid *sortOperators, Oid *sortCollations, + bool *nullsFirstFlags, + int workMem, SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + + AssertArg(nkeys > 0); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + nkeys, workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + pfree(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? 
+ BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + pfree(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_hash(Relation heapRel, + Relation indexRel, + uint32 high_mask, + uint32 low_mask, + uint32 max_buckets, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: high_mask = 0x%x, low_mask = 0x%x, " + "max_buckets = 0x%x, workMem = %d, randomAccess = %c", + high_mask, + low_mask, + max_buckets, + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the hash code */ + + state->comparetup = comparetup_index_hash; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + state->high_mask = high_mask; + state->low_mask = low_mask; + state->max_buckets = max_buckets; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple as needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each SortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. 
(Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. Parallel leader tuplesorts will always ignore the hint. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL && state->memtupcount == 0); + /* Can't set the bound twice, either */ + Assert(!state->bounded); + /* Also, this shouldn't be called in a parallel worker */ + Assert(!WORKER(state)); + + /* Parallel leader allows but ignores hint */ + if (LEADER(state)) + return; + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX / 2)) + return; + + state->bounded = true; + state->bound = (int) bound; + + /* + * Bounded sorts are not an effective target for abbreviated key + * optimization. Disable by setting state to be consistent with no + * abbreviation support. + */ + state->sortKeys->abbrev_converter = NULL; + if (state->sortKeys->abbrev_full_comparator) + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; +} + +/* + * tuplesort_used_bound + * + * Allow callers to find out if the sort state was able to use a bound. + */ +bool +tuplesort_used_bound(Tuplesortstate *state) +{ + return state->boundUsed; +} + +/* + * tuplesort_free + * + * Internal routine for freeing resources of tuplesort. 
+ */ +static void +tuplesort_free(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", + SERIAL(state) ? "external sort" : "parallel external sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "%s of worker %d ended, %ld KB used: %s", + SERIAL(state) ? "internal sort" : "unperformed parallel sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. + */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory. + */ + MemoryContextReset(state->sortcontext); +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! 
+ */ +void +tuplesort_end(Tuplesortstate *state) +{ + tuplesort_free(state); + + /* + * Free the main memory context, including the Tuplesortstate struct + * itself. + */ + MemoryContextDelete(state->maincontext); +} + +/* + * tuplesort_updatemax + * + * Update maximum resource usage statistics. + */ +static void +tuplesort_updatemax(Tuplesortstate *state) +{ + int64 spaceUsed; + bool isSpaceDisk; + + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? + */ + if (state->tapeset) + { + isSpaceDisk = true; + spaceUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ; + } + else + { + isSpaceDisk = false; + spaceUsed = state->allowedMem - state->availMem; + } + + /* + * Sort evicts data to the disk when it wasn't able to fit that data into + * main memory. This is why we assume space used on the disk to be more + * important for tracking resource usage than space used in memory. Note + * that the amount of space occupied by some tupleset on the disk might be + * less than amount of space occupied by the same tupleset in memory due + * to more compact representation. + */ + if ((isSpaceDisk && !state->isMaxSpaceDisk) || + (isSpaceDisk == state->isMaxSpaceDisk && spaceUsed > state->maxSpace)) + { + state->maxSpace = spaceUsed; + state->isMaxSpaceDisk = isSpaceDisk; + state->maxSpaceStatus = state->status; + } +} + +/* + * tuplesort_reset + * + * Reset the tuplesort. Reset all the data in the tuplesort, but leave the + * meta-information in. After tuplesort_reset, tuplesort is ready to start + * a new sort. 
This allows avoiding recreation of tuple sort states (and + * save resources) when sorting multiple small batches. + */ +void +tuplesort_reset(Tuplesortstate *state) +{ + tuplesort_updatemax(state); + tuplesort_free(state); + + /* + * After we've freed up per-batch memory, re-setup all of the state common + * to both the first batch and any subsequent batch. + */ + tuplesort_begin_batch(state); + + state->lastReturnedTuple = NULL; + state->slabMemoryBegin = NULL; + state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. Return true if we were able to enlarge the array, false if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. The + * growmemtuples flag accomplishes that and also prevents useless + * recalculations in this function. + */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples. 
+ */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. 
+ */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. (We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) 
+ */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. 
+ */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + Assert(!LEADER(state)); + + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state, true); + + /* + * Dump all tuples. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_replace_top(state, tuple); + } + break; + + case TSS_BUILDRUNS: + + /* + * Save the tuple into the unsorted array (there must be space) + */ + state->memtuples[state->memtupcount++] = *tuple; + + /* + * If we are over the memory limit, dump all tuples. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. + */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. 
+ */
+void
+tuplesort_performsort(Tuplesortstate *state)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "performsort of worker %d starting: %s",
+			 state->worker, pg_rusage_show(&state->ru_start));
+#endif
+
+	switch (state->status)
+	{
+		case TSS_INITIAL:
+
+			/*
+			 * Either we were able to accumulate all the tuples within the
+			 * allowed amount of memory (serial or worker case), or this is
+			 * the leader, which takes over the worker tapes.
+			 */
+			if (SERIAL(state))
+			{
+				/* Just qsort 'em and we're done */
+				tuplesort_sort_memtuples(state);
+				state->status = TSS_SORTEDINMEM;
+			}
+			else if (WORKER(state))
+			{
+				/*
+				 * Parallel workers must still dump out tuples to tape. No
+				 * merge is required to produce single output run, though.
+				 */
+				inittapes(state, false);
+				dumptuples(state, true);
+				worker_nomergeruns(state);
+				state->status = TSS_SORTEDONTAPE;
+			}
+			else
+			{
+				/*
+				 * Leader will take over worker tapes and merge worker runs.
+				 * Note that mergeruns sets the correct state->status.
+				 */
+				leader_takeover_tapes(state);
+				mergeruns(state);
+			}
+			state->current = 0;
+			state->eof_reached = false;
+			state->markpos_block = 0L;
+			state->markpos_offset = 0;
+			state->markpos_eof = false;
+			break;
+
+		case TSS_BOUNDED:
+
+			/*
+			 * We were able to accumulate all the tuples required for output
+			 * in memory, using a heap to eliminate excess tuples. Now we
+			 * have to transform the heap to a properly-sorted array.
+			 */
+			sort_bounded_heap(state);
+			state->current = 0;
+			state->eof_reached = false;
+			state->markpos_offset = 0;
+			state->markpos_eof = false;
+			state->status = TSS_SORTEDINMEM;
+			break;
+
+		case TSS_BUILDRUNS:
+
+			/*
+			 * Finish tape-based sort. First, flush all tuples remaining in
+			 * memory out to tape; then merge until we have a single remaining
+			 * run (or, if !randomAccess and !WORKER(), one run per tape).
+			 * Note that mergeruns sets the correct state->status.
+			 */
+			dumptuples(state, true);
+			mergeruns(state);
+			state->eof_reached = false;
+			state->markpos_block = 0L;
+			state->markpos_offset = 0;
+			state->markpos_eof = false;
+			break;
+
+		default:
+			elog(ERROR, "invalid tuplesort state");
+			break;
+	}
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+	{
+		if (state->status == TSS_FINALMERGE)
+			elog(LOG, "performsort of worker %d done (except %d-way final merge): %s",
+				 state->worker, state->activeTapes,
+				 pg_rusage_show(&state->ru_start));
+		else
+			elog(LOG, "performsort of worker %d done: %s",
+				 state->worker, pg_rusage_show(&state->ru_start));
+	}
+#endif
+
+	MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * Internal routine to fetch the next tuple in either forward or back
+ * direction into *stup. Returns false if no more tuples.
+ * Returned tuple belongs to tuplesort memory context, and must not be freed
+ * by caller. Note that fetched tuple is stored in memory that may be
+ * recycled by any future fetch.
+ *
+ * The source of the tuple depends on state->status: TSS_SORTEDINMEM reads
+ * from memtuples[], TSS_SORTEDONTAPE reads from result_tape, and
+ * TSS_FINALMERGE pops the top of the merge heap. Any other status raises
+ * an error. Backward fetches (forward == false) are asserted to require
+ * state->randomAccess, and are asserted not to happen in TSS_FINALMERGE.
+ * Must not be called in a parallel worker (asserted).
+ */
+static bool
+tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
+						  SortTuple *stup)
+{
+	unsigned int tuplen;
+	size_t		nmoved;
+
+	Assert(!WORKER(state));
+
+	switch (state->status)
+	{
+		case TSS_SORTEDINMEM:
+			Assert(forward || state->randomAccess);
+			Assert(!state->slabAllocatorUsed);
+			if (forward)
+			{
+				if (state->current < state->memtupcount)
+				{
+					*stup = state->memtuples[state->current++];
+					return true;
+				}
+				state->eof_reached = true;
+
+				/*
+				 * Complain if caller tries to retrieve more tuples than
+				 * originally asked for in a bounded sort. This is because
+				 * returning EOF here might be the wrong thing.
+				 */
+				if (state->bounded && state->current >= state->bound)
+					elog(ERROR, "retrieved too many tuples in a bounded sort");
+
+				return false;
+			}
+			else
+			{
+				if (state->current <= 0)
+					return false;
+
+				/*
+				 * if all tuples are fetched already then we return last
+				 * tuple, else - tuple before last returned.
+				 */
+				if (state->eof_reached)
+					state->eof_reached = false;
+				else
+				{
+					state->current--;	/* last returned tuple */
+					if (state->current <= 0)
+						return false;
+				}
+				*stup = state->memtuples[state->current - 1];
+				return true;
+			}
+			break;
+
+		case TSS_SORTEDONTAPE:
+			Assert(forward || state->randomAccess);
+			Assert(state->slabAllocatorUsed);
+
+			/*
+			 * The slot that held the tuple that we returned in previous
+			 * gettuple call can now be reused.
+			 */
+			if (state->lastReturnedTuple)
+			{
+				RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
+				state->lastReturnedTuple = NULL;
+			}
+
+			if (forward)
+			{
+				if (state->eof_reached)
+					return false;
+
+				if ((tuplen = getlen(state, state->result_tape, true)) != 0)
+				{
+					READTUP(state, stup, state->result_tape, tuplen);
+
+					/*
+					 * Remember the tuple we return, so that we can recycle
+					 * its memory on next call. (This can be NULL, in the
+					 * !state->tuples case).
+					 */
+					state->lastReturnedTuple = stup->tuple;
+
+					return true;
+				}
+				else
+				{
+					state->eof_reached = true;
+					return false;
+				}
+			}
+
+			/*
+			 * Backward.
+			 *
+			 * if all tuples are fetched already then we return last tuple,
+			 * else - tuple before last returned.
+			 */
+			if (state->eof_reached)
+			{
+				/*
+				 * Seek position is pointing just past the zero tuplen at the
+				 * end of file; back up to fetch last tuple's ending length
+				 * word. If seek fails we must have a completely empty file.
+				 */
+				nmoved = LogicalTapeBackspace(state->tapeset,
+											  state->result_tape,
+											  2 * sizeof(unsigned int));
+				if (nmoved == 0)
+					return false;
+				else if (nmoved != 2 * sizeof(unsigned int))
+					elog(ERROR, "unexpected tape position");
+				state->eof_reached = false;
+			}
+			else
+			{
+				/*
+				 * Back up and fetch previously-returned tuple's ending length
+				 * word. If seek fails, assume we are at start of file.
+				 */
+				nmoved = LogicalTapeBackspace(state->tapeset,
+											  state->result_tape,
+											  sizeof(unsigned int));
+				if (nmoved == 0)
+					return false;
+				else if (nmoved != sizeof(unsigned int))
+					elog(ERROR, "unexpected tape position");
+				tuplen = getlen(state, state->result_tape, false);
+
+				/*
+				 * Back up to get ending length word of tuple before it.
+				 */
+				nmoved = LogicalTapeBackspace(state->tapeset,
+											  state->result_tape,
+											  tuplen + 2 * sizeof(unsigned int));
+				if (nmoved == tuplen + sizeof(unsigned int))
+				{
+					/*
+					 * We backed up over the previous tuple, but there was no
+					 * ending length word before it. That means that the prev
+					 * tuple is the first tuple in the file. It is now the
+					 * next to read in forward direction (not obviously right,
+					 * but that is what in-memory case does).
+					 */
+					return false;
+				}
+				else if (nmoved != tuplen + 2 * sizeof(unsigned int))
+					elog(ERROR, "bogus tuple length in backward scan");
+			}
+
+			tuplen = getlen(state, state->result_tape, false);
+
+			/*
+			 * Now we have the length of the prior tuple, back up and read it.
+			 * Note: READTUP expects we are positioned after the initial
+			 * length word of the tuple, so back up to that point.
+			 */
+			nmoved = LogicalTapeBackspace(state->tapeset,
+										  state->result_tape,
+										  tuplen);
+			if (nmoved != tuplen)
+				elog(ERROR, "bogus tuple length in backward scan");
+			READTUP(state, stup, state->result_tape, tuplen);
+
+			/*
+			 * Remember the tuple we return, so that we can recycle its memory
+			 * on next call. (This can be NULL, in the Datum case).
+			 */
+			state->lastReturnedTuple = stup->tuple;
+
+			return true;
+
+		case TSS_FINALMERGE:
+			Assert(forward);
+			/* We are managing memory ourselves, with the slab allocator. */
+			Assert(state->slabAllocatorUsed);
+
+			/*
+			 * The slab slot holding the tuple that we returned in previous
+			 * gettuple call can now be reused.
+			 */
+			if (state->lastReturnedTuple)
+			{
+				RELEASE_SLAB_SLOT(state, state->lastReturnedTuple);
+				state->lastReturnedTuple = NULL;
+			}
+
+			/*
+			 * This code should match the inner loop of mergeonerun().
+			 */
+			if (state->memtupcount > 0)
+			{
+				int			srcTape = state->memtuples[0].srctape;
+				SortTuple	newtup;
+
+				*stup = state->memtuples[0];
+
+				/*
+				 * Remember the tuple we return, so that we can recycle its
+				 * memory on next call. (This can be NULL, in the Datum case).
+				 */
+				state->lastReturnedTuple = stup->tuple;
+
+				/*
+				 * Pull next tuple from tape, and replace the returned tuple
+				 * at top of the heap with it.
+				 */
+				if (!mergereadnext(state, srcTape, &newtup))
+				{
+					/*
+					 * If no more data, we've reached end of run on this tape.
+					 * Remove the top node from the heap.
+					 */
+					tuplesort_heap_delete_top(state);
+
+					/*
+					 * Rewind to free the read buffer. It'd go away at the
+					 * end of the sort anyway, but better to release the
+					 * memory early.
+					 */
+					LogicalTapeRewindForWrite(state->tapeset, srcTape);
+					return true;
+				}
+				newtup.srctape = srcTape;
+				tuplesort_heap_replace_top(state, &newtup);
+				return true;
+			}
+			return false;
+
+		default:
+			elog(ERROR, "invalid tuplesort state");
+			return false;		/* keep compiler quiet */
+	}
+}
+
+/*
+ * Fetch the next tuple in either forward or back direction.
+ * If successful, put tuple in slot and return true; else, clear the slot
+ * and return false.
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required. Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality. A
+ * NULL value in leading attribute will set abbreviated value to zeroed
+ * representation, which caller may rely on in abbreviated inequality check.
+ *
+ * If copy is true, the slot receives a tuple that's been copied into the
+ * caller's memory context, so that it will stay valid regardless of future
+ * manipulations of the tuplesort's state (up to and including deleting the
+ * tuplesort). If copy is false, the slot will just receive a pointer to a
+ * tuple held within the tuplesort, which is more efficient, but only safe for
+ * callers that are prepared to have any subsequent manipulation of the
+ * tuplesort's state invalidate slot contents.
+ */
+bool
+tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy,
+					   TupleTableSlot *slot, Datum *abbrev)
+{
+	MemoryContext callerContext;
+	SortTuple	fetched;
+	MinimalTuple mtup;
+
+	/* Do the fetch inside the sort's own context, then switch straight back */
+	callerContext = MemoryContextSwitchTo(state->sortcontext);
+	if (!tuplesort_gettuple_common(state, forward, &fetched))
+		fetched.tuple = NULL;
+	MemoryContextSwitchTo(callerContext);
+
+	/* Nothing fetched: hand back an empty slot */
+	if (!fetched.tuple)
+	{
+		ExecClearTuple(slot);
+		return false;
+	}
+
+	/* Pass the abbreviated key back only if abbreviation is still in use */
+	if (state->sortKeys->abbrev_converter && abbrev)
+		*abbrev = fetched.datum1;
+
+	/* The optional copy is made after switching back to the caller's context */
+	mtup = (MinimalTuple) fetched.tuple;
+	if (copy)
+		mtup = heap_copy_minimal_tuple(mtup);
+
+	ExecStoreMinimalTuple(mtup, slot, copy);
+	return true;
+}
+
+/*
+ * Fetch the next heap tuple, scanning forward or backward as requested.
+ * NULL means the sort has no further tuples in that direction. The result
+ * points into memory owned by the tuplesort: the caller must not free it,
+ * and must not expect it to survive any later operation on the tuplesort.
+ */
+HeapTuple
+tuplesort_getheaptuple(Tuplesortstate *state, bool forward)
+{
+	MemoryContext callerContext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	fetched;
+	bool		found;
+
+	found = tuplesort_gettuple_common(state, forward, &fetched);
+
+	MemoryContextSwitchTo(callerContext);
+
+	return found ? fetched.tuple : NULL;
+}
+
+/*
+ * Fetch the next index tuple in either forward or back direction.
+ * Returns NULL if no more tuples.
Returned tuple belongs to tuplesort memory
+ * context, and must not be freed by caller. Caller may not rely on tuple
+ * remaining valid after any further manipulation of tuplesort.
+ */
+IndexTuple
+tuplesort_getindextuple(Tuplesortstate *state, bool forward)
+{
+	MemoryContext callerContext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	fetched;
+	bool		found;
+
+	/* The fetch runs in the sort's context; restore the caller's afterwards */
+	found = tuplesort_gettuple_common(state, forward, &fetched);
+
+	MemoryContextSwitchTo(callerContext);
+
+	if (!found)
+		return (IndexTuple) NULL;
+
+	return (IndexTuple) fetched.tuple;
+}
+
+/*
+ * Fetch the next Datum in either forward or back direction.
+ * Returns false if no more datums.
+ *
+ * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
+ * in caller's context, and is now owned by the caller (this differs from
+ * similar routines for other types of tuplesorts).
+ *
+ * Caller may optionally be passed back abbreviated value (on true return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required. Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality. A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
+ */
+bool
+tuplesort_getdatum(Tuplesortstate *state, bool forward,
+				   Datum *val, bool *isNull, Datum *abbrev)
+{
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+	SortTuple	stup;
+
+	if (!tuplesort_gettuple_common(state, forward, &stup))
+	{
+		MemoryContextSwitchTo(oldcontext);
+		return false;
+	}
+
+	/* Ensure we copy into caller's memory context */
+	MemoryContextSwitchTo(oldcontext);
+
+	/* Record abbreviated key for caller */
+	if (state->sortKeys->abbrev_converter && abbrev)
+		*abbrev = stup.datum1;
+
+	if (stup.isnull1 || !state->tuples)
+	{
+		*val = stup.datum1;
+		*isNull = stup.isnull1;
+	}
+	else
+	{
+		/* use stup.tuple because stup.datum1 may be an abbreviation */
+		*val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen);
+		*isNull = false;
+	}
+
+	return true;
+}
+
+/*
+ * Advance over N tuples in either forward or back direction,
+ * without returning any data. N==0 is a no-op.
+ * Returns true if successful, false if ran out of tuples.
+ *
+ * In the TSS_SORTEDINMEM state this only adjusts state->current; in the
+ * tape-backed states (TSS_SORTEDONTAPE, TSS_FINALMERGE) each tuple is
+ * fetched with tuplesort_gettuple_common and thrown away. Any other state
+ * raises an error.
+ */
+bool
+tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward)
+{
+	MemoryContext oldcontext;
+
+	/*
+	 * We don't actually support backwards skip yet, because no callers need
+	 * it. The API is designed to allow for that later, though.
+	 */
+	Assert(forward);
+	Assert(ntuples >= 0);
+	Assert(!WORKER(state));
+
+	switch (state->status)
+	{
+		case TSS_SORTEDINMEM:
+			if (state->memtupcount - state->current >= ntuples)
+			{
+				state->current += ntuples;
+				return true;
+			}
+			state->current = state->memtupcount;
+			state->eof_reached = true;
+
+			/*
+			 * Complain if caller tries to retrieve more tuples than
+			 * originally asked for in a bounded sort. This is because
+			 * returning EOF here might be the wrong thing.
+			 */
+			if (state->bounded && state->current >= state->bound)
+				elog(ERROR, "retrieved too many tuples in a bounded sort");
+
+			return false;
+
+		case TSS_SORTEDONTAPE:
+		case TSS_FINALMERGE:
+
+			/*
+			 * We could probably optimize these cases better, but for now it's
+			 * not worth the trouble.
+			 */
+			oldcontext = MemoryContextSwitchTo(state->sortcontext);
+			while (ntuples-- > 0)
+			{
+				SortTuple	stup;
+
+				if (!tuplesort_gettuple_common(state, forward, &stup))
+				{
+					MemoryContextSwitchTo(oldcontext);
+					return false;
+				}
+				CHECK_FOR_INTERRUPTS();
+			}
+			MemoryContextSwitchTo(oldcontext);
+			return true;
+
+		default:
+			elog(ERROR, "invalid tuplesort state");
+			return false;		/* keep compiler quiet */
+	}
+}
+
+/*
+ * tuplesort_merge_order - report merge order we'll use for given memory
+ * (note: "merge order" just means the number of input tapes in the merge).
+ *
+ * This is exported for use by the planner. allowedMem is in bytes.
+ * The result is always clamped to the range MINORDER..MAXORDER.
+ */
+int
+tuplesort_merge_order(int64 allowedMem)
+{
+	int			mOrder;
+
+	/*
+	 * We need one tape for each merge input, plus another one for the output,
+	 * and each of these tapes needs buffer space. In addition we want
+	 * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't
+	 * count).
+	 *
+	 * Note: you might be thinking we need to account for the memtuples[]
+	 * array in this calculation, but we effectively treat that as part of the
+	 * MERGE_BUFFER_SIZE workspace.
+	 */
+	mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) /
+		(MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD);
+
+	/*
+	 * Even in minimum memory, use at least a MINORDER merge. On the other
+	 * hand, even when we have lots of memory, do not use more than a MAXORDER
+	 * merge. Tapes are pretty cheap, but they're not entirely free. Each
+	 * additional tape reduces the amount of memory available to build runs,
+	 * which in turn can cause the same sort to need more runs, which makes
+	 * merging slower even if it can still be done in a single pass. Also,
+	 * high order merges are quite slow due to CPU cache effects; it can be
+	 * faster to pay the I/O cost of a polyphase merge than to perform a
+	 * single merge pass across many hundreds of tapes.
+	 */
+	mOrder = Max(mOrder, MINORDER);
+	mOrder = Min(mOrder, MAXORDER);
+
+	return mOrder;
+}
+
+/*
+ * inittapes - initialize for tape sorting.
+ *
+ * This is called only if we have found we won't sort in memory.
+ *
+ * mergeruns is true when the runs written will later be merged, in which
+ * case the tape count is tuplesort_merge_order(allowedMem) + 1. When it is
+ * false the caller is asserted to be a parallel worker and MINORDER + 1
+ * tapes are used. On return state->status is TSS_BUILDRUNS.
+ */
+static void
+inittapes(Tuplesortstate *state, bool mergeruns)
+{
+	int			maxTapes,
+				j;
+
+	Assert(!LEADER(state));
+
+	if (mergeruns)
+	{
+		/* Compute number of tapes to use: merge order plus 1 */
+		maxTapes = tuplesort_merge_order(state->allowedMem) + 1;
+	}
+	else
+	{
+		/* Workers can sometimes produce single run, output without merge */
+		Assert(WORKER(state));
+		maxTapes = MINORDER + 1;
+	}
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d switching to external sort with %d tapes: %s",
+			 state->worker, maxTapes, pg_rusage_show(&state->ru_start));
+#endif
+
+	/* Create the tape set and allocate the per-tape data arrays */
+	inittapestate(state, maxTapes);
+	state->tapeset =
+		LogicalTapeSetCreate(maxTapes, false, NULL,
+							 state->shared ? &state->shared->fileset : NULL,
+							 state->worker);
+
+	state->currentRun = 0;
+
+	/*
+	 * Initialize variables of Algorithm D (step D1).
+	 */
+	for (j = 0; j < maxTapes; j++)
+	{
+		state->tp_fib[j] = 1;
+		state->tp_runs[j] = 0;
+		state->tp_dummy[j] = 1;
+		state->tp_tapenum[j] = j;
+	}
+	state->tp_fib[state->tapeRange] = 0;
+	state->tp_dummy[state->tapeRange] = 0;
+
+	state->Level = 1;
+	state->destTape = 0;
+
+	state->status = TSS_BUILDRUNS;
+}
+
+/*
+ * inittapestate - initialize generic tape management state
+ *
+ * Charges the tape buffer space against availMem (when it fits), allocates
+ * the zeroed per-tape arrays (mergeactive, tp_fib, tp_runs, tp_dummy,
+ * tp_tapenum), and records maxTapes and tapeRange (maxTapes - 1) in state.
+ * The tape set itself is not created here.
+ */
+static void
+inittapestate(Tuplesortstate *state, int maxTapes)
+{
+	int64		tapeSpace;
+
+	/*
+	 * Decrease availMem to reflect the space needed for tape buffers; but
+	 * don't decrease it to the point that we have no room for tuples. (That
+	 * case is only likely to occur if sorting pass-by-value Datums; in all
+	 * other scenarios the memtuples[] array is unlikely to occupy more than
+	 * half of allowedMem. In the pass-by-value case it's not important to
+	 * account for tuple space, so we don't care if LACKMEM becomes
+	 * inaccurate.)
+	 */
+	tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD;
+
+	if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem)
+		USEMEM(state, tapeSpace);
+
+	/*
+	 * Make sure that the temp file(s) underlying the tape set are created in
+	 * suitable temp tablespaces. For parallel sorts, this should have been
+	 * called already, but it doesn't matter if it is called a second time.
+	 */
+	PrepareTempTablespaces();
+
+	state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool));
+	state->tp_fib = (int *) palloc0(maxTapes * sizeof(int));
+	state->tp_runs = (int *) palloc0(maxTapes * sizeof(int));
+	state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int));
+	state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int));
+
+	/* Record # of tapes allocated (for duration of sort) */
+	state->maxTapes = maxTapes;
+	/* Record maximum # of tapes usable as inputs when merging */
+	state->tapeRange = maxTapes - 1;
+}
+
+/*
+ * selectnewtape -- select new tape for new initial run.
+ *
+ * This is called after finishing a run when we know another run
+ * must be started. This implements steps D3, D4 of Algorithm D.
+ */
+static void
+selectnewtape(Tuplesortstate *state)
+{
+	int		   *dummy = state->tp_dummy;
+	int		   *fib = state->tp_fib;
+	int			dest = state->destTape;
+	int			base;
+	int			t;
+
+	/*
+	 * Step D3: advance the destination tape. Move to the right-hand
+	 * neighbour while it still has more dummy runs than the current tape;
+	 * otherwise wrap to tape 0 unless the current tape has no dummies left.
+	 */
+	if (dummy[dest] < dummy[dest + 1])
+	{
+		state->destTape = dest + 1;
+		return;
+	}
+	if (dummy[dest] != 0)
+	{
+		state->destTape = 0;
+		return;
+	}
+
+	/*
+	 * Step D4: go up one level, recomputing the dummy-run counts and the
+	 * per-tape targets from the old targets.
+	 */
+	state->Level++;
+	base = fib[0];
+	for (t = 0; t < state->tapeRange; t++)
+	{
+		int			target = base + fib[t + 1];
+
+		dummy[t] = target - fib[t];
+		fib[t] = target;
+	}
+	state->destTape = 0;
+}
+
+/*
+ * Set up the slab arena with numSlots slots of SLAB_SLOT_SIZE bytes each,
+ * chained together as a free list. With numSlots <= 0 no memory is
+ * allocated and the arena pointers are NULL. Either way the allocator is
+ * flagged as in use on return.
+ */
+static void
+init_slab_allocator(Tuplesortstate *state, int numSlots)
+{
+	SlabSlot   *slot;
+	int			remaining;
+
+	if (numSlots <= 0)
+	{
+		state->slabMemoryBegin = state->slabMemoryEnd = NULL;
+		state->slabFreeHead = NULL;
+		state->slabAllocatorUsed = true;
+		return;
+	}
+
+	state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE);
+	state->slabMemoryEnd = state->slabMemoryBegin +
+		numSlots * SLAB_SLOT_SIZE;
+	state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin;
+	USEMEM(state, numSlots * SLAB_SLOT_SIZE);
+
+	/* Link each slot to the one after it; the last slot ends the list */
+	slot = (SlabSlot *) state->slabMemoryBegin;
+	for (remaining = numSlots - 1; remaining > 0; remaining--)
+	{
+		SlabSlot   *following = (SlabSlot *) ((char *) slot + SLAB_SLOT_SIZE);
+
+		slot->nextfree = following;
+		slot = following;
+	}
+	slot->nextfree = NULL;
+
+	state->slabAllocatorUsed = true;
+}
+
+/*
+ * mergeruns -- merge all the completed initial runs.
+ *
+ * This implements steps D5, D6 of Algorithm D. All input data has
+ * already been written to initial runs on tape (see dumptuples).
+ */
+static void
+mergeruns(Tuplesortstate *state)
+{
+	int			tapenum,
+				svTape,
+				svRuns,
+				svDummy;
+	int			numTapes;
+	int			numInputTapes;
+
+	Assert(state->status == TSS_BUILDRUNS);
+	Assert(state->memtupcount == 0);
+
+	if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL)
+	{
+		/*
+		 * If there are multiple runs to be merged, when we go to read back
+		 * tuples from disk, abbreviated keys will not have been stored, and
+		 * we don't care to regenerate them. Disable abbreviation from this
+		 * point on.
+		 */
+		state->sortKeys->abbrev_converter = NULL;
+		state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
+
+		/* Not strictly necessary, but be tidy */
+		state->sortKeys->abbrev_abort = NULL;
+		state->sortKeys->abbrev_full_comparator = NULL;
+	}
+
+	/*
+	 * Reset tuple memory. We've freed all the tuples that we previously
+	 * allocated. We will use the slab allocator from now on.
+	 */
+	MemoryContextResetOnly(state->tuplecontext);
+
+	/*
+	 * We no longer need a large memtuples array. (We will allocate a smaller
+	 * one for the heap later.)
+	 */
+	FREEMEM(state, GetMemoryChunkSpace(state->memtuples));
+	pfree(state->memtuples);
+	state->memtuples = NULL;
+
+	/*
+	 * If we had fewer runs than tapes, refund the memory that we imagined we
+	 * would need for the tape buffers of the unused tapes.
+	 *
+	 * numTapes and numInputTapes reflect the actual number of tapes we will
+	 * use. Note that the output tape's tape number is maxTapes - 1, so the
+	 * tape numbers of the used tapes are not consecutive, and you cannot just
+	 * loop from 0 to numTapes to visit all used tapes!
+	 */
+	if (state->Level == 1)
+	{
+		numInputTapes = state->currentRun;
+		numTapes = numInputTapes + 1;
+		FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD);
+	}
+	else
+	{
+		numInputTapes = state->tapeRange;
+		numTapes = state->maxTapes;
+	}
+
+	/*
+	 * Initialize the slab allocator. We need one slab slot per input tape,
+	 * for the tuples in the heap, plus one to hold the tuple last returned
+	 * from tuplesort_gettuple. (If we're sorting pass-by-val Datums,
+	 * however, we don't need to allocate anything.)
+	 *
+	 * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism
+	 * to track memory usage of individual tuples.
+	 */
+	if (state->tuples)
+		init_slab_allocator(state, numInputTapes + 1);
+	else
+		init_slab_allocator(state, 0);
+
+	/*
+	 * Allocate a new 'memtuples' array, for the heap. It will hold one tuple
+	 * from each input tape.
+	 */
+	state->memtupsize = numInputTapes;
+	state->memtuples = (SortTuple *) MemoryContextAlloc(state->maincontext,
+														numInputTapes * sizeof(SortTuple));
+	USEMEM(state, GetMemoryChunkSpace(state->memtuples));
+
+	/*
+	 * Use all the remaining memory we have available for read buffers among
+	 * the input tapes.
+	 *
+	 * We don't try to "rebalance" the memory among tapes, when we start a new
+	 * merge phase, even if some tapes are inactive in the new phase. That
+	 * would be hard, because logtape.c doesn't know where one run ends and
+	 * another begins. When a new merge phase begins, and a tape doesn't
+	 * participate in it, its buffer nevertheless already contains tuples from
+	 * the next run on same tape, so we cannot release the buffer. That's OK
+	 * in practice, merge performance isn't that sensitive to the amount of
+	 * buffers used, and most merge phases use all or almost all tapes,
+	 * anyway.
+	 */
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG, "worker %d using " INT64_FORMAT " KB of memory for read buffers among %d input tapes",
+			 state->worker, state->availMem / 1024, numInputTapes);
+#endif
+
+	state->read_buffer_size = Max(state->availMem / numInputTapes, 0);
+	USEMEM(state, state->read_buffer_size * numInputTapes);
+
+	/* End of step D2: rewind all output tapes to prepare for merging */
+	for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+		LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size);
+
+	for (;;)
+	{
+		/*
+		 * At this point we know that tape[T] is empty. If there's just one
+		 * (real or dummy) run left on each input tape, then only one merge
+		 * pass remains. If we don't have to produce a materialized sorted
+		 * tape, we can stop at this point and do the final merge on-the-fly.
+		 * In that case we return from here with status TSS_FINALMERGE.
+		 */
+		if (!state->randomAccess && !WORKER(state))
+		{
+			bool		allOneRun = true;
+
+			Assert(state->tp_runs[state->tapeRange] == 0);
+			for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+			{
+				if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1)
+				{
+					allOneRun = false;
+					break;
+				}
+			}
+			if (allOneRun)
+			{
+				/* Tell logtape.c we won't be writing anymore */
+				LogicalTapeSetForgetFreeSpace(state->tapeset);
+				/* Initialize for the final merge pass */
+				beginmerge(state);
+				state->status = TSS_FINALMERGE;
+				return;
+			}
+		}
+
+		/* Step D5: merge runs onto tape[T] until tape[P] is empty */
+		while (state->tp_runs[state->tapeRange - 1] ||
+			   state->tp_dummy[state->tapeRange - 1])
+		{
+			bool		allDummy = true;
+
+			for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+			{
+				if (state->tp_dummy[tapenum] == 0)
+				{
+					allDummy = false;
+					break;
+				}
+			}
+
+			if (allDummy)
+			{
+				state->tp_dummy[state->tapeRange]++;
+				for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
+					state->tp_dummy[tapenum]--;
+			}
+			else
+				mergeonerun(state);
+		}
+
+		/* Step D6: decrease level */
+		if (--state->Level == 0)
+			break;
+		/* rewind output tape T to use as new input */
+		LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange],
+								 state->read_buffer_size);
+		/* rewind used-up input tape P, and prepare it for write pass */
+		LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]);
+		state->tp_runs[state->tapeRange - 1] = 0;
+
+		/*
+		 * reassign tape units per step D6; note we no longer care about A[]
+		 */
+		svTape = state->tp_tapenum[state->tapeRange];
+		svDummy = state->tp_dummy[state->tapeRange];
+		svRuns = state->tp_runs[state->tapeRange];
+		for (tapenum = state->tapeRange; tapenum > 0; tapenum--)
+		{
+			state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1];
+			state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1];
+			state->tp_runs[tapenum] = state->tp_runs[tapenum - 1];
+		}
+		state->tp_tapenum[0] = svTape;
+		state->tp_dummy[0] = svDummy;
+		state->tp_runs[0] = svRuns;
+	}
+
+	/*
+	 * Done. Knuth says that the result is on TAPE[1], but since we exited
+	 * the loop without performing the last iteration of step D6, we have not
+	 * rearranged the tape unit assignment, and therefore the result is on
+	 * TAPE[T]. We need to do it this way so that we can freeze the final
+	 * output tape while rewinding it. The last iteration of step D6 would be
+	 * a waste of cycles anyway...
+	 */
+	state->result_tape = state->tp_tapenum[state->tapeRange];
+	if (!WORKER(state))
+		LogicalTapeFreeze(state->tapeset, state->result_tape, NULL);
+	else
+		worker_freeze_result_tape(state);
+	state->status = TSS_SORTEDONTAPE;
+
+	/* Release the read buffers of all the other tapes, by rewinding them. */
+	for (tapenum = 0; tapenum < state->maxTapes; tapenum++)
+	{
+		if (tapenum != state->result_tape)
+			LogicalTapeRewindForWrite(state->tapeset, tapenum);
+	}
+}
+
+/*
+ * Merge one run from each input tape, except ones with dummy runs.
+ *
+ * This is the inner loop of Algorithm D step D5. We know that the
+ * output tape is TAPE[T].
+ */ +static void +mergeonerun(Tuplesortstate *state) +{ + int destTape = state->tp_tapenum[state->tapeRange]; + int srcTape; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. We can also decrease the input run/dummy run counts. + */ + beginmerge(state); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). + */ + while (state->memtupcount > 0) + { + SortTuple stup; + + /* write the tuple to destTape */ + srcTape = state->memtuples[0].srctape; + WRITETUP(state, destTape, &state->memtuples[0]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[0].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple); + + /* + * pull next tuple from the tape, and replace the written-out tuple in + * the heap with it. + */ + if (mergereadnext(state, srcTape, &stup)) + { + stup.srctape = srcTape; + tuplesort_heap_replace_top(state, &stup); + } + else + tuplesort_heap_delete_top(state); + } + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape, and increment its count of real runs. + */ + markrunend(state, destTape); + state->tp_runs[state->tapeRange]++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished %d-way merge step: %s", state->worker, + state->activeTapes, pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * beginmerge - initialize for a merge pass + * + * We decrease the counts of real and dummy runs for each tape, and mark + * which tapes contain active input runs in mergeactive[]. Then, fill the + * merge heap with the first tuple from each active tape. 
+ */ +static void +beginmerge(Tuplesortstate *state) +{ + int activeTapes; + int tapenum; + int srcTape; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + /* Adjust run counts and mark the active tapes */ + memset(state->mergeactive, 0, + state->maxTapes * sizeof(*state->mergeactive)); + activeTapes = 0; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] > 0) + state->tp_dummy[tapenum]--; + else + { + Assert(state->tp_runs[tapenum] > 0); + state->tp_runs[tapenum]--; + srcTape = state->tp_tapenum[tapenum]; + state->mergeactive[srcTape] = true; + activeTapes++; + } + } + Assert(activeTapes > 0); + state->activeTapes = activeTapes; + + /* Load the merge heap with the first tuple from each input tape */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + SortTuple tup; + + if (mergereadnext(state, srcTape, &tup)) + { + tup.srctape = srcTape; + tuplesort_heap_insert(state, &tup); + } + } +} + +/* + * mergereadnext - read next tuple from one merge input tape + * + * Returns false on EOF. + */ +static bool +mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup) +{ + unsigned int tuplen; + + if (!state->mergeactive[srcTape]) + return false; /* tape's run is already exhausted */ + + /* read next tuple, if any */ + if ((tuplen = getlen(state, srcTape, true)) == 0) + { + state->mergeactive[srcTape] = false; + return false; + } + READTUP(state, stup, srcTape, tuplen); + + return true; +} + +/* + * dumptuples - remove tuples from memtuples and write initial run to tape + * + * When alltuples = true, dump everything currently in memory. (This case is + * only used at end of input data.) + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Nothing to do if we still fit in available memory and have array slots, + * unless this is the final call during initial run generation. 
+ */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state) && + !alltuples) + return; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. + * + * In general, short final runs are quite possible. Rather than allowing + * a special case where there was a superfluous selectnewtape() call (i.e. + * a call with no subsequent run actually written to destTape), we prefer + * to write out a 0 tuple run. + * + * mergereadnext() is prepared for 0 tuple runs, and will reliably mark + * the tape inactive for the merge when called from beginmerge(). This + * case is therefore similar to the case where mergeonerun() finds a dummy + * run for the tape, and so doesn't need to merge a run from the tape (or + * conceptually "merges" the dummy run, if you prefer). According to + * Knuth, Algorithm D "isn't strictly optimal" in its method of + * distribution and dummy run assignment; this edge case seems very + * unlikely to make that appreciably worse. 
+ */ + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d starting quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in the sizes of incoming tuples. Fragmentation due to + * AllocSetFree's bucketing by size class might be particularly bad if + * this step wasn't taken. 
+ */ + MemoryContextReset(state->tuplecontext); + + markrunend(state, state->tp_tapenum[state->destTape]); + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished writing run %d to tape %d: %s", + state->worker, state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + if (!alltuples) + selectnewtape(state); +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewindForRead(state->tapeset, + state->result_tape, + 0); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->tapeset, + state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void 
+tuplesort_restorepos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + LogicalTapeSeek(state->tapeset, + state->result_tape, + state->markpos_block, + state->markpos_offset); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + TuplesortInstrumentation *stats) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + tuplesort_updatemax(state); + + if (state->isMaxSpaceDisk) + stats->spaceType = SORT_SPACE_TYPE_DISK; + else + stats->spaceType = SORT_SPACE_TYPE_MEMORY; + stats->spaceUsed = (state->maxSpace + 1023) / 1024; + + switch (state->maxSpaceStatus) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT; + else + stats->sortMethod = SORT_TYPE_QUICKSORT; + break; + case TSS_SORTEDONTAPE: + stats->sortMethod = SORT_TYPE_EXTERNAL_SORT; + break; + case TSS_FINALMERGE: + stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE; + break; + default: + stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS; + break; + } +} + +/* + * Convert TuplesortMethod to a string. + */ +const char * +tuplesort_method_name(TuplesortMethod m) +{ + switch (m) + { + case SORT_TYPE_STILL_IN_PROGRESS: + return "still in progress"; + case SORT_TYPE_TOP_N_HEAPSORT: + return "top-N heapsort"; + case SORT_TYPE_QUICKSORT: + return "quicksort"; + case SORT_TYPE_EXTERNAL_SORT: + return "external sort"; + case SORT_TYPE_EXTERNAL_MERGE: + return "external merge"; + } + + return "unknown"; +} + +/* + * Convert TuplesortSpaceType to a string. + */ +const char * +tuplesort_space_type_name(TuplesortSpaceType t) +{ + Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY); + return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory"; +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + */ + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + Assert(SERIAL(state)); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount < state->bound) + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup); + } + else + { + /* + * The heap is full. Replace the largest entry with the new + * tuple, or just discard it, if it's larger than anything already + * in the heap. + */ + if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + tuplesort_heap_replace_top(state, &state->memtuples[i]); + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + Assert(SERIAL(state)); + + /* + * We can unheapify in place because each delete-top call will remove the + * largest entry, which we can promptly store in the newly freed slot at + * the end. Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_delete_top(state); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. 
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts, and external sort runs. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + Assert(!LEADER(state)); + + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: For some callers, tuple points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples; + int j; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (COMPARETUP(state, tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * Remove the tuple at state->memtuples[0] from the heap. Decrement + * memtupcount, and sift up to maintain the heap invariant. + * + * The caller has already free'd the tuple the top node points to, + * if necessary. 
+ */ +static void +tuplesort_heap_delete_top(Tuplesortstate *state) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + + if (--state->memtupcount <= 0) + return; + + /* + * Remove the last tuple in the heap, and re-insert it, by replacing the + * current top node with it. + */ + tuple = &memtuples[state->memtupcount]; + tuplesort_heap_replace_top(state, tuple); +} + +/* + * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to + * maintain the heap invariant. + * + * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H, + * Heapsort, steps H3-H8). + */ +static void +tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples = state->memtuples; + unsigned int i, + n; + + Assert(state->memtupcount >= 1); + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (COMPARETUP(state, tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(Tuplesortstate *state, int tapenum, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(state->tapeset, tapenum, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(Tuplesortstate *state, int tapenum) +{ + unsigned int len = 0; + + LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. + * + * We use next free slot from the slab allocator, or palloc() if the tuple + * is too large for that. + */ +static void * +readtup_alloc(Tuplesortstate *state, Size tuplen) +{ + SlabSlot *buf; + + /* + * We pre-allocate enough slots in the slab arena that we should never run + * out. 
+ */ + Assert(state->slabFreeHead); + + if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead) + return MemoryContextAlloc(state->sortcontext, tuplen); + else + { + buf = state->slabFreeHead; + /* Reuse this slot */ + state->slabFreeHead = buf->nextfree; + + return buf; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed
"tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); + } +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(state->tapeset, tapenum, + tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = 
heap_getattr(rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreHeapTuple(ltup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreHeapTuple(rtup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + 
state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = 
index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. + */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This is required for + * btree indexes, since heap TID is treated as an implicit last key + * attribute in order to ensure that all keys in the index are physically + * unique. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? 
-1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. + */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? 
-1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_index() should not be called"); +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +/* + * Parallel sort routines + */ + +/* + * tuplesort_estimate_shared - estimate required shared memory allocation + * + * nWorkers is an estimate of the number of workers (it's the number that + * will be requested). + */ +Size +tuplesort_estimate_shared(int nWorkers) +{ + Size tapesSize; + + Assert(nWorkers > 0); + + /* Make sure that BufFile shared state is MAXALIGN'd */ + tapesSize = mul_size(sizeof(TapeShare), nWorkers); + tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes))); + + return tapesSize; +} + +/* + * tuplesort_initialize_shared - initialize shared tuplesort state + * + * Must be called from leader process before workers are launched, to + * establish state needed up-front for worker tuplesortstates. nWorkers + * should match the argument passed to tuplesort_estimate_shared(). 
+ */ +void +tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg) +{ + int i; + + Assert(nWorkers > 0); + + SpinLockInit(&shared->mutex); + shared->currentWorker = 0; + shared->workersFinished = 0; + SharedFileSetInit(&shared->fileset, seg); + shared->nTapes = nWorkers; + for (i = 0; i < nWorkers; i++) + { + shared->tapes[i].firstblocknumber = 0L; + } +} + +/* + * tuplesort_attach_shared - attach to shared tuplesort state + * + * Must be called by all worker processes. + */ +void +tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) +{ + /* Attach to SharedFileSet */ + SharedFileSetAttach(&shared->fileset, seg); +} + +/* + * worker_get_identifier - Assign and return ordinal identifier for worker + * + * The order in which these are assigned is not well defined, and should not + * matter; worker numbers across parallel sort participants need only be + * distinct and gapless. logtape.c requires this. + * + * Note that the identifiers assigned from here have no relation to + * ParallelWorkerNumber, to avoid making any assumption about + * caller's requirements. However, we do follow the ParallelWorkerNumber + * convention of representing a non-worker with worker number -1. This + * includes the leader, as well as serial Tuplesort processes. + */ +static int +worker_get_identifier(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int worker; + + Assert(WORKER(state)); + + SpinLockAcquire(&shared->mutex); + worker = shared->currentWorker++; + SpinLockRelease(&shared->mutex); + + return worker; +} + +/* + * worker_freeze_result_tape - freeze worker's result tape for leader + * + * This is called by workers just after the result tape has been determined, + * instead of calling LogicalTapeFreeze() directly. They do so because + * workers require a few additional steps over similar serial + * TSS_SORTEDONTAPE external sort cases, which also happen here. 
The extra + * steps are around freeing now unneeded resources, and representing to + * leader that worker's input run is available for its merge. + * + * There should only be one final output run for each worker, which consists + * of all tuples that were originally input into worker. + */ +static void +worker_freeze_result_tape(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + TapeShare output; + + Assert(WORKER(state)); + Assert(state->result_tape != -1); + Assert(state->memtupcount == 0); + + /* + * Free most remaining memory, in case caller is sensitive to our holding + * on to it. memtuples may not be a tiny merge heap at this point. + */ + pfree(state->memtuples); + /* Be tidy */ + state->memtuples = NULL; + state->memtupsize = 0; + + /* + * Parallel worker requires result tape metadata, which is to be stored in + * shared memory for leader + */ + LogicalTapeFreeze(state->tapeset, state->result_tape, &output); + + /* Store properties of output tape, and update finished worker count */ + SpinLockAcquire(&shared->mutex); + shared->tapes[state->worker] = output; + shared->workersFinished++; + SpinLockRelease(&shared->mutex); +} + +/* + * worker_nomergeruns - dump memtuples in worker, without merging + * + * This is called as an alternative to mergeruns() with a worker when no + * merging is required. + */ +static void +worker_nomergeruns(Tuplesortstate *state) +{ + Assert(WORKER(state)); + Assert(state->result_tape == -1); + + state->result_tape = state->tp_tapenum[state->destTape]; + worker_freeze_result_tape(state); +} + +/* + * leader_takeover_tapes - create tapeset for leader from worker tapes + * + * So far, leader Tuplesortstate has performed no actual sorting. By now, all + * sorting has occurred in workers, all of which must have already returned + * from tuplesort_performsort(). 
+ * + * When this returns, leader process is left in a state that is virtually + * indistinguishable from it having generated runs as a serial external sort + * might have. + */ +static void +leader_takeover_tapes(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int nParticipants = state->nParticipants; + int workersFinished; + int j; + + Assert(LEADER(state)); + Assert(nParticipants >= 1); + + SpinLockAcquire(&shared->mutex); + workersFinished = shared->workersFinished; + SpinLockRelease(&shared->mutex); + + if (nParticipants != workersFinished) + elog(ERROR, "cannot take over tapes before all workers finish"); + + /* + * Create the tapeset from worker tapes, including a leader-owned tape at + * the end. Parallel workers are far more expensive than logical tapes, + * so the number of tapes allocated here should never be excessive. + * + * We still have a leader tape, though it's not possible to write to it + * due to restrictions in the shared fileset infrastructure used by + * logtape.c. It will never be written to in practice because + * randomAccess is disallowed for parallel sorts. + */ + inittapestate(state, nParticipants + 1); + state->tapeset = LogicalTapeSetCreate(nParticipants + 1, false, + shared->tapes, &shared->fileset, + state->worker); + + /* mergeruns() relies on currentRun for # of runs (in one-pass cases) */ + state->currentRun = nParticipants; + + /* + * Initialize variables of Algorithm D to be consistent with runs from + * workers having been generated in the leader. + * + * There will always be exactly 1 run per worker, and exactly one input + * tape per run, because workers always output exactly 1 run, even when + * there were no input tuples for workers to sort. 
+ */ + for (j = 0; j < state->maxTapes; j++) + { + /* One real run; no dummy runs for worker tapes */ + state->tp_fib[j] = 1; + state->tp_runs[j] = 1; + state->tp_dummy[j] = 0; + state->tp_tapenum[j] = j; + } + /* Leader tape gets one dummy run, and no real runs */ + state->tp_fib[state->tapeRange] = 0; + state->tp_runs[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 1; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort14.c b/src/tuplesort14.c new file mode 100644 index 0000000000..b17347b214 --- /dev/null +++ b/src/tuplesort14.c @@ -0,0 +1,4782 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we always use + * quicksort for run generation. We merge the runs using polyphase merge, + * Knuth's Algorithm 5.4.2D. 
The logical "tapes" used by Algorithm D are + * implemented by logtape.c, which avoids space wastage by recycling disk + * space as soon as each block is read from its "tape". + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run, + * then merge the runs using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. 
Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c, we just tell it how + * much memory to use for the buffers. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. In our implementation a "tape drive" + * doesn't cost much more than a few Kb of memory buffers, so we can afford + * to have lots of them. In particular, if we can have as many tape drives + * as sorted runs, we can eliminate any repeated I/O at all. 
In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes (but not + * too many -- see the comments in tuplesort_merge_order). + * + * This module supports parallel sorting. Parallel sorts involve coordination + * among one or more worker processes, and a leader process, each with its own + * tuplesort state. The leader process (or, more accurately, the + * Tuplesortstate associated with a leader process) creates a full tapeset + * consisting of worker tapes with one run to merge; a run for every + * worker process. This is then merged. Worker processes are guaranteed to + * produce exactly one output run from their partial input. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* Sort parallel code from state for sort__start probes */ +#define PARALLEL_SORT(state) ((state)->shared == NULL ? 
0 : \ + (state)->worker >= 0 ? 1 : 2) + +/* + * Initial size of memtuples array. We're trying to select this size so that + * array doesn't exceed ALLOCSET_SEPARATE_THRESHOLD and so that the overhead of + * allocation might possibly be lowered. However, we don't consider array sizes + * less than 1024. + * + */ +#define INITIAL_MEMTUPSIZE Max(1024, \ + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1) + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during merge, when we use a + * simple slab allocator). SortTuples also contain the tuple's first key + * column in Datum/nullflag format, and a source/input tape number that + * tracks which tape each heap element/slot belongs to during merging. + * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values). 
If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. + */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int srctape; /* source tape number */ +} SortTuple; + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples, to avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. + * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. + */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 1 block's + * worth of buffer space. 
This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? 
*/ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + int64 maxSpace; /* maximum amount of space occupied among sort + * of groups, either in-memory or on-disk */ + bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk + * space, false when it's value for in-memory + * space */ + TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ + MemoryContext maincontext; /* memory context for tuple sort metadata that + * persists across multiple batches */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a < b, a = b, a > b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. 
Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. 
When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Buffer size to use for reading input tapes, during merge. */ + size_t read_buffer_size; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * This variable is only used during merge passes. mergeactive[i] is true + * if we are reading an input run from (actual) tape number i and have not + * yet exhausted that run. + */ + bool *mergeactive; /* active input run source? */ + + /* + * Variables for Algorithm D. Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! 
+ */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run leader can merge. Typically includes a worker state held + * by the leader process itself. Set in the leader Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. 
+ */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. 
+ */
+struct Sharedsort
+{
+    /* mutex protects all fields prior to tapes */
+    slock_t mutex;
+
+    /*
+     * currentWorker generates ordinal identifier numbers for parallel sort
+     * workers.  These start from 0, and are always gapless.
+     *
+     * Workers increment workersFinished to indicate having finished.  If this
+     * is equal to state.nParticipants within the leader, leader is ready to
+     * merge worker runs.
+     */
+    int currentWorker;
+    int workersFinished;
+
+    /* Temporary file space */
+    SharedFileSet fileset;
+
+    /* Size of tapes flexible array (number of elements in tapes[]) */
+    int nTapes;
+
+    /*
+     * Tapes array used by workers to report back information needed by the
+     * leader to concatenate all worker tapes into one for merging.  Must
+     * remain the last member: it is a flexible array.
+     */
+    TapeShare tapes[FLEXIBLE_ARRAY_MEMBER];
+};
+
+/*
+ * Is the given tuple allocated from the slab memory arena?
+ *
+ * True when the pointer lies in the half-open range
+ * [slabMemoryBegin, slabMemoryEnd).  Both arguments are evaluated twice,
+ * so do not pass expressions with side effects.
+ */
+#define IS_SLAB_SLOT(state, tuple) \
+    ((char *) (tuple) >= (state)->slabMemoryBegin && \
+     (char *) (tuple) < (state)->slabMemoryEnd)
+
+/*
+ * Return the given tuple to the slab memory free list, or free it
+ * if it was palloc'd.
+ */
+/*
+ * The tuple argument is evaluated exactly once (it is captured in a local),
+ * and is parenthesized before the cast so that an expression argument such
+ * as "base + off" is converted as a whole.  The state argument is evaluated
+ * more than once.
+ */
+#define RELEASE_SLAB_SLOT(state, tuple) \
+    do { \
+        SlabSlot *buf = (SlabSlot *) (tuple); \
+        \
+        if (IS_SLAB_SLOT((state), buf)) \
+        { \
+            buf->nextfree = (state)->slabFreeHead; \
+            (state)->slabFreeHead = buf; \
+        } else \
+            pfree(buf); \
+    } while(0)
+
+/* Dispatch through the per-variant callbacks stored in the sort state */
+#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state))
+#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup))
+#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup))
+#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len))
+
+/*
+ * Memory accounting against availMem.  LACKMEM never reports a shortage
+ * while the slab allocator is in use.
+ */
+#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed)
+#define USEMEM(state,amt) ((state)->availMem -= (amt))
+#define FREEMEM(state,amt) ((state)->availMem += (amt))
+
+/*
+ * Role of this sort state: serial (no shared state), parallel worker
+ * (shared state and worker != -1), or parallel leader (shared state and
+ * worker == -1).
+ */
+#define SERIAL(state) ((state)->shared == NULL)
+#define WORKER(state) ((state)->shared && (state)->worker != -1)
+#define LEADER(state) ((state)->shared && (state)->worker == -1)
+
+/*
+ * NOTES about on-tape representation of tuples:
+ *
+ * We require the first "unsigned int" of a stored tuple to be the total size
+ * on-tape of the tuple, including itself (so it is never zero; an all-zero
+ * unsigned int is used to delimit runs).  The remainder of the stored tuple
+ * may or may not match the in-memory representation of the tuple ---
+ * any conversion needed is the job of the writetup and readtup routines.
+ *
+ * If state->randomAccess is true, then the stored representation of the
+ * tuple must be followed by another "unsigned int" that is a copy of the
+ * length --- so the total tape space used is actually sizeof(unsigned int)
+ * more than the stored length value.  This allows read-backwards.  When
+ * randomAccess is not true, the write/read routines may omit the extra
+ * length word.
+ *
+ * writetup is expected to write both length words as well as the tuple
+ * data.
When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. + * Note that this places a responsibility on copytup routines to use the + * correct memory context for these tuples (and to not use the reset + * context for anything whose lifetime needs to span multiple external + * sort runs). readtup routines use the slab allocator (they cannot use + * the reset context because it gets deleted at the point that merging + * begins). 
+ */
+
+/*
+ * Read exactly len bytes from the given tape into ptr, raising ERROR
+ * ("unexpected end of data") if LogicalTapeRead returns a different count.
+ *
+ * When using this macro, beware of double evaluation of len
+ */
+#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \
+    do { \
+        if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \
+            elog(ERROR, "unexpected end of data"); \
+    } while(0)
+
+
+/* Forward declarations of file-local routines */
+static Tuplesortstate *tuplesort_begin_common(int workMem,
+                                              SortCoordinate coordinate,
+                                              bool randomAccess);
+static void tuplesort_begin_batch(Tuplesortstate *state);
+static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static bool consider_abort_common(Tuplesortstate *state);
+static void inittapes(Tuplesortstate *state, bool mergeruns);
+static void inittapestate(Tuplesortstate *state, int maxTapes);
+static void selectnewtape(Tuplesortstate *state);
+static void init_slab_allocator(Tuplesortstate *state, int numSlots);
+static void mergeruns(Tuplesortstate *state);
+static void mergeonerun(Tuplesortstate *state);
+static void beginmerge(Tuplesortstate *state);
+static bool mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup);
+static void dumptuples(Tuplesortstate *state, bool alltuples);
+static void make_bounded_heap(Tuplesortstate *state);
+static void sort_bounded_heap(Tuplesortstate *state);
+static void tuplesort_sort_memtuples(Tuplesortstate *state);
+static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple);
+static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple);
+static void tuplesort_heap_delete_top(Tuplesortstate *state);
+static void reversedirection(Tuplesortstate *state);
+static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);
+static void markrunend(Tuplesortstate *state, int tapenum);
+static void *readtup_alloc(Tuplesortstate *state, Size tuplen);
+
+/* Callbacks installed by tuplesort_begin_heap */
+static int comparetup_heap(const SortTuple *a, const SortTuple *b,
+                           Tuplesortstate *state);
+static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_heap(Tuplesortstate *state, int tapenum,
+                          SortTuple *stup);
+static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
+                         int tapenum, unsigned int len);
+
+/* Callbacks installed by tuplesort_begin_cluster */
+static int comparetup_cluster(const SortTuple *a, const SortTuple *b,
+                              Tuplesortstate *state);
+static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_cluster(Tuplesortstate *state, int tapenum,
+                             SortTuple *stup);
+static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
+                            int tapenum, unsigned int len);
+
+/* Callbacks installed by the tuplesort_begin_index_xxx variants */
+static int comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+                                  Tuplesortstate *state);
+static int comparetup_index_hash(const SortTuple *a, const SortTuple *b,
+                                 Tuplesortstate *state);
+static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_index(Tuplesortstate *state, int tapenum,
+                           SortTuple *stup);
+static void readtup_index(Tuplesortstate *state, SortTuple *stup,
+                          int tapenum, unsigned int len);
+
+/* Callbacks installed by tuplesort_begin_datum */
+static int comparetup_datum(const SortTuple *a, const SortTuple *b,
+                            Tuplesortstate *state);
+static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_datum(Tuplesortstate *state, int tapenum,
+                           SortTuple *stup);
+static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
+                          int tapenum, unsigned int len);
+
+/* Parallel-sort helpers and cleanup */
+static int worker_get_identifier(Tuplesortstate *state);
+static void worker_freeze_result_tape(Tuplesortstate *state);
+static void worker_nomergeruns(Tuplesortstate *state);
+static void leader_takeover_tapes(Tuplesortstate *state);
+static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);
+static void tuplesort_free(Tuplesortstate *state);
+static void tuplesort_updatemax(Tuplesortstate *state);
+
+/*
+ * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts
+ * any variant of SortTuples, using the appropriate comparetup function.
+ * qsort_ssup() is specialized for the case where the comparetup function
+ * reduces to ApplySortComparator(), that is, single-key MinimalTuple sorts
+ * and Datum sorts.
+ */
+
+/*
+ * qsort_tuple: static sort over SortTuple elements whose comparison argument
+ * is the Tuplesortstate; the comparator is selected at run time
+ * (ST_COMPARE_RUNTIME_POINTER).  This instantiation both declares and
+ * defines the function.
+ */
+#define ST_SORT qsort_tuple
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE_RUNTIME_POINTER
+#define ST_COMPARE_ARG_TYPE Tuplesortstate
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DECLARE
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+/*
+ * qsort_ssup: static sort over SortTuple elements that compares only the
+ * leading datum (datum1/isnull1) through ApplySortComparator(), taking a
+ * SortSupportData as its comparison argument.
+ */
+#define ST_SORT qsort_ssup
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE(a, b, ssup) \
+    ApplySortComparator((a)->datum1, (a)->isnull1, \
+                        (b)->datum1, (b)->isnull1, (ssup))
+#define ST_COMPARE_ARG_TYPE SortSupportData
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+/*
+ * tuplesort_begin_xxx
+ *
+ * Initialize for a tuple sort operation.
+ *
+ * After calling tuplesort_begin, the caller should call tuplesort_putXXX
+ * zero or more times, then call tuplesort_performsort when all the tuples
+ * have been supplied.  After performsort, retrieve the tuples in sorted
+ * order by calling tuplesort_getXXX until it returns false/NULL.  (If random
+ * access was requested, rescan, markpos, and restorepos can also be called.)
+ * Call tuplesort_end to terminate the operation and release memory/disk space.
+ *
+ * Each variant of tuplesort_begin has a workMem parameter specifying the
+ * maximum number of kilobytes of RAM to use before spilling data to disk.
+ * (The normal value of this parameter is work_mem, but some callers use
+ * other values.)  Each variant also has a randomAccess parameter specifying
+ * whether the caller needs non-sequential access to the sort result.
+ */
+
+/*
+ * tuplesort_begin_common
+ *
+ * Shared first step of every tuplesort_begin_xxx variant.  Creates the
+ * "TupleSort main" context and its child "TupleSort sort" context,
+ * allocates a zeroed Tuplesortstate in the main context, records the memory
+ * budget, runs tuplesort_begin_batch(), and finally fills in the
+ * parallel-coordination fields from 'coordinate' (NULL means serial sort).
+ *
+ * Raises ERROR if randomAccess is requested together with a coordinate.
+ */
+static Tuplesortstate *
+tuplesort_begin_common(int workMem, SortCoordinate coordinate,
+                       bool randomAccess)
+{
+    Tuplesortstate *state;
+    MemoryContext maincontext;
+    MemoryContext sortcontext;
+    MemoryContext oldcontext;
+
+    /* See leader_takeover_tapes() remarks on randomAccess support */
+    if (coordinate && randomAccess)
+        elog(ERROR, "random access disallowed under parallel sort");
+
+    /*
+     * Memory context surviving tuplesort_reset.  This memory context holds
+     * data which is useful to keep while sorting multiple similar batches.
+     */
+    maincontext = AllocSetContextCreate(CurrentMemoryContext,
+                                        "TupleSort main",
+                                        ALLOCSET_DEFAULT_SIZES);
+
+    /*
+     * Create a working memory context for one sort operation.  The content of
+     * this context is deleted by tuplesort_reset.
+     */
+    sortcontext = AllocSetContextCreate(maincontext,
+                                        "TupleSort sort",
+                                        ALLOCSET_DEFAULT_SIZES);
+
+    /*
+     * Additionally a working memory context for tuples is setup in
+     * tuplesort_begin_batch.
+     */
+
+    /*
+     * Make the Tuplesortstate within the per-sortstate context.  This way, we
+     * don't need a separate pfree() operation for it at shutdown.
+     */
+    oldcontext = MemoryContextSwitchTo(maincontext);
+
+    state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        pg_rusage_init(&state->ru_start);
+#endif
+
+    state->randomAccess = randomAccess;
+    state->tuples = true;
+
+    /*
+     * workMem is forced to be at least 64KB, the current minimum valid value
+     * for the work_mem GUC.  This is a defense against parallel sort callers
+     * that divide out memory among many workers in a way that leaves each
+     * with very little memory.
+     */
+    state->allowedMem = Max(workMem, 64) * (int64) 1024;
+    state->sortcontext = sortcontext;
+    state->maincontext = maincontext;
+
+    /*
+     * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
+     * see comments in grow_memtuples().
+     */
+    state->memtupsize = INITIAL_MEMTUPSIZE;
+    state->memtuples = NULL;
+
+    /*
+     * After all of the other non-parallel-related state, we setup all of the
+     * state needed for each batch.
+     */
+    tuplesort_begin_batch(state);
+
+    /*
+     * Initialize parallel-related state based on coordination information
+     * from caller
+     */
+    if (!coordinate)
+    {
+        /* Serial sort */
+        state->shared = NULL;
+        state->worker = -1;
+        state->nParticipants = -1;
+    }
+    else if (coordinate->isWorker)
+    {
+        /* Parallel worker produces exactly one final run from all input */
+        state->shared = coordinate->sharedsort;
+        state->worker = worker_get_identifier(state);
+        state->nParticipants = -1;
+    }
+    else
+    {
+        /* Parallel leader state only used for final merge */
+        state->shared = coordinate->sharedsort;
+        state->worker = -1;
+        state->nParticipants = coordinate->nParticipants;
+        Assert(state->nParticipants >= 1);
+    }
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_batch
+ *
+ * Setup, or reset, all state needed for processing a new set of tuples with
+ * this sort state.  Called both from tuplesort_begin_common (the first time
+ * sorting with this sort state) and tuplesort_reset (for subsequent usages).
+ *
+ * Raises ERROR if the memory budget cannot even hold the initial memtuples
+ * array.
+ */
+static void
+tuplesort_begin_batch(Tuplesortstate *state)
+{
+    MemoryContext oldcontext;
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+    /*
+     * Caller tuple (e.g. IndexTuple) memory context.
+     *
+     * A dedicated child context used exclusively for caller passed tuples
+     * eases memory management.  Resetting at key points reduces
+     * fragmentation.  Note that the memtuples array of SortTuples is
+     * allocated in the parent context, not this context, because there is no
+     * need to free memtuples early.
+     */
+    state->tuplecontext = AllocSetContextCreate(state->sortcontext,
+                                                "Caller tuples",
+                                                ALLOCSET_DEFAULT_SIZES);
+
+    state->status = TSS_INITIAL;
+    state->bounded = false;
+    state->boundUsed = false;
+
+    state->availMem = state->allowedMem;
+
+    state->tapeset = NULL;
+
+    state->memtupcount = 0;
+
+    /*
+     * Start each batch with a memtuples array of exactly INITIAL_MEMTUPSIZE
+     * entries: an array that grew during a previous batch is released and
+     * reallocated, while one still at the initial size is reused as is.
+     * (Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD;
+     * see comments in grow_memtuples().)
+     *
+     * NOTE(review): a reused array is not charged against availMem again
+     * after the reset above; only a fresh allocation is.  Confirm that this
+     * is intended.
+     */
+    state->growmemtuples = true;
+    state->slabAllocatorUsed = false;
+    if (state->memtuples != NULL && state->memtupsize != INITIAL_MEMTUPSIZE)
+    {
+        pfree(state->memtuples);
+        state->memtuples = NULL;
+        state->memtupsize = INITIAL_MEMTUPSIZE;
+    }
+    if (state->memtuples == NULL)
+    {
+        state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple));
+        USEMEM(state, GetMemoryChunkSpace(state->memtuples));
+    }
+
+    /* workMem must be large enough for the minimal memtuples array */
+    if (LACKMEM(state))
+        elog(ERROR, "insufficient memory allowed for sort");
+
+    state->currentRun = 0;
+
+    /*
+     * maxTapes, tapeRange, and Algorithm D variables will be initialized by
+     * inittapes(), if needed
+     */
+
+    state->result_tape = -1;    /* flag that result tape has not been formed */
+
+    MemoryContextSwitchTo(oldcontext);
+}
+
+/*
+ * tuplesort_begin_heap
+ *
+ * Begin a sort keyed on nkeys columns described by the parallel arrays
+ * attNums / sortOperators / sortCollations / nullsFirstFlags.  Installs the
+ * *_heap callbacks and builds one SortSupport entry per key; only the first
+ * key is flagged as a candidate for abbreviation.  tupDesc is stored by
+ * reference, not copied.
+ */
+Tuplesortstate *
+tuplesort_begin_heap(TupleDesc tupDesc,
+                     int nkeys, AttrNumber *attNums,
+                     Oid *sortOperators, Oid *sortCollations,
+                     bool *nullsFirstFlags,
+                     int workMem, SortCoordinate coordinate, bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    MemoryContext oldcontext;
+    int i;
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+    AssertArg(nkeys > 0);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+             nkeys, workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = nkeys;
+
+    TRACE_POSTGRESQL_SORT_START(HEAP_SORT,
+                                false,  /* no unique check */
+                                nkeys,
+                                workMem,
+                                randomAccess,
+                                PARALLEL_SORT(state));
+
+    state->comparetup = comparetup_heap;
+    state->copytup = copytup_heap;
+    state->writetup = writetup_heap;
+    state->readtup = readtup_heap;
+
+    state->tupDesc = tupDesc;   /* assume we need not copy tupDesc */
+    state->abbrevNext = 10;
+
+    /* Prepare SortSupport data for each column */
+    state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
+
+    for (i = 0; i < nkeys; i++)
+    {
+        SortSupport sortKey = state->sortKeys + i;
+
+        AssertArg(attNums[i] != 0);
+        AssertArg(sortOperators[i] != 0);
+
+        sortKey->ssup_cxt = CurrentMemoryContext;
+        sortKey->ssup_collation = sortCollations[i];
+        sortKey->ssup_nulls_first = nullsFirstFlags[i];
+        sortKey->ssup_attno = attNums[i];
+        /* Convey if abbreviation optimization is applicable in principle */
+        sortKey->abbreviate = (i == 0);
+
+        PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
+    }
+
+    /*
+     * The "onlyKey" optimization cannot be used with abbreviated keys, since
+     * tie-breaker comparisons may be required.  Typically, the optimization
+     * is only of value to pass-by-value types anyway, whereas abbreviated
+     * keys are typically only of value to pass-by-reference types.
+     */
+    if (nkeys == 1 && !state->sortKeys->abbrev_converter)
+        state->onlyKey = state->sortKeys;
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_cluster
+ *
+ * Begin a sort ordered like the btree index indexRel (asserted to be a
+ * btree).  Sort keys are derived from the index's scan key.  When the index
+ * has expressions, an EState and a tuple slot are created so that
+ * FormIndexDatum can evaluate them; tuplesort_free() releases these.
+ */
+Tuplesortstate *
+tuplesort_begin_cluster(TupleDesc tupDesc,
+                        Relation indexRel,
+                        int workMem,
+                        SortCoordinate coordinate, bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    BTScanInsert indexScanKey;
+    MemoryContext oldcontext;
+    int i;
+
+    Assert(indexRel->rd_rel->relam == BTREE_AM_OID);
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+             RelationGetNumberOfAttributes(indexRel),
+             workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+    TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT,
+                                false,  /* no unique check */
+                                state->nKeys,
+                                workMem,
+                                randomAccess,
+                                PARALLEL_SORT(state));
+
+    state->comparetup = comparetup_cluster;
+    state->copytup = copytup_cluster;
+    state->writetup = writetup_cluster;
+    state->readtup = readtup_cluster;
+    state->abbrevNext = 10;
+
+    state->indexInfo = BuildIndexInfo(indexRel);
+
+    state->tupDesc = tupDesc;   /* assume we need not copy tupDesc */
+
+    indexScanKey = _bt_mkscankey(indexRel, NULL);
+
+    if (state->indexInfo->ii_Expressions != NULL)
+    {
+        TupleTableSlot *slot;
+        ExprContext *econtext;
+
+        /*
+         * We will need to use FormIndexDatum to evaluate the index
+         * expressions.  To do that, we need an EState, as well as a
+         * TupleTableSlot to put the table tuples into.  The econtext's
+         * scantuple has to point to that slot, too.
+         */
+        state->estate = CreateExecutorState();
+        slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple);
+        econtext = GetPerTupleExprContext(state->estate);
+        econtext->ecxt_scantuple = slot;
+    }
+
+    /* Prepare SortSupport data for each column */
+    state->sortKeys = (SortSupport) palloc0(state->nKeys *
+                                            sizeof(SortSupportData));
+
+    for (i = 0; i < state->nKeys; i++)
+    {
+        SortSupport sortKey = state->sortKeys + i;
+        ScanKey scanKey = indexScanKey->scankeys + i;
+        int16 strategy;
+
+        sortKey->ssup_cxt = CurrentMemoryContext;
+        sortKey->ssup_collation = scanKey->sk_collation;
+        sortKey->ssup_nulls_first =
+            (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
+        sortKey->ssup_attno = scanKey->sk_attno;
+        /* Convey if abbreviation optimization is applicable in principle */
+        sortKey->abbreviate = (i == 0);
+
+        AssertState(sortKey->ssup_attno != 0);
+
+        strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
+            BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+        PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
+    }
+
+    pfree(indexScanKey);
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_index_btree
+ *
+ * Begin a sort of index tuples in the key order of the btree index
+ * indexRel.  enforceUnique is recorded in the state ("complain if we find
+ * duplicate tuples").
+ */
+Tuplesortstate *
+tuplesort_begin_index_btree(Relation heapRel,
+                            Relation indexRel,
+                            bool enforceUnique,
+                            int workMem,
+                            SortCoordinate coordinate,
+                            bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    BTScanInsert indexScanKey;
+    MemoryContext oldcontext;
+    int i;
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin index sort: unique = %c, workMem = %d, randomAccess = %c",
+             enforceUnique ? 't' : 'f',
+             workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+    TRACE_POSTGRESQL_SORT_START(INDEX_SORT,
+                                enforceUnique,
+                                state->nKeys,
+                                workMem,
+                                randomAccess,
+                                PARALLEL_SORT(state));
+
+    state->comparetup = comparetup_index_btree;
+    state->copytup = copytup_index;
+    state->writetup = writetup_index;
+    state->readtup = readtup_index;
+    state->abbrevNext = 10;
+
+    state->heapRel = heapRel;
+    state->indexRel = indexRel;
+    state->enforceUnique = enforceUnique;
+
+    indexScanKey = _bt_mkscankey(indexRel, NULL);
+
+    /* Prepare SortSupport data for each column */
+    state->sortKeys = (SortSupport) palloc0(state->nKeys *
+                                            sizeof(SortSupportData));
+
+    for (i = 0; i < state->nKeys; i++)
+    {
+        SortSupport sortKey = state->sortKeys + i;
+        ScanKey scanKey = indexScanKey->scankeys + i;
+        int16 strategy;
+
+        sortKey->ssup_cxt = CurrentMemoryContext;
+        sortKey->ssup_collation = scanKey->sk_collation;
+        sortKey->ssup_nulls_first =
+            (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
+        sortKey->ssup_attno = scanKey->sk_attno;
+        /* Convey if abbreviation optimization is applicable in principle */
+        sortKey->abbreviate = (i == 0);
+
+        AssertState(sortKey->ssup_attno != 0);
+
+        strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ?
+            BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+        PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
+    }
+
+    pfree(indexScanKey);
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_index_hash
+ *
+ * Begin a sort of index tuples for a hash index.  There is a single sort
+ * column, the hash code; the three masks are stored for the comparator.
+ * No SortSupport array is built, so state->sortKeys stays NULL.
+ */
+Tuplesortstate *
+tuplesort_begin_index_hash(Relation heapRel,
+                           Relation indexRel,
+                           uint32 high_mask,
+                           uint32 low_mask,
+                           uint32 max_buckets,
+                           int workMem,
+                           SortCoordinate coordinate,
+                           bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    MemoryContext oldcontext;
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
+             "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
+             high_mask,
+             low_mask,
+             max_buckets,
+             workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = 1;           /* Only one sort column, the hash code */
+
+    state->comparetup = comparetup_index_hash;
+    state->copytup = copytup_index;
+    state->writetup = writetup_index;
+    state->readtup = readtup_index;
+
+    state->heapRel = heapRel;
+    state->indexRel = indexRel;
+
+    state->high_mask = high_mask;
+    state->low_mask = low_mask;
+    state->max_buckets = max_buckets;
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_index_gist
+ *
+ * Begin a sort of index tuples for a GiST index.  Reuses the btree index
+ * comparator and the shared index-tuple callbacks; each key column gets a
+ * SortSupport entry prepared by PrepareSortSupportFromGistIndexRel(), always
+ * with nulls last.
+ */
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+                           Relation indexRel,
+                           int workMem,
+                           SortCoordinate coordinate,
+                           bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    MemoryContext oldcontext;
+    int i;
+
+    /*
+     * Allocate the sort keys in maincontext, as every other
+     * tuplesort_begin_xxx variant does.  sortcontext is reset by
+     * tuplesort_free(), which would leave state->sortKeys (and each key's
+     * ssup_cxt) dangling in a sort state that outlives the reset.
+     */
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin index sort: workMem = %d, randomAccess = %c",
+             workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+    state->comparetup = comparetup_index_btree;
+    state->copytup = copytup_index;
+    state->writetup = writetup_index;
+    state->readtup = readtup_index;
+
+    state->heapRel = heapRel;
+    state->indexRel = indexRel;
+
+    /* Prepare SortSupport data for each column */
+    state->sortKeys = (SortSupport) palloc0(state->nKeys *
+                                            sizeof(SortSupportData));
+
+    for (i = 0; i < state->nKeys; i++)
+    {
+        SortSupport sortKey = state->sortKeys + i;
+
+        sortKey->ssup_cxt = CurrentMemoryContext;
+        sortKey->ssup_collation = indexRel->rd_indcollation[i];
+        sortKey->ssup_nulls_first = false;
+        sortKey->ssup_attno = i + 1;
+        /* Convey if abbreviation optimization is applicable in principle */
+        sortKey->abbreviate = (i == 0);
+
+        AssertState(sortKey->ssup_attno != 0);
+
+        /* Look for a sort support function */
+        PrepareSortSupportFromGistIndexRel(indexRel, sortKey);
+    }
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_begin_datum
+ *
+ * Begin a single-column sort of bare Datums of type datumType.  For
+ * pass-by-value types state->tuples is cleared and abbreviation is not
+ * requested.
+ */
+Tuplesortstate *
+tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
+                      bool nullsFirstFlag, int workMem,
+                      SortCoordinate coordinate, bool randomAccess)
+{
+    Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+                                                   randomAccess);
+    MemoryContext oldcontext;
+    int16 typlen;
+    bool typbyval;
+
+    oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+    if (trace_sort)
+        elog(LOG,
+             "begin datum sort: workMem = %d, randomAccess = %c",
+             workMem, randomAccess ? 't' : 'f');
+#endif
+
+    state->nKeys = 1;           /* always a one-column sort */
+
+    TRACE_POSTGRESQL_SORT_START(DATUM_SORT,
+                                false,  /* no unique check */
+                                1,
+                                workMem,
+                                randomAccess,
+                                PARALLEL_SORT(state));
+
+    state->comparetup = comparetup_datum;
+    state->copytup = copytup_datum;
+    state->writetup = writetup_datum;
+    state->readtup = readtup_datum;
+    state->abbrevNext = 10;
+
+    state->datumType = datumType;
+
+    /* lookup necessary attributes of the datum type */
+    get_typlenbyval(datumType, &typlen, &typbyval);
+    state->datumTypeLen = typlen;
+    state->tuples = !typbyval;
+
+    /* Prepare SortSupport data */
+    state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData));
+
+    state->sortKeys->ssup_cxt = CurrentMemoryContext;
+    state->sortKeys->ssup_collation = sortCollation;
+    state->sortKeys->ssup_nulls_first = nullsFirstFlag;
+
+    /*
+     * Abbreviation is possible here only for by-reference types.  In theory,
+     * a pass-by-value datatype could have an abbreviated form that is cheaper
+     * to compare.  In a tuple sort, we could support that, because we can
+     * always extract the original datum from the tuple as needed.  Here, we
+     * can't, because a datum sort only stores a single copy of the datum; the
+     * "tuple" field of each SortTuple is NULL.
+     */
+    state->sortKeys->abbreviate = !typbyval;
+
+    PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys);
+
+    /*
+     * The "onlyKey" optimization cannot be used with abbreviated keys, since
+     * tie-breaker comparisons may be required.  Typically, the optimization
+     * is only of value to pass-by-value types anyway, whereas abbreviated
+     * keys are typically only of value to pass-by-reference types.
+     */
+    if (!state->sortKeys->abbrev_converter)
+        state->onlyKey = state->sortKeys;
+
+    MemoryContextSwitchTo(oldcontext);
+
+    return state;
+}
+
+/*
+ * tuplesort_set_bound
+ *
+ * Advise tuplesort that at most the first N result tuples are required.
+ *
+ * Must be called before inserting any tuples.
+ * (Actually, we could allow it as long as the sort hasn't spilled to disk,
+ * but there seems no need for delayed calls at the moment.)
+ *
+ * The bound is advisory: the tuplesort may still return more tuples than
+ * requested, and a parallel leader tuplesort always ignores it.  The call
+ * is also a no-op when the bound exceeds INT_MAX / 2.
+ */
+void
+tuplesort_set_bound(Tuplesortstate *state, int64 bound)
+{
+    SortSupport leadKey;
+
+    /* Legal only on a fresh state: no tuples loaded yet ... */
+    Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
+    /* ... no bound installed earlier ... */
+    Assert(!state->bounded);
+    /* ... and never from within a parallel worker */
+    Assert(!WORKER(state));
+
+    /* A parallel leader accepts the call but does nothing with it */
+    if (LEADER(state))
+        return;
+
+#ifdef DEBUG_BOUNDED_SORT
+    /* The optimize_bounded_sort GUC can switch the feature off for testing */
+    if (!optimize_bounded_sort)
+        return;
+#endif
+
+    /* bound * 2 has to be computable later on, so reject larger settings */
+    if (bound > (int64) (INT_MAX / 2))
+        return;
+
+    state->bounded = true;
+    state->bound = (int) bound;
+
+    /*
+     * Abbreviated keys do not pay off for bounded sorts, so make the leading
+     * sort key look as though abbreviation were never supported: drop the
+     * converter and promote the full comparator, if there is one.
+     */
+    leadKey = state->sortKeys;
+    leadKey->abbrev_converter = NULL;
+    if (leadKey->abbrev_full_comparator)
+        leadKey->comparator = leadKey->abbrev_full_comparator;
+
+    /* Clearing the remaining abbreviation hooks is only for tidiness */
+    leadKey->abbrev_abort = NULL;
+    leadKey->abbrev_full_comparator = NULL;
+}
+
+/*
+ * tuplesort_used_bound
+ *
+ * Report the state's boundUsed flag, i.e. whether the sort was able to use
+ * a bound.
+ */
+bool
+tuplesort_used_bound(Tuplesortstate *state)
+{
+    return state->boundUsed;
+}
+
+/*
+ * tuplesort_free
+ *
+ * Internal routine for freeing resources of tuplesort.
+ */ +static void +tuplesort_free(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", + SERIAL(state) ? "external sort" : "parallel external sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "%s of worker %d ended, %ld KB used: %s", + SERIAL(state) ? "internal sort" : "unperformed parallel sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. + */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory. + */ + MemoryContextReset(state->sortcontext); +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! 
+ */ +void +tuplesort_end(Tuplesortstate *state) +{ + tuplesort_free(state); + + /* + * Free the main memory context, including the Tuplesortstate struct + * itself. + */ + MemoryContextDelete(state->maincontext); +} + +/* + * tuplesort_updatemax + * + * Update maximum resource usage statistics. + */ +static void +tuplesort_updatemax(Tuplesortstate *state) +{ + int64 spaceUsed; + bool isSpaceDisk; + + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? + */ + if (state->tapeset) + { + isSpaceDisk = true; + spaceUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ; + } + else + { + isSpaceDisk = false; + spaceUsed = state->allowedMem - state->availMem; + } + + /* + * Sort evicts data to the disk when it wasn't able to fit that data into + * main memory. This is why we assume space used on the disk to be more + * important for tracking resource usage than space used in memory. Note + * that the amount of space occupied by some tupleset on the disk might be + * less than amount of space occupied by the same tupleset in memory due + * to more compact representation. + */ + if ((isSpaceDisk && !state->isMaxSpaceDisk) || + (isSpaceDisk == state->isMaxSpaceDisk && spaceUsed > state->maxSpace)) + { + state->maxSpace = spaceUsed; + state->isMaxSpaceDisk = isSpaceDisk; + state->maxSpaceStatus = state->status; + } +} + +/* + * tuplesort_reset + * + * Reset the tuplesort. Reset all the data in the tuplesort, but leave the + * meta-information in. After tuplesort_reset, tuplesort is ready to start + * a new sort. 
This allows avoiding recreation of tuple sort states (and + * save resources) when sorting multiple small batches. + */ +void +tuplesort_reset(Tuplesortstate *state) +{ + tuplesort_updatemax(state); + tuplesort_free(state); + + /* + * After we've freed up per-batch memory, re-setup all of the state common + * to both the first batch and any subsequent batch. + */ + tuplesort_begin_batch(state); + + state->lastReturnedTuple = NULL; + state->slabMemoryBegin = NULL; + state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. Return true if we were able to enlarge the array, false if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. The + * growmemtuples flag accomplishes that and also prevents useless + * recalculations in this function. + */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples. 
+ */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. 
+ */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. (We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) 
+ */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. 
+ */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + Assert(!LEADER(state)); + + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state, true); + + /* + * Dump all tuples. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_replace_top(state, tuple); + } + break; + + case TSS_BUILDRUNS: + + /* + * Save the tuple into the unsorted array (there must be space) + */ + state->memtuples[state->memtupcount++] = *tuple; + + /* + * If we are over the memory limit, dump all tuples. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. + */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. 
+ */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort of worker %d starting: %s", + state->worker, pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory, or leader to take over worker tapes + */ + if (SERIAL(state)) + { + /* Just qsort 'em and we're done */ + tuplesort_sort_memtuples(state); + state->status = TSS_SORTEDINMEM; + } + else if (WORKER(state)) + { + /* + * Parallel workers must still dump out tuples to tape. No + * merge is required to produce single output run, though. + */ + inittapes(state, false); + dumptuples(state, true); + worker_nomergeruns(state); + state->status = TSS_SORTEDONTAPE; + } + else + { + /* + * Leader will take over worker tapes and merge worker runs. + * Note that mergeruns sets the correct state->status. + */ + leader_takeover_tapes(state); + mergeruns(state); + } + state->current = 0; + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess and !WORKER(), one run per tape). + * Note that mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort of worker %d done (except %d-way final merge): %s", + state->worker, state->activeTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort of worker %d done: %s", + state->worker, pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns false if no more tuples. + * Returned tuple belongs to tuplesort memory context, and must not be freed + * by caller. Note that fetched tuple is stored in memory that may be + * recycled by any future fetch. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup) +{ + unsigned int tuplen; + size_t nmoved; + + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->slabAllocatorUsed); + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. 
+ */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(state->slabAllocatorUsed); + + /* + * The slot that held the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + if (forward) + { + if (state->eof_reached) + return false; + + if ((tuplen = getlen(state, state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle + * its memory on next call. (This can be NULL, in the + * !state->tuples case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + 2 * sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != 2 * sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. 
+ */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + tuplen = getlen(state, state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + 2 * sizeof(unsigned int)); + if (nmoved == tuplen + sizeof(unsigned int)) + { + /* + * We backed up over the previous tuple, but there was no + * ending length word before it. That means that the prev + * tuple is the first tuple in the file. It is now the + * next to read in forward direction (not obviously right, + * but that is what in-memory case does). + */ + return false; + } + else if (nmoved != tuplen + 2 * sizeof(unsigned int)) + elog(ERROR, "bogus tuple length in backward scan"); + } + + tuplen = getlen(state, state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + nmoved = LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen); + if (nmoved != tuplen) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle its memory + * on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + + case TSS_FINALMERGE: + Assert(forward); + /* We are managing memory ourselves, with the slab allocator. */ + Assert(state->slabAllocatorUsed); + + /* + * The slab slot holding the tuple that we returned in previous + * gettuple call can now be reused. 
+ */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + /* + * This code should match the inner loop of mergeonerun(). + */ + if (state->memtupcount > 0) + { + int srcTape = state->memtuples[0].srctape; + SortTuple newtup; + + *stup = state->memtuples[0]; + + /* + * Remember the tuple we return, so that we can recycle its + * memory on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + /* + * Pull next tuple from tape, and replace the returned tuple + * at top of the heap with it. + */ + if (!mergereadnext(state, srcTape, &newtup)) + { + /* + * If no more data, we've reached end of run on this tape. + * Remove the top node from the heap. + */ + tuplesort_heap_delete_top(state); + + /* + * Rewind to free the read buffer. It'd go away at the + * end of the sort anyway, but better to release the + * memory early. + */ + LogicalTapeRewindForWrite(state->tapeset, srcTape); + return true; + } + newtup.srctape = srcTape; + tuplesort_heap_replace_top(state, &newtup); + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return true; else, clear the slot + * and return false. + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. 
+ * + * If copy is true, the slot receives a tuple that's been copied into the + * caller's memory context, so that it will stay valid regardless of future + * manipulations of the tuplesort's state (up to and including deleting the + * tuplesort). If copy is false, the slot will just receive a pointer to a + * tuple held within the tuplesort, which is more efficient, but only safe for + * callers that are prepared to have any subsequent manipulation of the + * tuplesort's state invalidate slot contents. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (copy) + stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); + + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. 
Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns false if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. 
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* use stup.tuple because stup.datum1 may be an abbreviation */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns true if successful, false if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /* + * We need one tape for each merge input, plus another one for the output, + * and each of these tapes needs buffer space. In addition we want + * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't + * count). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. + */ + mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / + (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); + + /* + * Even in minimum memory, use at least a MINORDER merge. On the other + * hand, even when we have lots of memory, do not use more than a MAXORDER + * merge. Tapes are pretty cheap, but they're not entirely free. Each + * additional tape reduces the amount of memory available to build runs, + * which in turn can cause the same sort to need more runs, which makes + * merging slower even if it can still be done in a single pass. 
Also, + * high order merges are quite slow due to CPU cache effects; it can be + * faster to pay the I/O cost of a polyphase merge than to perform a + * single merge pass across many hundreds of tapes. + */ + mOrder = Max(mOrder, MINORDER); + mOrder = Min(mOrder, MAXORDER); + + return mOrder; +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we won't sort in memory. + */ +static void +inittapes(Tuplesortstate *state, bool mergeruns) +{ + int maxTapes, + j; + + Assert(!LEADER(state)); + + if (mergeruns) + { + /* Compute number of tapes to use: merge order plus 1 */ + maxTapes = tuplesort_merge_order(state->allowedMem) + 1; + } + else + { + /* Workers can sometimes produce single run, output without merge */ + Assert(WORKER(state)); + maxTapes = MINORDER + 1; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d switching to external sort with %d tapes: %s", + state->worker, maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* Create the tape set and allocate the per-tape data arrays */ + inittapestate(state, maxTapes); + state->tapeset = + LogicalTapeSetCreate(maxTapes, false, NULL, + state->shared ? &state->shared->fileset : NULL, + state->worker); + + state->currentRun = 0; + + /* + * Initialize variables of Algorithm D (step D1). + */ + for (j = 0; j < maxTapes; j++) + { + state->tp_fib[j] = 1; + state->tp_runs[j] = 0; + state->tp_dummy[j] = 1; + state->tp_tapenum[j] = j; + } + state->tp_fib[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 0; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * inittapestate - initialize generic tape management state + */ +static void +inittapestate(Tuplesortstate *state, int maxTapes) +{ + int64 tapeSpace; + + /* + * Decrease availMem to reflect the space needed for tape buffers; but + * don't decrease it to the point that we have no room for tuples. 
(That + * case is only likely to occur if sorting pass-by-value Datums; in all + * other scenarios the memtuples[] array is unlikely to occupy more than + * half of allowedMem. In the pass-by-value case it's not important to + * account for tuple space, so we don't care if LACKMEM becomes + * inaccurate.) + */ + tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. For parallel sorts, this should have been + * called already, but it doesn't matter if it is called a second time. + */ + PrepareTempTablespaces(); + + state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); + state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); + + /* Record # of tapes allocated (for duration of sort) */ + state->maxTapes = maxTapes; + /* Record maximum # of tapes usable as inputs when merging */ + state->tapeRange = maxTapes - 1; +} + +/* + * selectnewtape -- select new tape for new initial run. + * + * This is called after finishing a run when we know another run + * must be started. This implements steps D3, D4 of Algorithm D. 
+ */ +static void +selectnewtape(Tuplesortstate *state) +{ + int j; + int a; + + /* Step D3: advance j (destTape) */ + if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1]) + { + state->destTape++; + return; + } + if (state->tp_dummy[state->destTape] != 0) + { + state->destTape = 0; + return; + } + + /* Step D4: increase level */ + state->Level++; + a = state->tp_fib[0]; + for (j = 0; j < state->tapeRange; j++) + { + state->tp_dummy[j] = a + state->tp_fib[j + 1] - state->tp_fib[j]; + state->tp_fib[j] = a + state->tp_fib[j + 1]; + } + state->destTape = 0; +} + +/* + * Initialize the slab allocation arena, for the given number of slots. + */ +static void +init_slab_allocator(Tuplesortstate *state, int numSlots) +{ + if (numSlots > 0) + { + char *p; + int i; + + state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE); + state->slabMemoryEnd = state->slabMemoryBegin + + numSlots * SLAB_SLOT_SIZE; + state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin; + USEMEM(state, numSlots * SLAB_SLOT_SIZE); + + p = state->slabMemoryBegin; + for (i = 0; i < numSlots - 1; i++) + { + ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE); + p += SLAB_SLOT_SIZE; + } + ((SlabSlot *) p)->nextfree = NULL; + } + else + { + state->slabMemoryBegin = state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; + } + state->slabAllocatorUsed = true; +} + +/* + * mergeruns -- merge all the completed initial runs. + * + * This implements steps D5, D6 of Algorithm D. All input data has + * already been written to initial runs on tape (see dumptuples). 
+ */ +static void +mergeruns(Tuplesortstate *state) +{ + int tapenum, + svTape, + svRuns, + svDummy; + int numTapes; + int numInputTapes; + + Assert(state->status == TSS_BUILDRUNS); + Assert(state->memtupcount == 0); + + if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) + { + /* + * If there are multiple runs to be merged, when we go to read back + * tuples from disk, abbreviated keys will not have been stored, and + * we don't care to regenerate them. Disable abbreviation from this + * point on. + */ + state->sortKeys->abbrev_converter = NULL; + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; + } + + /* + * Reset tuple memory. We've freed all the tuples that we previously + * allocated. We will use the slab allocator from now on. + */ + MemoryContextResetOnly(state->tuplecontext); + + /* + * We no longer need a large memtuples array. (We will allocate a smaller + * one for the heap later.) + */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + pfree(state->memtuples); + state->memtuples = NULL; + + /* + * If we had fewer runs than tapes, refund the memory that we imagined we + * would need for the tape buffers of the unused tapes. + * + * numTapes and numInputTapes reflect the actual number of tapes we will + * use. Note that the output tape's tape number is maxTapes - 1, so the + * tape numbers of the used tapes are not consecutive, and you cannot just + * loop from 0 to numTapes to visit all used tapes! + */ + if (state->Level == 1) + { + numInputTapes = state->currentRun; + numTapes = numInputTapes + 1; + FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD); + } + else + { + numInputTapes = state->tapeRange; + numTapes = state->maxTapes; + } + + /* + * Initialize the slab allocator. 
We need one slab slot per input tape, + * for the tuples in the heap, plus one to hold the tuple last returned + * from tuplesort_gettuple. (If we're sorting pass-by-val Datums, + * however, we don't need to do allocate anything.) + * + * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism + * to track memory usage of individual tuples. + */ + if (state->tuples) + init_slab_allocator(state, numInputTapes + 1); + else + init_slab_allocator(state, 0); + + /* + * Allocate a new 'memtuples' array, for the heap. It will hold one tuple + * from each input tape. + */ + state->memtupsize = numInputTapes; + state->memtuples = (SortTuple *) MemoryContextAlloc(state->maincontext, + numInputTapes * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* + * Use all the remaining memory we have available for read buffers among + * the input tapes. + * + * We don't try to "rebalance" the memory among tapes, when we start a new + * merge phase, even if some tapes are inactive in the new phase. That + * would be hard, because logtape.c doesn't know where one run ends and + * another begins. When a new merge phase begins, and a tape doesn't + * participate in it, its buffer nevertheless already contains tuples from + * the next run on same tape, so we cannot release the buffer. That's OK + * in practice, merge performance isn't that sensitive to the amount of + * buffers used, and most merge phases use all or almost all tapes, + * anyway. 
+ */ +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d using " INT64_FORMAT " KB of memory for read buffers among %d input tapes", + state->worker, state->availMem / 1024, numInputTapes); +#endif + + state->read_buffer_size = Max(state->availMem / numInputTapes, 0); + USEMEM(state, state->read_buffer_size * numInputTapes); + + /* End of step D2: rewind all output tapes to prepare for merging */ + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size); + + for (;;) + { + /* + * At this point we know that tape[T] is empty. If there's just one + * (real or dummy) run left on each input tape, then only one merge + * pass remains. If we don't have to produce a materialized sorted + * tape, we can stop at this point and do the final merge on-the-fly. + */ + if (!state->randomAccess && !WORKER(state)) + { + bool allOneRun = true; + + Assert(state->tp_runs[state->tapeRange] == 0); + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) + { + allOneRun = false; + break; + } + } + if (allOneRun) + { + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); + /* Initialize for the final merge pass */ + beginmerge(state); + state->status = TSS_FINALMERGE; + return; + } + } + + /* Step D5: merge runs onto tape[T] until tape[P] is empty */ + while (state->tp_runs[state->tapeRange - 1] || + state->tp_dummy[state->tapeRange - 1]) + { + bool allDummy = true; + + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] == 0) + { + allDummy = false; + break; + } + } + + if (allDummy) + { + state->tp_dummy[state->tapeRange]++; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + state->tp_dummy[tapenum]--; + } + else + mergeonerun(state); + } + + /* Step D6: decrease level */ + if (--state->Level == 0) + break; + /* rewind output tape T to use 
as new input */ + LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange], + state->read_buffer_size); + /* rewind used-up input tape P, and prepare it for write pass */ + LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]); + state->tp_runs[state->tapeRange - 1] = 0; + + /* + * reassign tape units per step D6; note we no longer care about A[] + */ + svTape = state->tp_tapenum[state->tapeRange]; + svDummy = state->tp_dummy[state->tapeRange]; + svRuns = state->tp_runs[state->tapeRange]; + for (tapenum = state->tapeRange; tapenum > 0; tapenum--) + { + state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1]; + state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1]; + state->tp_runs[tapenum] = state->tp_runs[tapenum - 1]; + } + state->tp_tapenum[0] = svTape; + state->tp_dummy[0] = svDummy; + state->tp_runs[0] = svRuns; + } + + /* + * Done. Knuth says that the result is on TAPE[1], but since we exited + * the loop without performing the last iteration of step D6, we have not + * rearranged the tape unit assignment, and therefore the result is on + * TAPE[T]. We need to do it this way so that we can freeze the final + * output tape while rewinding it. The last iteration of step D6 would be + * a waste of cycles anyway... + */ + state->result_tape = state->tp_tapenum[state->tapeRange]; + if (!WORKER(state)) + LogicalTapeFreeze(state->tapeset, state->result_tape, NULL); + else + worker_freeze_result_tape(state); + state->status = TSS_SORTEDONTAPE; + + /* Release the read buffers of all the other tapes, by rewinding them. */ + for (tapenum = 0; tapenum < state->maxTapes; tapenum++) + { + if (tapenum != state->result_tape) + LogicalTapeRewindForWrite(state->tapeset, tapenum); + } +} + +/* + * Merge one run from each input tape, except ones with dummy runs. + * + * This is the inner loop of Algorithm D step D5. We know that the + * output tape is TAPE[T]. 
+ */ +static void +mergeonerun(Tuplesortstate *state) +{ + int destTape = state->tp_tapenum[state->tapeRange]; + int srcTape; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. We can also decrease the input run/dummy run counts. + */ + beginmerge(state); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). + */ + while (state->memtupcount > 0) + { + SortTuple stup; + + /* write the tuple to destTape */ + srcTape = state->memtuples[0].srctape; + WRITETUP(state, destTape, &state->memtuples[0]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[0].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple); + + /* + * pull next tuple from the tape, and replace the written-out tuple in + * the heap with it. + */ + if (mergereadnext(state, srcTape, &stup)) + { + stup.srctape = srcTape; + tuplesort_heap_replace_top(state, &stup); + } + else + tuplesort_heap_delete_top(state); + } + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape, and increment its count of real runs. + */ + markrunend(state, destTape); + state->tp_runs[state->tapeRange]++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished %d-way merge step: %s", state->worker, + state->activeTapes, pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * beginmerge - initialize for a merge pass + * + * We decrease the counts of real and dummy runs for each tape, and mark + * which tapes contain active input runs in mergeactive[]. Then, fill the + * merge heap with the first tuple from each active tape. 
+ */ +static void +beginmerge(Tuplesortstate *state) +{ + int activeTapes; + int tapenum; + int srcTape; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + /* Adjust run counts and mark the active tapes */ + memset(state->mergeactive, 0, + state->maxTapes * sizeof(*state->mergeactive)); + activeTapes = 0; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] > 0) + state->tp_dummy[tapenum]--; + else + { + Assert(state->tp_runs[tapenum] > 0); + state->tp_runs[tapenum]--; + srcTape = state->tp_tapenum[tapenum]; + state->mergeactive[srcTape] = true; + activeTapes++; + } + } + Assert(activeTapes > 0); + state->activeTapes = activeTapes; + + /* Load the merge heap with the first tuple from each input tape */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + SortTuple tup; + + if (mergereadnext(state, srcTape, &tup)) + { + tup.srctape = srcTape; + tuplesort_heap_insert(state, &tup); + } + } +} + +/* + * mergereadnext - read next tuple from one merge input tape + * + * Returns false on EOF. + */ +static bool +mergereadnext(Tuplesortstate *state, int srcTape, SortTuple *stup) +{ + unsigned int tuplen; + + if (!state->mergeactive[srcTape]) + return false; /* tape's run is already exhausted */ + + /* read next tuple, if any */ + if ((tuplen = getlen(state, srcTape, true)) == 0) + { + state->mergeactive[srcTape] = false; + return false; + } + READTUP(state, stup, srcTape, tuplen); + + return true; +} + +/* + * dumptuples - remove tuples from memtuples and write initial run to tape + * + * When alltuples = true, dump everything currently in memory. (This case is + * only used at end of input data.) + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Nothing to do if we still fit in available memory and have array slots, + * unless this is the final call during initial run generation. 
+ */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state) && + !alltuples) + return; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. + * + * In general, short final runs are quite possible. Rather than allowing + * a special case where there was a superfluous selectnewtape() call (i.e. + * a call with no subsequent run actually written to destTape), we prefer + * to write out a 0 tuple run. + * + * mergereadnext() is prepared for 0 tuple runs, and will reliably mark + * the tape inactive for the merge when called from beginmerge(). This + * case is therefore similar to the case where mergeonerun() finds a dummy + * run for the tape, and so doesn't need to merge a run from the tape (or + * conceptually "merges" the dummy run, if you prefer). According to + * Knuth, Algorithm D "isn't strictly optimal" in its method of + * distribution and dummy run assignment; this edge case seems very + * unlikely to make that appreciably worse. 
+ */ + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d starting quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in the sizes of incoming tuples. Fragmentation due to + * AllocSetFree's bucketing by size class might be particularly bad if + * this step wasn't taken. 
+ */ + MemoryContextReset(state->tuplecontext); + + markrunend(state, state->tp_tapenum[state->destTape]); + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished writing run %d to tape %d: %s", + state->worker, state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + if (!alltuples) + selectnewtape(state); +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewindForRead(state->tapeset, + state->result_tape, + 0); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->tapeset, + state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void 
+tuplesort_restorepos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + LogicalTapeSeek(state->tapeset, + state->result_tape, + state->markpos_block, + state->markpos_offset); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + TuplesortInstrumentation *stats) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + tuplesort_updatemax(state); + + if (state->isMaxSpaceDisk) + stats->spaceType = SORT_SPACE_TYPE_DISK; + else + stats->spaceType = SORT_SPACE_TYPE_MEMORY; + stats->spaceUsed = (state->maxSpace + 1023) / 1024; + + switch (state->maxSpaceStatus) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT; + else + stats->sortMethod = SORT_TYPE_QUICKSORT; + break; + case TSS_SORTEDONTAPE: + stats->sortMethod = SORT_TYPE_EXTERNAL_SORT; + break; + case TSS_FINALMERGE: + stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE; + break; + default: + stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS; + break; + } +} + +/* + * Convert TuplesortMethod to a string. + */ +const char * +tuplesort_method_name(TuplesortMethod m) +{ + switch (m) + { + case SORT_TYPE_STILL_IN_PROGRESS: + return "still in progress"; + case SORT_TYPE_TOP_N_HEAPSORT: + return "top-N heapsort"; + case SORT_TYPE_QUICKSORT: + return "quicksort"; + case SORT_TYPE_EXTERNAL_SORT: + return "external sort"; + case SORT_TYPE_EXTERNAL_MERGE: + return "external merge"; + } + + return "unknown"; +} + +/* + * Convert TuplesortSpaceType to a string. + */ +const char * +tuplesort_space_type_name(TuplesortSpaceType t) +{ + Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY); + return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory"; +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + */ + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + Assert(SERIAL(state)); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount < state->bound) + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup); + } + else + { + /* + * The heap is full. Replace the largest entry with the new + * tuple, or just discard it, if it's larger than anything already + * in the heap. + */ + if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + tuplesort_heap_replace_top(state, &state->memtuples[i]); + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + Assert(SERIAL(state)); + + /* + * We can unheapify in place because each delete-top call will remove the + * largest entry, which we can promptly store in the newly freed slot at + * the end. Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_delete_top(state); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. 
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts, and external sort runs. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + Assert(!LEADER(state)); + + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: For some callers, tuple points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples; + int j; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (COMPARETUP(state, tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * Remove the tuple at state->memtuples[0] from the heap. Decrement + * memtupcount, and sift up to maintain the heap invariant. + * + * The caller has already free'd the tuple the top node points to, + * if necessary. 
+ */ +static void +tuplesort_heap_delete_top(Tuplesortstate *state) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + + if (--state->memtupcount <= 0) + return; + + /* + * Remove the last tuple in the heap, and re-insert it, by replacing the + * current top node with it. + */ + tuple = &memtuples[state->memtupcount]; + tuplesort_heap_replace_top(state, tuple); +} + +/* + * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to + * maintain the heap invariant. + * + * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H, + * Heapsort, steps H3-H8). + */ +static void +tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples = state->memtuples; + unsigned int i, + n; + + Assert(state->memtupcount >= 1); + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (COMPARETUP(state, tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(Tuplesortstate *state, int tapenum, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(state->tapeset, tapenum, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(Tuplesortstate *state, int tapenum) +{ + unsigned int len = 0; + + LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. + * + * We use next free slot from the slab allocator, or palloc() if the tuple + * is too large for that. + */ +static void * +readtup_alloc(Tuplesortstate *state, Size tuplen) +{ + SlabSlot *buf; + + /* + * We pre-allocate enough slots in the slab arena that we should never run + * out. 
+ */ + Assert(state->slabFreeHead); + + if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead) + return MemoryContextAlloc(state->sortcontext, tuplen); + else + { + buf = state->slabFreeHead; + /* Reuse this slot */ + state->slabFreeHead = buf->nextfree; + + return buf; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed
"tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); + } +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(state->tapeset, tapenum, + tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = 
heap_getattr(rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreHeapTuple(ltup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreHeapTuple(rtup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + 
state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = 
index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. + */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This is required for + * btree indexes, since heap TID is treated as an implicit last key + * attribute in order to ensure that all keys in the index are physically + * unique. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? 
-1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. + */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? 
-1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_index() should not be called"); +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +/* + * Parallel sort routines + */ + +/* + * tuplesort_estimate_shared - estimate required shared memory allocation + * + * nWorkers is an estimate of the number of workers (it's the number that + * will be requested). + */ +Size +tuplesort_estimate_shared(int nWorkers) +{ + Size tapesSize; + + Assert(nWorkers > 0); + + /* Make sure that BufFile shared state is MAXALIGN'd */ + tapesSize = mul_size(sizeof(TapeShare), nWorkers); + tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes))); + + return tapesSize; +} + +/* + * tuplesort_initialize_shared - initialize shared tuplesort state + * + * Must be called from leader process before workers are launched, to + * establish state needed up-front for worker tuplesortstates. nWorkers + * should match the argument passed to tuplesort_estimate_shared(). 
+ */ +void +tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg) +{ + int i; + + Assert(nWorkers > 0); + + SpinLockInit(&shared->mutex); + shared->currentWorker = 0; + shared->workersFinished = 0; + SharedFileSetInit(&shared->fileset, seg); + shared->nTapes = nWorkers; + for (i = 0; i < nWorkers; i++) + { + shared->tapes[i].firstblocknumber = 0L; + } +} + +/* + * tuplesort_attach_shared - attach to shared tuplesort state + * + * Must be called by all worker processes. + */ +void +tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) +{ + /* Attach to SharedFileSet */ + SharedFileSetAttach(&shared->fileset, seg); +} + +/* + * worker_get_identifier - Assign and return ordinal identifier for worker + * + * The order in which these are assigned is not well defined, and should not + * matter; worker numbers across parallel sort participants need only be + * distinct and gapless. logtape.c requires this. + * + * Note that the identifiers assigned from here have no relation to + * ParallelWorkerNumber number, to avoid making any assumption about + * caller's requirements. However, we do follow the ParallelWorkerNumber + * convention of representing a non-worker with worker number -1. This + * includes the leader, as well as serial Tuplesort processes. + */ +static int +worker_get_identifier(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int worker; + + Assert(WORKER(state)); + + SpinLockAcquire(&shared->mutex); + worker = shared->currentWorker++; + SpinLockRelease(&shared->mutex); + + return worker; +} + +/* + * worker_freeze_result_tape - freeze worker's result tape for leader + * + * This is called by workers just after the result tape has been determined, + * instead of calling LogicalTapeFreeze() directly. They do so because + * workers require a few additional steps over similar serial + * TSS_SORTEDONTAPE external sort cases, which also happen here. 
The extra + * steps are around freeing now unneeded resources, and representing to + * leader that worker's input run is available for its merge. + * + * There should only be one final output run for each worker, which consists + * of all tuples that were originally input into worker. + */ +static void +worker_freeze_result_tape(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + TapeShare output; + + Assert(WORKER(state)); + Assert(state->result_tape != -1); + Assert(state->memtupcount == 0); + + /* + * Free most remaining memory, in case caller is sensitive to our holding + * on to it. memtuples may not be a tiny merge heap at this point. + */ + pfree(state->memtuples); + /* Be tidy */ + state->memtuples = NULL; + state->memtupsize = 0; + + /* + * Parallel worker requires result tape metadata, which is to be stored in + * shared memory for leader + */ + LogicalTapeFreeze(state->tapeset, state->result_tape, &output); + + /* Store properties of output tape, and update finished worker count */ + SpinLockAcquire(&shared->mutex); + shared->tapes[state->worker] = output; + shared->workersFinished++; + SpinLockRelease(&shared->mutex); +} + +/* + * worker_nomergeruns - dump memtuples in worker, without merging + * + * This is called as an alternative to mergeruns() with a worker when no + * merging is required. + */ +static void +worker_nomergeruns(Tuplesortstate *state) +{ + Assert(WORKER(state)); + Assert(state->result_tape == -1); + + state->result_tape = state->tp_tapenum[state->destTape]; + worker_freeze_result_tape(state); +} + +/* + * leader_takeover_tapes - create tapeset for leader from worker tapes + * + * So far, leader Tuplesortstate has performed no actual sorting. By now, all + * sorting has occurred in workers, all of which must have already returned + * from tuplesort_performsort().
+ * + * When this returns, leader process is left in a state that is virtually + * indistinguishable from it having generated runs as a serial external sort + * might have. + */ +static void +leader_takeover_tapes(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int nParticipants = state->nParticipants; + int workersFinished; + int j; + + Assert(LEADER(state)); + Assert(nParticipants >= 1); + + SpinLockAcquire(&shared->mutex); + workersFinished = shared->workersFinished; + SpinLockRelease(&shared->mutex); + + if (nParticipants != workersFinished) + elog(ERROR, "cannot take over tapes before all workers finish"); + + /* + * Create the tapeset from worker tapes, including a leader-owned tape at + * the end. Parallel workers are far more expensive than logical tapes, + * so the number of tapes allocated here should never be excessive. + * + * We still have a leader tape, though it's not possible to write to it + * due to restrictions in the shared fileset infrastructure used by + * logtape.c. It will never be written to in practice because + * randomAccess is disallowed for parallel sorts. + */ + inittapestate(state, nParticipants + 1); + state->tapeset = LogicalTapeSetCreate(nParticipants + 1, false, + shared->tapes, &shared->fileset, + state->worker); + + /* mergeruns() relies on currentRun for # of runs (in one-pass cases) */ + state->currentRun = nParticipants; + + /* + * Initialize variables of Algorithm D to be consistent with runs from + * workers having been generated in the leader. + * + * There will always be exactly 1 run per worker, and exactly one input + * tape per run, because workers always output exactly 1 run, even when + * there were no input tuples for workers to sort. 
+ */ + for (j = 0; j < state->maxTapes; j++) + { + /* One real run; no dummy runs for worker tapes */ + state->tp_fib[j] = 1; + state->tp_runs[j] = 1; + state->tp_dummy[j] = 0; + state->tp_tapenum[j] = j; + } + /* Leader tape gets one dummy run, and no real runs */ + state->tp_fib[state->tapeRange] = 0; + state->tp_runs[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 1; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort15.c b/src/tuplesort15.c new file mode 100644 index 0000000000..90e26745df --- /dev/null +++ b/src/tuplesort15.c @@ -0,0 +1,4698 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about external + * sorting algorithms. The algorithm we use is a balanced k-way merge. + * Before PostgreSQL 15, we used the polyphase merge algorithm (Knuth's + * Algorithm 5.4.2D), but with modern hardware, a straightforward balanced + * merge is better. Knuth is assuming that tape drives are expensive + * beasts, and in particular that there will always be many more runs than + * tape drives. 
The polyphase merge algorithm was good at keeping all the + * tape drives busy, but in our implementation a "tape drive" doesn't cost + * much more than a few Kb of memory buffers, so we can afford to have + * lots of them. In particular, if we can have as many tape drives as + * sorted runs, we can eliminate any repeated I/O at all. + * + * Historically, we divided the input into sorted runs using replacement + * selection, in the form of a priority tree implemented as a heap + * (essentially Knuth's Algorithm 5.2.3H), but now we always use quicksort + * for run generation. + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape. If we reach the max number of tapes, we write + * subsequent runs on the existing tapes in a round-robin fashion. We will + * need multiple merge passes to finish the merge in that case. After the + * end of the input is reached, we dump out remaining tuples in memory into + * a final run, then merge the runs. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and replace it + * with the next tuple from its source tape (if any). When the heap empties, + * the merge is complete. The basic merge algorithm thus needs very little + * memory --- only M tuples for an M-way merge, and M is constrained to a + * small number. 
However, we can still make good use of our full workMem + * allocation by pre-reading additional blocks from each source tape. Without + * prereading, our access pattern to the temporary file would be very erratic; + * on average we'd read one block from each of M source tapes during the same + * time that we're writing M blocks to the output tape, so there is no + * sequentiality of access at all, defeating the read-ahead methods used by + * most Unix kernels. Worse, the output tape gets written into a very random + * sequence of blocks of the temp file, ensuring that things will be even + * worse when it comes time to read that tape. A straightforward merge pass + * thus ends up doing a lot of waiting for disk seeks. We can improve matters + * by prereading from each source tape sequentially, loading about workMem/M + * bytes from each tape in turn, and making the sequential blocks immediately + * available for reuse. This approach helps to localize both read and write + * accesses. The pre-reading is handled by logtape.c, we just tell it how + * much memory to use for the buffers. + * + * In the current code we determine the number of input tapes M on the basis + * of workMem: we want workMem/M to be large enough that we read a fair + * amount of data each time we read from a tape, so as to maintain the + * locality of access described above. Nonetheless, with large workMem we + * can have many tapes. The logical "tapes" are implemented by logtape.c, + * which avoids space wastage by recycling disk space as soon as each block + * is read from its "tape". + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. 
The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * This module supports parallel sorting. Parallel sorts involve coordination + * among one or more worker processes, and a leader process, each with its own + * tuplesort state. The leader process (or, more accurately, the + * Tuplesortstate associated with a leader process) creates a full tapeset + * consisting of worker tapes with one run to merge; a run for every + * worker process. This is then merged. Worker processes are guaranteed to + * produce exactly one output run from their partial input. + * + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* Sort parallel code from state for sort__start probes */ +#define PARALLEL_SORT(state) ((state)->shared == NULL ? 0 : \ + (state)->worker >= 0 ? 1 : 2) + +/* + * Initial size of memtuples array. 
We pick the element count so that the
+ * array's byte size is just above ALLOCSET_SEPARATE_THRESHOLD (hence the
+ * "+ 1" after the division), in the hope that the overhead of allocation
+ * might be lowered.  However, we don't consider array sizes less than 1024
+ * elements.
+ *
+ */
+#define INITIAL_MEMTUPSIZE Max(1024, \
+	ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1)
+
+/* GUC variables */
+#ifdef TRACE_SORT
+bool		trace_sort = false;
+#endif
+
+#ifdef DEBUG_BOUNDED_SORT
+bool		optimize_bounded_sort = true;
+#endif
+
+
+/*
+ * The objects we actually sort are SortTuple structs.  These contain
+ * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple),
+ * which is a separate palloc chunk --- we assume it is just one chunk and
+ * can be freed by a simple pfree() (except during merge, when we use a
+ * simple slab allocator).  SortTuples also contain the tuple's first key
+ * column in Datum/nullflag format, and a source/input tape number that
+ * tracks which tape each heap element/slot belongs to during merging.
+ *
+ * Storing the first key column lets us save heap_getattr or index_getattr
+ * calls during tuple comparisons.  We could extract and save all the key
+ * columns not just the first, but this would increase code complexity and
+ * overhead, and wouldn't actually save any comparison cycles in the common
+ * case where the first key determines the comparison result.  Note that
+ * for a pass-by-reference datatype, datum1 points into the "tuple" storage.
+ *
+ * There is one special case: when the sort support infrastructure provides an
+ * "abbreviated key" representation, where the key is (typically) a pass by
+ * value proxy for a pass by reference type.  In this case, the abbreviated key
+ * is stored in datum1 in place of the actual first key column.
+ *
+ * When sorting single Datums, the data value is represented directly by
+ * datum1/isnull1 for pass by value types (or null values).
If the datatype is
+ * pass-by-reference and isnull1 is false, then "tuple" points to a separately
+ * palloc'd data value, otherwise "tuple" is NULL.  The value of datum1 is then
+ * either the same pointer as "tuple", or is an abbreviated key value as
+ * described above.  Accordingly, "tuple" is always used in preference to
+ * datum1 as the authoritative value for pass-by-reference cases.
+ */
+typedef struct
+{
+	void	   *tuple;			/* the tuple itself */
+	Datum		datum1;			/* value of first key column */
+	bool		isnull1;		/* is first key column NULL? */
+	int			srctape;		/* source tape number */
+} SortTuple;
+
+/*
+ * During merge, we use a pre-allocated set of fixed-size slots to hold
+ * tuples, to avoid palloc/pfree overhead.
+ *
+ * Merge doesn't require a lot of memory, so we can afford to waste some,
+ * by using gratuitously-sized slots.  If a tuple is larger than 1 kB, the
+ * palloc() overhead is not significant anymore.
+ *
+ * 'nextfree' is valid when this chunk is in the free list.  When in use, the
+ * slot holds a tuple.
+ */
+#define SLAB_SLOT_SIZE 1024
+
+typedef union SlabSlot
+{
+	union SlabSlot *nextfree;	/* free-list link, valid only while unused */
+	char		buffer[SLAB_SLOT_SIZE]; /* tuple storage while in use */
+} SlabSlot;
+
+/*
+ * Possible states of a Tuplesort object.  These denote the states that
+ * persist between calls of Tuplesort routines.
+ */
+typedef enum
+{
+	TSS_INITIAL,				/* Loading tuples; still within memory limit */
+	TSS_BOUNDED,				/* Loading tuples into bounded-size heap */
+	TSS_BUILDRUNS,				/* Loading tuples; writing to tape */
+	TSS_SORTEDINMEM,			/* Sort completed entirely in memory */
+	TSS_SORTEDONTAPE,			/* Sort completed, final run is on tape */
+	TSS_FINALMERGE				/* Performing final merge on-the-fly */
+} TupSortStatus;
+
+/*
+ * Parameters for calculation of number of tapes to use --- see inittapes()
+ * and tuplesort_merge_order().
+ *
+ * In this calculation we assume that each tape will cost us about 1 block's
+ * worth of buffer space.
This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much buffer space we'd like to allocate for each + * input tape, for pre-reading (see discussion at top of file). This is *in + * addition to* the 1 block already included in TAPE_BUFFER_OVERHEAD. + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? 
*/ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* max number of input tapes to merge in each + * pass */ + int64 maxSpace; /* maximum amount of space occupied among sort + * of groups, either in-memory or on-disk */ + bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk + * space, false when it's value for in-memory + * space */ + TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ + MemoryContext maincontext; /* memory context for tuple sort metadata that + * persists across multiple batches */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as ab. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. 
Unless the slab allocator is + * used, after writing the tuple, pfree() the out-of-line data (not the + * SortTuple struct!), and increase state->availMem by the amount of + * memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, LogicalTape *tape, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 'len' is + * the already-read length of the stored tuple. The tuple is allocated + * from the slab memory arena, or is palloc'd, see readtup_alloc(). + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. 
+ * + * Initially, all the slots are kept in a linked list of free slots. When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Memory used for input and output tape buffers. */ + size_t tape_buffer_mem; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Logical tapes, for merging. + * + * The initial runs are written in the output tapes. In each merge pass, + * the output tapes of the previous pass become the input tapes, and new + * output tapes are created as needed. When nInputTapes equals + * nInputRuns, there is only one merge pass left. + */ + LogicalTape **inputTapes; + int nInputTapes; + int nInputRuns; + + LogicalTape **outputTapes; + int nOutputTapes; + int nOutputRuns; + + LogicalTape *destTape; /* current output tape */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. 
(In the tape case, the tape's current read + * position is also critical state.) + */ + LogicalTape *result_tape; /* actual tape of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run that the leader needs to merge. Typically includes a + * worker state held by the leader process itself. Set in the leader + * Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. + */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. 
+ */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. + */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 high_mask; /* masks for sortable part of hash code */ + uint32 low_mask; + uint32 max_buckets; + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +/* + * Private mutable state of tuplesort-parallel-operation. This is allocated + * in shared memory. + */ +struct Sharedsort +{ + /* mutex protects all fields prior to tapes */ + slock_t mutex; + + /* + * currentWorker generates ordinal identifier numbers for parallel sort + * workers. These start from 0, and are always gapless. + * + * Workers increment workersFinished to indicate having finished. If this + * is equal to state.nParticipants within the leader, leader is ready to + * merge worker runs. 
+ */ + int currentWorker; + int workersFinished; + + /* Temporary file space */ + SharedFileSet fileset; + + /* Size of tapes flexible array */ + int nTapes; + + /* + * Tapes array used by workers to report back information needed by the + * leader to concatenate all worker tapes into one for merging + */ + TapeShare tapes[FLEXIBLE_ARRAY_MEMBER]; +}; + +/* + * Is the given tuple allocated from the slab memory arena? + */ +#define IS_SLAB_SLOT(state, tuple) \ + ((char *) (tuple) >= (state)->slabMemoryBegin && \ + (char *) (tuple) < (state)->slabMemoryEnd) + +/* + * Return the given tuple to the slab memory free list, or free it + * if it was palloc'd. + */ +#define RELEASE_SLAB_SLOT(state, tuple) \ + do { \ + SlabSlot *buf = (SlabSlot *) tuple; \ + \ + if (IS_SLAB_SLOT((state), buf)) \ + { \ + buf->nextfree = (state)->slabFreeHead; \ + (state)->slabFreeHead = buf; \ + } else \ + pfree(buf); \ + } while(0) + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->slabAllocatorUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) +#define SERIAL(state) ((state)->shared == NULL) +#define WORKER(state) ((state)->shared && (state)->worker != -1) +#define LEADER(state) ((state)->shared && (state)->worker == -1) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). 
The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. 
+ * Note that this places a responsibility on copytup routines to use the
+ * correct memory context for these tuples (and to not use the reset
+ * context for anything whose lifetime needs to span multiple external
+ * sort runs).  readtup routines use the slab allocator (they cannot use
+ * the reset context because it gets deleted at the point that merging
+ * begins).
+ */
+
+/*
+ * Read exactly 'len' bytes from 'tape' into 'ptr', raising an error if the
+ * tape yields fewer.  When using this macro, beware of double evaluation of
+ * len.
+ */
+#define LogicalTapeReadExact(tape, ptr, len) \
+	do { \
+		if (LogicalTapeRead(tape, ptr, len) != (size_t) (len)) \
+			elog(ERROR, "unexpected end of data"); \
+	} while(0)
+
+
+/* Forward declarations of file-local routines */
+static Tuplesortstate *tuplesort_begin_common(int workMem,
+											  SortCoordinate coordinate,
+											  bool randomAccess);
+static void tuplesort_begin_batch(Tuplesortstate *state);
+static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static bool consider_abort_common(Tuplesortstate *state);
+static void inittapes(Tuplesortstate *state, bool mergeruns);
+static void inittapestate(Tuplesortstate *state, int maxTapes);
+static void selectnewtape(Tuplesortstate *state);
+static void init_slab_allocator(Tuplesortstate *state, int numSlots);
+static void mergeruns(Tuplesortstate *state);
+static void mergeonerun(Tuplesortstate *state);
+static void beginmerge(Tuplesortstate *state);
+static bool mergereadnext(Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup);
+static void dumptuples(Tuplesortstate *state, bool alltuples);
+static void make_bounded_heap(Tuplesortstate *state);
+static void sort_bounded_heap(Tuplesortstate *state);
+static void tuplesort_sort_memtuples(Tuplesortstate *state);
+static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple);
+static void tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple);
+static void tuplesort_heap_delete_top(Tuplesortstate *state);
+static void reversedirection(Tuplesortstate *state);
+static unsigned int getlen(LogicalTape *tape, bool eofOK);
+static void markrunend(LogicalTape *tape);
+static void *readtup_alloc(Tuplesortstate *state, Size tuplen);
+/*
+ * Per-tuple-kind comparetup/copytup/writetup/readtup implementations; their
+ * signatures match the function pointers of the same names in Tuplesortstate.
+ */
+static int	comparetup_heap(const SortTuple *a, const SortTuple *b,
+							Tuplesortstate *state);
+static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_heap(Tuplesortstate *state, LogicalTape *tape,
+						  SortTuple *stup);
+static void readtup_heap(Tuplesortstate *state, SortTuple *stup,
+						 LogicalTape *tape, unsigned int len);
+static int	comparetup_cluster(const SortTuple *a, const SortTuple *b,
+							   Tuplesortstate *state);
+static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_cluster(Tuplesortstate *state, LogicalTape *tape,
+							 SortTuple *stup);
+static void readtup_cluster(Tuplesortstate *state, SortTuple *stup,
+							LogicalTape *tape, unsigned int len);
+static int	comparetup_index_btree(const SortTuple *a, const SortTuple *b,
+								   Tuplesortstate *state);
+static int	comparetup_index_hash(const SortTuple *a, const SortTuple *b,
+								  Tuplesortstate *state);
+static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_index(Tuplesortstate *state, LogicalTape *tape,
+						   SortTuple *stup);
+static void readtup_index(Tuplesortstate *state, SortTuple *stup,
+						  LogicalTape *tape, unsigned int len);
+static int	comparetup_datum(const SortTuple *a, const SortTuple *b,
+							 Tuplesortstate *state);
+static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup);
+static void writetup_datum(Tuplesortstate *state, LogicalTape *tape,
+						   SortTuple *stup);
+static void readtup_datum(Tuplesortstate *state, SortTuple *stup,
+						  LogicalTape *tape, unsigned int len);
+/* Parallel-sort helpers and cleanup */
+static int	worker_get_identifier(Tuplesortstate *state);
+static void worker_freeze_result_tape(Tuplesortstate *state);
+static void worker_nomergeruns(Tuplesortstate *state);
+static void leader_takeover_tapes(Tuplesortstate *state);
+static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup);
+static void tuplesort_free(Tuplesortstate *state);
+static void tuplesort_updatemax(Tuplesortstate *state);
+
+/*
+ * Special versions of qsort just for SortTuple objects.  qsort_tuple() sorts
+ * any variant of SortTuples, using the appropriate comparetup function.
+ * qsort_ssup() is specialized for the case where the comparetup function
+ * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts
+ * and Datum sorts.
+ */
+
+/* qsort_tuple: comparator supplied at run time, extra argument is the state */
+#define ST_SORT qsort_tuple
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE_RUNTIME_POINTER
+#define ST_COMPARE_ARG_TYPE Tuplesortstate
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DECLARE
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+/* qsort_ssup: comparison fixed to ApplySortComparator on datum1/isnull1 */
+#define ST_SORT qsort_ssup
+#define ST_ELEMENT_TYPE SortTuple
+#define ST_COMPARE(a, b, ssup) \
+	ApplySortComparator((a)->datum1, (a)->isnull1, \
+						(b)->datum1, (b)->isnull1, (ssup))
+#define ST_COMPARE_ARG_TYPE SortSupportData
+#define ST_CHECK_FOR_INTERRUPTS
+#define ST_SCOPE static
+#define ST_DEFINE
+#include "lib/sort_template.h"
+
+/*
+ *		tuplesort_begin_xxx
+ *
+ * Initialize for a tuple sort operation.
+ *
+ * After calling tuplesort_begin, the caller should call tuplesort_putXXX
+ * zero or more times, then call tuplesort_performsort when all the tuples
+ * have been supplied.  After performsort, retrieve the tuples in sorted
+ * order by calling tuplesort_getXXX until it returns false/NULL.  (If random
+ * access was requested, rescan, markpos, and restorepos can also be called.)
+ * Call tuplesort_end to terminate the operation and release memory/disk space.
+ *
+ * Each variant of tuplesort_begin has a workMem parameter specifying the
+ * maximum number of kilobytes of RAM to use before spilling data to disk.
+ * (The normal value of this parameter is work_mem, but some callers use
+ * other values.)  Each variant also has a randomAccess parameter specifying
+ * whether the caller needs non-sequential access to the sort result.
+ */
+
+static Tuplesortstate *
+tuplesort_begin_common(int workMem, SortCoordinate coordinate,
+					   bool randomAccess)
+{
+	Tuplesortstate *ts;
+	MemoryContext mainCxt;
+	MemoryContext sortCxt;
+	MemoryContext savedCxt;
+
+	/* Parallel sorts cannot offer random access; see leader_takeover_tapes() */
+	if (coordinate && randomAccess)
+		elog(ERROR, "random access disallowed under parallel sort");
+
+	/*
+	 * Long-lived context: outlives tuplesort_reset, and so holds whatever is
+	 * worth keeping while several similar batches are sorted in turn.
+	 */
+	mainCxt = AllocSetContextCreate(CurrentMemoryContext,
+									"TupleSort main",
+									ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * Per-sort working context, a child of the one above; its contents are
+	 * discarded by tuplesort_reset.  (A further context for tuple data is
+	 * created in tuplesort_begin_batch.)
+	 */
+	sortCxt = AllocSetContextCreate(mainCxt,
+									"TupleSort sort",
+									ALLOCSET_DEFAULT_SIZES);
+
+	/*
+	 * The state struct itself lives in the long-lived context, so no
+	 * separate pfree() is required for it at shutdown.
+	 */
+	savedCxt = MemoryContextSwitchTo(mainCxt);
+
+	ts = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		pg_rusage_init(&ts->ru_start);
+#endif
+
+	ts->maincontext = mainCxt;
+	ts->sortcontext = sortCxt;
+	ts->randomAccess = randomAccess;
+	ts->tuples = true;
+
+	/*
+	 * Never run with less than 64KB (the smallest valid work_mem setting).
+	 * This guards against parallel callers that split their budget across
+	 * many workers and leave each one with next to nothing.
+	 */
+	ts->allowedMem = Max(workMem, 64) * (int64) 1024;
+
+	/*
+	 * Start with no array and the initial element count; the array itself
+	 * is allocated by tuplesort_begin_batch.
+	 */
+	ts->memtuples = NULL;
+	ts->memtupsize = INITIAL_MEMTUPSIZE;
+
+	/* With the batch-independent fields in place, set up per-batch state */
+	tuplesort_begin_batch(ts);
+
+	/* Finally, fill in the parallel-sort fields from the caller's coordinate */
+	if (coordinate == NULL)
+	{
+		/* Serial sort */
+		ts->shared = NULL;
+		ts->worker = -1;
+		ts->nParticipants = -1;
+	}
+	else
+	{
+		ts->shared = coordinate->sharedsort;
+
+		if (coordinate->isWorker)
+		{
+			/* Parallel worker: turns all its input into one final run */
+			ts->worker = worker_get_identifier(ts);
+			ts->nParticipants = -1;
+		}
+		else
+		{
+			/* Parallel leader: this state exists only for the final merge */
+			ts->worker = -1;
+			ts->nParticipants = coordinate->nParticipants;
+			Assert(ts->nParticipants >= 1);
+		}
+	}
+
+	MemoryContextSwitchTo(savedCxt);
+
+	return ts;
+}
+
+/*
+ *		tuplesort_begin_batch
+ *
+ * Set up, or reset, all state needed for processing a new set of tuples with
+ * this sort state.  Called both from tuplesort_begin_common (the first time
+ * sorting with this sort state) and tuplesort_reset (for subsequent usages).
+ */
+static void
+tuplesort_begin_batch(Tuplesortstate *state)
+{
+	MemoryContext savedCxt = MemoryContextSwitchTo(state->maincontext);
+
+	/*
+	 * Context for tuples handed in by the caller (e.g. IndexTuples).
+	 *
+	 * Keeping those in a dedicated child context simplifies memory
+	 * management, and resetting it at key points limits fragmentation.  The
+	 * memtuples array is deliberately not placed here but in the parent
+	 * context, since it never needs to be released early.
+	 */
+	state->tuplecontext = AllocSetContextCreate(state->sortcontext,
+												"Caller tuples",
+												ALLOCSET_DEFAULT_SIZES);
+
+	state->status = TSS_INITIAL;
+	state->boundUsed = false;
+	state->bounded = false;
+
+	state->availMem = state->allowedMem;
+
+	state->tapeset = NULL;
+
+	state->memtupcount = 0;
+	state->slabAllocatorUsed = false;
+	state->growmemtuples = true;
+
+	/*
+	 * An array left over from an earlier batch is kept only if it still has
+	 * the initial size; one that has grown is dropped and sized afresh.
+	 */
+	if (state->memtuples != NULL)
+	{
+		if (state->memtupsize != INITIAL_MEMTUPSIZE)
+		{
+			pfree(state->memtuples);
+			state->memtuples = NULL;
+			state->memtupsize = INITIAL_MEMTUPSIZE;
+		}
+	}
+
+	/* Allocate the array when there is none, charging it to the budget */
+	if (state->memtuples == NULL)
+	{
+		state->memtuples = (SortTuple *) palloc(sizeof(SortTuple) * state->memtupsize);
+		USEMEM(state, GetMemoryChunkSpace(state->memtuples));
+	}
+
+	/* The budget has to cover at least the minimal memtuples array */
+	if (LACKMEM(state))
+		elog(ERROR, "insufficient memory allowed for sort");
+
+	state->currentRun = 0;
+
+	/*
+	 * The tape fields (inputTapes, outputTapes, ...) are left alone here;
+	 * inittapes() sets them up if they turn out to be needed.
+	 */
+
+	state->result_tape = NULL;	/* flag that result tape has not been formed */
+
+	MemoryContextSwitchTo(savedCxt);
+}
+
+Tuplesortstate *
+tuplesort_begin_heap(TupleDesc tupDesc,
+					 int nkeys, AttrNumber *attNums,
+					 Oid *sortOperators, Oid *sortCollations,
+					 bool *nullsFirstFlags,
+					 int workMem, SortCoordinate coordinate, bool randomAccess)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   randomAccess);
+	MemoryContext oldcontext;
+	int			i;
+
+	oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+	AssertArg(nkeys > 0);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c",
+			 nkeys, workMem, randomAccess ?
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc, &TTSOpsHeapTuple); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + pfree(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, + SortCoordinate coordinate, + bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + BTScanInsert indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey(indexRel, NULL); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey->scankeys + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? 
+		BTGreaterStrategyNumber : BTLessStrategyNumber;
+
+		PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey);
+	}
+
+	pfree(indexScanKey);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return state;
+}
+
+Tuplesortstate *
+tuplesort_begin_index_hash(Relation heapRel,
+						   Relation indexRel,
+						   uint32 high_mask,
+						   uint32 low_mask,
+						   uint32 max_buckets,
+						   int workMem,
+						   SortCoordinate coordinate,
+						   bool randomAccess)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   randomAccess);
+	MemoryContext oldcontext;
+
+	oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: high_mask = 0x%x, low_mask = 0x%x, "
+			 "max_buckets = 0x%x, workMem = %d, randomAccess = %c",
+			 high_mask,
+			 low_mask,
+			 max_buckets,
+			 workMem, randomAccess ? 't' : 'f');
+#endif
+
+	state->nKeys = 1;			/* Only one sort column, the hash code */
+
+	state->comparetup = comparetup_index_hash;
+	state->copytup = copytup_index;
+	state->writetup = writetup_index;
+	state->readtup = readtup_index;
+
+	state->heapRel = heapRel;
+	state->indexRel = indexRel;
+
+	state->high_mask = high_mask;
+	state->low_mask = low_mask;
+	state->max_buckets = max_buckets;
+
+	MemoryContextSwitchTo(oldcontext);
+
+	return state;
+}
+
+/*
+ * tuplesort_begin_index_gist
+ *
+ * Begin a sort of index tuples whose per-column ordering comes from the
+ * sort support looked up by PrepareSortSupportFromGistIndexRel().
+ *
+ * heapRel and indexRel are remembered in the sort state; workMem,
+ * coordinate and randomAccess are passed straight to
+ * tuplesort_begin_common().  Returns the new sort state.
+ */
+Tuplesortstate *
+tuplesort_begin_index_gist(Relation heapRel,
+						   Relation indexRel,
+						   int workMem,
+						   SortCoordinate coordinate,
+						   bool randomAccess)
+{
+	Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate,
+												   randomAccess);
+	MemoryContext oldcontext;
+	int			i;
+
+	/*
+	 * Allocate the sort keys and their support data in maincontext, like the
+	 * other tuplesort_begin_* routines.  sortcontext is reset by
+	 * tuplesort_free() on every tuplesort_reset(), which would leave
+	 * state->sortKeys pointing at freed memory.
+	 */
+	oldcontext = MemoryContextSwitchTo(state->maincontext);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+		elog(LOG,
+			 "begin index sort: workMem = %d, randomAccess = %c",
+			 workMem, randomAccess ?
't' : 'f'); +#endif + + state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = indexRel->rd_indcollation[i]; + sortKey->ssup_nulls_first = false; + sortKey->ssup_attno = i + 1; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + /* Look for a sort support function */ + PrepareSortSupportFromGistIndexRel(indexRel, sortKey); + } + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, int workMem, + SortCoordinate coordinate, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->maincontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess, + PARALLEL_SORT(state)); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple as needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each SortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. 
(Actually, we could allow it
+ * as long as the sort hasn't spilled to disk, but there seems no need for
+ * delayed calls at the moment.)
+ *
+ * This is a hint only. The tuplesort may still return more tuples than
+ * requested. Parallel leader tuplesorts will always ignore the hint.
+ */
+void
+tuplesort_set_bound(Tuplesortstate *state, int64 bound)
+{
+	/* Assert we're called before loading any tuples */
+	Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
+	/* Can't set the bound twice, either */
+	Assert(!state->bounded);
+	/* Also, this shouldn't be called in a parallel worker */
+	Assert(!WORKER(state));
+
+	/* Parallel leader allows but ignores hint */
+	if (LEADER(state))
+		return;
+
+#ifdef DEBUG_BOUNDED_SORT
+	/* Honor GUC setting that disables the feature (for easy testing) */
+	if (!optimize_bounded_sort)
+		return;
+#endif
+
+	/*
+	 * We want to be able to compute bound * 2, so limit the setting.  A
+	 * larger bound is silently dropped: we return before state->bounded is
+	 * set, so the sort simply stays unbounded.
+	 */
+	if (bound > (int64) (INT_MAX / 2))
+		return;
+
+	state->bounded = true;
+	state->bound = (int) bound;
+
+	/*
+	 * Bounded sorts are not an effective target for abbreviated key
+	 * optimization. Disable by setting state to be consistent with no
+	 * abbreviation support.
+	 *
+	 * Only the leading sort key (the first element of state->sortKeys) is
+	 * touched here.
+	 *
+	 * NOTE(review): state->sortKeys is dereferenced unconditionally, yet
+	 * tuplesort_begin_index_hash() never assigns it.  Presumably callers
+	 * never set a bound on a hash-index sort -- confirm.
+	 */
+	state->sortKeys->abbrev_converter = NULL;
+	if (state->sortKeys->abbrev_full_comparator)
+		state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
+
+	/* Not strictly necessary, but be tidy */
+	state->sortKeys->abbrev_abort = NULL;
+	state->sortKeys->abbrev_full_comparator = NULL;
+}
+
+/*
+ * tuplesort_used_bound
+ *
+ * Allow callers to find out if the sort state was able to use a bound.
+ *
+ * Simply reports state->boundUsed, which tuplesort_begin_batch() clears at
+ * the start of every batch.
+ */
+bool
+tuplesort_used_bound(Tuplesortstate *state)
+{
+	return state->boundUsed;
+}
+
+/*
+ * tuplesort_free
+ *
+ * Internal routine for freeing resources of tuplesort.
+ *
+ * Closes the logical tape set if one exists, emits the TRACE_SORT log line
+ * and the sort-done probe, tears down the executor state created for the
+ * CLUSTER case, and finally resets sortcontext.  maincontext, and with it
+ * the Tuplesortstate struct, is left intact; tuplesort_end() deletes that.
+ */
+static void
+tuplesort_free(Tuplesortstate *state)
+{
+	/* context swap probably not needed, but let's be safe */
+	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
+
+#ifdef TRACE_SORT
+	long		spaceUsed;
+
+	/*
+	 * Sample the space figure before the tape set is closed below: disk
+	 * blocks for a tape-based sort, otherwise consumed memory rounded up to
+	 * whole kilobytes.
+	 */
+	if (state->tapeset)
+		spaceUsed = LogicalTapeSetBlocks(state->tapeset);
+	else
+		spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024;
+#endif
+
+	/*
+	 * Delete temporary "tape" files, if any.
+	 *
+	 * Note: want to include this in reported total cost of sort, hence need
+	 * for two #ifdef TRACE_SORT sections.
+	 *
+	 * We don't bother to destroy the individual tapes here. They will go away
+	 * with the sortcontext. (In TSS_FINALMERGE state, we have closed
+	 * finished tapes already.)
+	 *
+	 * state->tapeset is not cleared here, so the tests further down still
+	 * see the (now closed) pointer; tuplesort_begin_batch() is what sets it
+	 * back to NULL.
+	 */
+	if (state->tapeset)
+		LogicalTapeSetClose(state->tapeset);
+
+#ifdef TRACE_SORT
+	if (trace_sort)
+	{
+		if (state->tapeset)
+			elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s",
+				 SERIAL(state) ? "external sort" : "parallel external sort",
+				 state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
+		else
+			elog(LOG, "%s of worker %d ended, %ld KB used: %s",
+				 SERIAL(state) ? "internal sort" : "unperformed parallel sort",
+				 state->worker, spaceUsed, pg_rusage_show(&state->ru_start));
+	}
+
+	TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed);
+#else
+
+	/*
+	 * If you disabled TRACE_SORT, you can still probe sort__done, but you
+	 * ain't getting space-used stats.
+	 */
+	TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L);
+#endif
+
+	/*
+	 * Free any execution state created for CLUSTER case
+	 *
+	 * NOTE(review): state->estate is not set to NULL afterwards, so a second
+	 * tuplesort_free() on the same state (tuplesort_reset() followed by
+	 * tuplesort_end()) would revisit an already-freed estate.  Presumably
+	 * CLUSTER sorts never go through tuplesort_reset() -- confirm.
+	 */
+	if (state->estate != NULL)
+	{
+		ExprContext *econtext = GetPerTupleExprContext(state->estate);
+
+		ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple);
+		FreeExecutorState(state->estate);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/*
+	 * Free the per-sort memory context, thereby releasing all working memory.
+	 */
+	MemoryContextReset(state->sortcontext);
+}
+
+/*
+ * tuplesort_end
+ *
+ * Release resources and clean up.
+ *
+ * NOTE: after calling this, any pointers returned by tuplesort_getXXX are
+ * pointing to garbage. Be careful not to attempt to use or free such
+ * pointers afterwards!
+ */
+void
+tuplesort_end(Tuplesortstate *state)
+{
+	/* Per-sort cleanup first; this only resets sortcontext */
+	tuplesort_free(state);
+
+	/*
+	 * Free the main memory context, including the Tuplesortstate struct
+	 * itself.
+	 */
+	MemoryContextDelete(state->maincontext);
+}
+
+/*
+ * tuplesort_updatemax
+ *
+ * Update maximum resource usage statistics.
+ *
+ * Compares the current batch's footprint with what is recorded in maxSpace /
+ * isMaxSpaceDisk and, when the current one wins, stores it together with
+ * the current sort status in maxSpaceStatus.
+ */
+static void
+tuplesort_updatemax(Tuplesortstate *state)
+{
+	int64		spaceUsed;
+	bool		isSpaceDisk;
+
+	/*
+	 * Note: it might seem we should provide both memory and disk usage for a
+	 * disk-based sort. However, the current code doesn't track memory space
+	 * accurately once we have begun to return tuples to the caller (since we
+	 * don't account for pfree's the caller is expected to do), so we cannot
+	 * rely on availMem in a disk sort. This does not seem worth the overhead
+	 * to fix. Is it worth creating an API for the memory context code to
+	 * tell us how much is actually used in sortcontext?
+	 */
+	if (state->tapeset)
+	{
+		/* tape-based sort: tape-set block count times BLCKSZ */
+		isSpaceDisk = true;
+		spaceUsed = LogicalTapeSetBlocks(state->tapeset) * BLCKSZ;
+	}
+	else
+	{
+		/* in-memory sort: portion of the allowedMem budget consumed so far */
+		isSpaceDisk = false;
+		spaceUsed = state->allowedMem - state->availMem;
+	}
+
+	/*
+	 * Sort evicts data to the disk when it wasn't able to fit that data into
+	 * main memory. This is why we assume space used on the disk to be more
+	 * important for tracking resource usage than space used in memory. Note
+	 * that the amount of space occupied by some tupleset on the disk might be
+	 * less than amount of space occupied by the same tupleset in memory due
+	 * to more compact representation.
+	 *
+	 * Hence: a disk figure always replaces a recorded memory figure, while
+	 * figures of the same kind are compared by size.
+	 */
+	if ((isSpaceDisk && !state->isMaxSpaceDisk) ||
+		(isSpaceDisk == state->isMaxSpaceDisk && spaceUsed > state->maxSpace))
+	{
+		state->maxSpace = spaceUsed;
+		state->isMaxSpaceDisk = isSpaceDisk;
+		state->maxSpaceStatus = state->status;
+	}
+}
+
+/*
+ * tuplesort_reset
+ *
+ * Reset the tuplesort. Reset all the data in the tuplesort, but leave the
+ * meta-information in. After tuplesort_reset, tuplesort is ready to start
+ * a new sort. This allows avoiding recreation of tuple sort states (and
+ * save resources) when sorting multiple small batches.
+ */
+void
+tuplesort_reset(Tuplesortstate *state)
+{
+	/*
+	 * Record usage statistics first: tuplesort_updatemax() reads tapeset and
+	 * availMem, both of which tuplesort_begin_batch() reinitializes below.
+	 */
+	tuplesort_updatemax(state);
+	tuplesort_free(state);
+
+	/*
+	 * After we've freed up per-batch memory, re-setup all of the state common
+	 * to both the first batch and any subsequent batch.
+	 */
+	tuplesort_begin_batch(state);
+
+	/*
+	 * Clear per-batch pointers so nothing stale is reused.  Presumably these
+	 * referred to memory released by tuplesort_free() -- TODO confirm their
+	 * allocation context.
+	 */
+	state->lastReturnedTuple = NULL;
+	state->slabMemoryBegin = NULL;
+	state->slabMemoryEnd = NULL;
+	state->slabFreeHead = NULL;
+}
+
+/*
+ * Grow the memtuples[] array, if possible within our memory constraint. We
+ * must not exceed INT_MAX tuples in memory or the caller-provided memory
+ * limit. Return true if we were able to enlarge the array, false if not.
+ *
+ * Normally, at each increment we double the size of the array. When doing
+ * that would exceed a limit, we attempt one last, smaller increase (and then
+ * clear the growmemtuples flag so we don't try any more). That allows us to
+ * use memory as fully as permitted; sticking to the pure doubling rule could
+ * result in almost half going unused. Because availMem moves around with
+ * tuple addition/removal, we need some rule to prevent making repeated small
+ * increases in memtupsize, which would just be useless thrashing. The
+ * growmemtuples flag accomplishes that and also prevents useless
+ * recalculations in this function.
+ */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples. + */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. 
Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. + */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. (We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) 
+ */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. 
+ */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + Assert(!LEADER(state)); + + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state, true); + + /* + * Dump all tuples. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, replacing it with the new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_replace_top(state, tuple); + } + break; + + case TSS_BUILDRUNS: + + /* + * Save the tuple into the unsorted array (there must be space) + */ + state->memtuples[state->memtupcount++] = *tuple; + + /* + * If we are over the memory limit, dump all tuples. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. + */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. 
+ */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort of worker %d starting: %s", + state->worker, pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory, or leader to take over worker tapes + */ + if (SERIAL(state)) + { + /* Just qsort 'em and we're done */ + tuplesort_sort_memtuples(state); + state->status = TSS_SORTEDINMEM; + } + else if (WORKER(state)) + { + /* + * Parallel workers must still dump out tuples to tape. No + * merge is required to produce single output run, though. + */ + inittapes(state, false); + dumptuples(state, true); + worker_nomergeruns(state); + state->status = TSS_SORTEDONTAPE; + } + else + { + /* + * Leader will take over worker tapes and merge worker runs. + * Note that mergeruns sets the correct state->status. + */ + leader_takeover_tapes(state); + mergeruns(state); + } + state->current = 0; + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess and !WORKER(), one run per tape). + * Note that mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort of worker %d done (except %d-way final merge): %s", + state->worker, state->nInputTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort of worker %d done: %s", + state->worker, pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns false if no more tuples. + * Returned tuple belongs to tuplesort memory context, and must not be freed + * by caller. Note that fetched tuple is stored in memory that may be + * recycled by any future fetch. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup) +{ + unsigned int tuplen; + size_t nmoved; + + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->slabAllocatorUsed); + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. 
+ */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(state->slabAllocatorUsed); + + /* + * The slot that held the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + if (forward) + { + if (state->eof_reached) + return false; + + if ((tuplen = getlen(state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle + * its memory on next call. (This can be NULL, in the + * !state->tuples case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + nmoved = LogicalTapeBackspace(state->result_tape, + 2 * sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != 2 * sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. 
+ */ + nmoved = LogicalTapeBackspace(state->result_tape, + sizeof(unsigned int)); + if (nmoved == 0) + return false; + else if (nmoved != sizeof(unsigned int)) + elog(ERROR, "unexpected tape position"); + tuplen = getlen(state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + nmoved = LogicalTapeBackspace(state->result_tape, + tuplen + 2 * sizeof(unsigned int)); + if (nmoved == tuplen + sizeof(unsigned int)) + { + /* + * We backed up over the previous tuple, but there was no + * ending length word before it. That means that the prev + * tuple is the first tuple in the file. It is now the + * next to read in forward direction (not obviously right, + * but that is what in-memory case does). + */ + return false; + } + else if (nmoved != tuplen + 2 * sizeof(unsigned int)) + elog(ERROR, "bogus tuple length in backward scan"); + } + + tuplen = getlen(state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + nmoved = LogicalTapeBackspace(state->result_tape, + tuplen); + if (nmoved != tuplen) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + + /* + * Remember the tuple we return, so that we can recycle its memory + * on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + return true; + + case TSS_FINALMERGE: + Assert(forward); + /* We are managing memory ourselves, with the slab allocator. */ + Assert(state->slabAllocatorUsed); + + /* + * The slab slot holding the tuple that we returned in previous + * gettuple call can now be reused. + */ + if (state->lastReturnedTuple) + { + RELEASE_SLAB_SLOT(state, state->lastReturnedTuple); + state->lastReturnedTuple = NULL; + } + + /* + * This code should match the inner loop of mergeonerun(). 
+ */ + if (state->memtupcount > 0) + { + int srcTapeIndex = state->memtuples[0].srctape; + LogicalTape *srcTape = state->inputTapes[srcTapeIndex]; + SortTuple newtup; + + *stup = state->memtuples[0]; + + /* + * Remember the tuple we return, so that we can recycle its + * memory on next call. (This can be NULL, in the Datum case). + */ + state->lastReturnedTuple = stup->tuple; + + /* + * Pull next tuple from tape, and replace the returned tuple + * at top of the heap with it. + */ + if (!mergereadnext(state, srcTape, &newtup)) + { + /* + * If no more data, we've reached end of run on this tape. + * Remove the top node from the heap. + */ + tuplesort_heap_delete_top(state); + state->nInputRuns--; + + /* + * Close the tape. It'd go away at the end of the sort + * anyway, but better to release the memory early. + */ + LogicalTapeClose(srcTape); + return true; + } + newtup.srctape = srcTapeIndex; + tuplesort_heap_replace_top(state, &newtup); + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return true; else, clear the slot + * and return false. + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. + * + * If copy is true, the slot receives a tuple that's been copied into the + * caller's memory context, so that it will stay valid regardless of future + * manipulations of the tuplesort's state (up to and including deleting the + * tuplesort). 
If copy is false, the slot will just receive a pointer to a + * tuple held within the tuplesort, which is more efficient, but only safe for + * callers that are prepared to have any subsequent manipulation of the + * tuplesort's state invalidate slot contents. + */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, bool copy, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (copy) + stup.tuple = heap_copy_minimal_tuple((MinimalTuple) stup.tuple); + + ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, copy); + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. + */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory + * context, and must not be freed by caller. Caller may not rely on tuple + * remaining valid after any further manipulation of tuplesort. 
+ */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns false if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on true return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. 
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* use stup.tuple because stup.datum1 may be an abbreviation */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns true if successful, false if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. + */ + Assert(forward); + Assert(ntuples >= 0); + Assert(!WORKER(state)); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /*---------- + * In the merge phase, we need buffer space for each input and output tape. + * Each pass in the balanced merge algorithm reads from M input tapes, and + * writes to N output tapes. Each tape consumes TAPE_BUFFER_OVERHEAD bytes + * of memory. In addition to that, we want MERGE_BUFFER_SIZE workspace per + * input tape. + * + * totalMem = M * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE) + + * N * TAPE_BUFFER_OVERHEAD + * + * Except for the last and next-to-last merge passes, where there can be + * fewer tapes left to process, M = N. We choose M so that we have the + * desired amount of memory available for the input buffers + * (TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE), given the total memory + * available for the tape buffers (allowedMem). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. 
+ *---------- + */ + mOrder = allowedMem / + (2 * TAPE_BUFFER_OVERHEAD + MERGE_BUFFER_SIZE); + + /* + * Even in minimum memory, use at least a MINORDER merge. On the other + * hand, even when we have lots of memory, do not use more than a MAXORDER + * merge. Tapes are pretty cheap, but they're not entirely free. Each + * additional tape reduces the amount of memory available to build runs, + * which in turn can cause the same sort to need more runs, which makes + * merging slower even if it can still be done in a single pass. Also, + * high order merges are quite slow due to CPU cache effects; it can be + * faster to pay the I/O cost of a multi-pass merge than to perform a + * single merge pass across many hundreds of tapes. + */ + mOrder = Max(mOrder, MINORDER); + mOrder = Min(mOrder, MAXORDER); + + return mOrder; +} + +/* + * Helper function to calculate how much memory to allocate for the read buffer + * of each input tape in a merge pass. + * + * 'avail_mem' is the amount of memory available for the buffers of all the + * tapes, both input and output. + * 'nInputTapes' and 'nInputRuns' are the number of input tapes and runs. + * 'maxOutputTapes' is the max. number of output tapes we should produce. + */ +static int64 +merge_read_buffer_size(int64 avail_mem, int nInputTapes, int nInputRuns, + int maxOutputTapes) +{ + int nOutputRuns; + int nOutputTapes; + + /* + * How many output tapes will we produce in this pass? + * + * This is nInputRuns / nInputTapes, rounded up. + */ + nOutputRuns = (nInputRuns + nInputTapes - 1) / nInputTapes; + + nOutputTapes = Min(nOutputRuns, maxOutputTapes); + + /* + * Each output tape consumes TAPE_BUFFER_OVERHEAD bytes of memory. All + * remaining memory is divided evenly between the input tapes. + * + * This also follows from the formula in tuplesort_merge_order, but here + * we derive the input buffer size from the amount of memory available, + * and M and N. 
+ */ + return Max((avail_mem - TAPE_BUFFER_OVERHEAD * nOutputTapes) / nInputTapes, 0); +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we won't sort in memory. + */ +static void +inittapes(Tuplesortstate *state, bool mergeruns) +{ + Assert(!LEADER(state)); + + if (mergeruns) + { + /* Compute number of input tapes to use when merging */ + state->maxTapes = tuplesort_merge_order(state->allowedMem); + } + else + { + /* Workers can sometimes produce single run, output without merge */ + Assert(WORKER(state)); + state->maxTapes = MINORDER; + } + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d switching to external sort with %d tapes: %s", + state->worker, state->maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* Create the tape set */ + inittapestate(state, state->maxTapes); + state->tapeset = + LogicalTapeSetCreate(false, + state->shared ? &state->shared->fileset : NULL, + state->worker); + + state->currentRun = 0; + + /* + * Initialize logical tape arrays. + */ + state->inputTapes = NULL; + state->nInputTapes = 0; + state->nInputRuns = 0; + + state->outputTapes = palloc0(state->maxTapes * sizeof(LogicalTape *)); + state->nOutputTapes = 0; + state->nOutputRuns = 0; + + state->status = TSS_BUILDRUNS; + + selectnewtape(state); +} + +/* + * inittapestate - initialize generic tape management state + */ +static void +inittapestate(Tuplesortstate *state, int maxTapes) +{ + int64 tapeSpace; + + /* + * Decrease availMem to reflect the space needed for tape buffers; but + * don't decrease it to the point that we have no room for tuples. (That + * case is only likely to occur if sorting pass-by-value Datums; in all + * other scenarios the memtuples[] array is unlikely to occupy more than + * half of allowedMem. In the pass-by-value case it's not important to + * account for tuple space, so we don't care if LACKMEM becomes + * inaccurate.) 
+ */ + tapeSpace = (int64) maxTapes * TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. For parallel sorts, this should have been + * called already, but it doesn't matter if it is called a second time. + */ + PrepareTempTablespaces(); +} + +/* + * selectnewtape -- select next tape to output to. + * + * This is called after finishing a run when we know another run + * must be started. This is used both when building the initial + * runs, and during merge passes. + */ +static void +selectnewtape(Tuplesortstate *state) +{ + /* + * At the beginning of each merge pass, nOutputTapes and nOutputRuns are + * both zero. On each call, we create a new output tape to hold the next + * run, until maxTapes is reached. After that, we assign new runs to the + * existing tapes in a round robin fashion. + */ + if (state->nOutputTapes < state->maxTapes) + { + /* Create a new tape to hold the next run */ + Assert(state->outputTapes[state->nOutputRuns] == NULL); + Assert(state->nOutputRuns == state->nOutputTapes); + state->destTape = LogicalTapeCreate(state->tapeset); + state->outputTapes[state->nOutputTapes] = state->destTape; + state->nOutputTapes++; + state->nOutputRuns++; + } + else + { + /* + * We have reached the max number of tapes. Append to an existing + * tape. + */ + state->destTape = state->outputTapes[state->nOutputRuns % state->nOutputTapes]; + state->nOutputRuns++; + } +} + +/* + * Initialize the slab allocation arena, for the given number of slots. 
+ */ +static void +init_slab_allocator(Tuplesortstate *state, int numSlots) +{ + if (numSlots > 0) + { + char *p; + int i; + + state->slabMemoryBegin = palloc(numSlots * SLAB_SLOT_SIZE); + state->slabMemoryEnd = state->slabMemoryBegin + + numSlots * SLAB_SLOT_SIZE; + state->slabFreeHead = (SlabSlot *) state->slabMemoryBegin; + USEMEM(state, numSlots * SLAB_SLOT_SIZE); + + p = state->slabMemoryBegin; + for (i = 0; i < numSlots - 1; i++) + { + ((SlabSlot *) p)->nextfree = (SlabSlot *) (p + SLAB_SLOT_SIZE); + p += SLAB_SLOT_SIZE; + } + ((SlabSlot *) p)->nextfree = NULL; + } + else + { + state->slabMemoryBegin = state->slabMemoryEnd = NULL; + state->slabFreeHead = NULL; + } + state->slabAllocatorUsed = true; +} + +/* + * mergeruns -- merge all the completed initial runs. + * + * This implements the Balanced k-Way Merge Algorithm. All input data has + * already been written to initial runs on tape (see dumptuples). + */ +static void +mergeruns(Tuplesortstate *state) +{ + int tapenum; + + Assert(state->status == TSS_BUILDRUNS); + Assert(state->memtupcount == 0); + + if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) + { + /* + * If there are multiple runs to be merged, when we go to read back + * tuples from disk, abbreviated keys will not have been stored, and + * we don't care to regenerate them. Disable abbreviation from this + * point on. + */ + state->sortKeys->abbrev_converter = NULL; + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; + } + + /* + * Reset tuple memory. We've freed all the tuples that we previously + * allocated. We will use the slab allocator from now on. + */ + MemoryContextResetOnly(state->tuplecontext); + + /* + * We no longer need a large memtuples array. (We will allocate a smaller + * one for the heap later.) 
+ */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + pfree(state->memtuples); + state->memtuples = NULL; + + /* + * Initialize the slab allocator. We need one slab slot per input tape, + * for the tuples in the heap, plus one to hold the tuple last returned + * from tuplesort_gettuple. (If we're sorting pass-by-val Datums, + * however, we don't need to do allocate anything.) + * + * In a multi-pass merge, we could shrink this allocation for the last + * merge pass, if it has fewer tapes than previous passes, but we don't + * bother. + * + * From this point on, we no longer use the USEMEM()/LACKMEM() mechanism + * to track memory usage of individual tuples. + */ + if (state->tuples) + init_slab_allocator(state, state->nOutputTapes + 1); + else + init_slab_allocator(state, 0); + + /* + * Allocate a new 'memtuples' array, for the heap. It will hold one tuple + * from each input tape. + * + * We could shrink this, too, between passes in a multi-pass merge, but we + * don't bother. (The initial input tapes are still in outputTapes. The + * number of input tapes will not increase between passes.) + */ + state->memtupsize = state->nOutputTapes; + state->memtuples = (SortTuple *) MemoryContextAlloc(state->maincontext, + state->nOutputTapes * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* + * Use all the remaining memory we have available for tape buffers among + * all the input tapes. At the beginning of each merge pass, we will + * divide this memory between the input and output tapes in the pass. + */ + state->tape_buffer_mem = state->availMem; + USEMEM(state, state->tape_buffer_mem); +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d using %zu KB of memory for tape buffers", + state->worker, state->tape_buffer_mem / 1024); +#endif + + for (;;) + { + /* + * On the first iteration, or if we have read all the runs from the + * input tapes in a multi-pass merge, it's time to start a new pass. 
+ * Rewind all the output tapes, and make them inputs for the next + * pass. + */ + if (state->nInputRuns == 0) + { + int64 input_buffer_size; + + /* Close the old, emptied, input tapes */ + if (state->nInputTapes > 0) + { + for (tapenum = 0; tapenum < state->nInputTapes; tapenum++) + LogicalTapeClose(state->inputTapes[tapenum]); + pfree(state->inputTapes); + } + + /* Previous pass's outputs become next pass's inputs. */ + state->inputTapes = state->outputTapes; + state->nInputTapes = state->nOutputTapes; + state->nInputRuns = state->nOutputRuns; + + /* + * Reset output tape variables. The actual LogicalTapes will be + * created as needed, here we only allocate the array to hold + * them. + */ + state->outputTapes = palloc0(state->nInputTapes * sizeof(LogicalTape *)); + state->nOutputTapes = 0; + state->nOutputRuns = 0; + + /* + * Redistribute the memory allocated for tape buffers, among the + * new input and output tapes. + */ + input_buffer_size = merge_read_buffer_size(state->tape_buffer_mem, + state->nInputTapes, + state->nInputRuns, + state->maxTapes); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "starting merge pass of %d input runs on %d tapes, " INT64_FORMAT " KB of memory for each input tape: %s", + state->nInputRuns, state->nInputTapes, input_buffer_size / 1024, + pg_rusage_show(&state->ru_start)); +#endif + + /* Prepare the new input tapes for merge pass. */ + for (tapenum = 0; tapenum < state->nInputTapes; tapenum++) + LogicalTapeRewindForRead(state->inputTapes[tapenum], input_buffer_size); + + /* + * If there's just one run left on each input tape, then only one + * merge pass remains. If we don't have to produce a materialized + * sorted tape, we can stop at this point and do the final merge + * on-the-fly. 
+ */ + if (!state->randomAccess && state->nInputRuns <= state->nInputTapes + && !WORKER(state)) + { + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); + /* Initialize for the final merge pass */ + beginmerge(state); + state->status = TSS_FINALMERGE; + return; + } + } + + /* Select an output tape */ + selectnewtape(state); + + /* Merge one run from each input tape. */ + mergeonerun(state); + + /* + * If the input tapes are empty, and we output only one output run, + * we're done. The current output tape contains the final result. + */ + if (state->nInputRuns == 0 && state->nOutputRuns <= 1) + break; + } + + /* + * Done. The result is on a single run on a single tape. + */ + state->result_tape = state->outputTapes[0]; + if (!WORKER(state)) + LogicalTapeFreeze(state->result_tape, NULL); + else + worker_freeze_result_tape(state); + state->status = TSS_SORTEDONTAPE; + + /* Close all the now-empty input tapes, to release their read buffers. */ + for (tapenum = 0; tapenum < state->nInputTapes; tapenum++) + LogicalTapeClose(state->inputTapes[tapenum]); +} + +/* + * Merge one run from each input tape. + */ +static void +mergeonerun(Tuplesortstate *state) +{ + int srcTapeIndex; + LogicalTape *srcTape; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. + */ + beginmerge(state); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). 
+ */ + while (state->memtupcount > 0) + { + SortTuple stup; + + /* write the tuple to destTape */ + srcTapeIndex = state->memtuples[0].srctape; + srcTape = state->inputTapes[srcTapeIndex]; + WRITETUP(state, state->destTape, &state->memtuples[0]); + + /* recycle the slot of the tuple we just wrote out, for the next read */ + if (state->memtuples[0].tuple) + RELEASE_SLAB_SLOT(state, state->memtuples[0].tuple); + + /* + * pull next tuple from the tape, and replace the written-out tuple in + * the heap with it. + */ + if (mergereadnext(state, srcTape, &stup)) + { + stup.srctape = srcTapeIndex; + tuplesort_heap_replace_top(state, &stup); + + } + else + { + tuplesort_heap_delete_top(state); + state->nInputRuns--; + } + } + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape. + */ + markrunend(state->destTape); +} + +/* + * beginmerge - initialize for a merge pass + * + * Fill the merge heap with the first tuple from each input tape. + */ +static void +beginmerge(Tuplesortstate *state) +{ + int activeTapes; + int srcTapeIndex; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + activeTapes = Min(state->nInputTapes, state->nInputRuns); + + for (srcTapeIndex = 0; srcTapeIndex < activeTapes; srcTapeIndex++) + { + SortTuple tup; + + if (mergereadnext(state, state->inputTapes[srcTapeIndex], &tup)) + { + tup.srctape = srcTapeIndex; + tuplesort_heap_insert(state, &tup); + } + } +} + +/* + * mergereadnext - read next tuple from one merge input tape + * + * Returns false on EOF. + */ +static bool +mergereadnext(Tuplesortstate *state, LogicalTape *srcTape, SortTuple *stup) +{ + unsigned int tuplen; + + /* read next tuple, if any */ + if ((tuplen = getlen(srcTape, true)) == 0) + return false; + READTUP(state, stup, srcTape, tuplen); + + return true; +} + +/* + * dumptuples - remove tuples from memtuples and write initial run to tape + * + * When alltuples = true, dump everything currently in memory. 
(This case is + * only used at end of input data.) + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Nothing to do if we still fit in available memory and have array slots, + * unless this is the final call during initial run generation. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state) && + !alltuples) + return; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. In general, short final runs are quite possible, but avoid + * creating a completely empty run. In a worker, though, we must produce + * at least one tape, even if it's empty. + */ + if (state->memtupcount == 0 && state->currentRun > 0) + return; + + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + if (state->currentRun > 0) + selectnewtape(state); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d starting quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished quicksort of run %d: %s", + state->worker, state->currentRun, + pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->destTape, &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. 
We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in the sizes of incoming tuples. Fragmentation due to + * AllocSetFree's bucketing by size class might be particularly bad if + * this step wasn't taken. + */ + MemoryContextReset(state->tuplecontext); + + markrunend(state->destTape); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "worker %d finished writing run %d to tape %d: %s", + state->worker, state->currentRun, (state->currentRun - 1) % state->nOutputTapes + 1, + pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewindForRead(state->result_tape, 0); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * 
tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void +tuplesort_restorepos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + LogicalTapeSeek(state->result_tape, + state->markpos_block, + state->markpos_offset); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + TuplesortInstrumentation *stats) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + tuplesort_updatemax(state); + + if (state->isMaxSpaceDisk) + stats->spaceType = SORT_SPACE_TYPE_DISK; + else + stats->spaceType = SORT_SPACE_TYPE_MEMORY; + stats->spaceUsed = (state->maxSpace + 1023) / 1024; + + switch (state->maxSpaceStatus) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + stats->sortMethod = SORT_TYPE_TOP_N_HEAPSORT; + else + stats->sortMethod = SORT_TYPE_QUICKSORT; + break; + case TSS_SORTEDONTAPE: + stats->sortMethod = SORT_TYPE_EXTERNAL_SORT; + break; + case TSS_FINALMERGE: + stats->sortMethod = SORT_TYPE_EXTERNAL_MERGE; + break; + default: + stats->sortMethod = SORT_TYPE_STILL_IN_PROGRESS; + break; + } +} + +/* + * Convert TuplesortMethod to a string. + */ +const char * +tuplesort_method_name(TuplesortMethod m) +{ + switch (m) + { + case SORT_TYPE_STILL_IN_PROGRESS: + return "still in progress"; + case SORT_TYPE_TOP_N_HEAPSORT: + return "top-N heapsort"; + case SORT_TYPE_QUICKSORT: + return "quicksort"; + case SORT_TYPE_EXTERNAL_SORT: + return "external sort"; + case SORT_TYPE_EXTERNAL_MERGE: + return "external merge"; + } + + return "unknown"; +} + +/* + * Convert TuplesortSpaceType to a string. + */ +const char * +tuplesort_space_type_name(TuplesortSpaceType t) +{ + Assert(t == SORT_SPACE_TYPE_DISK || t == SORT_SPACE_TYPE_MEMORY); + return t == SORT_SPACE_TYPE_DISK ? "Disk" : "Memory"; +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + */ + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + Assert(SERIAL(state)); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount < state->bound) + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup); + } + else + { + /* + * The heap is full. Replace the largest entry with the new + * tuple, or just discard it, if it's larger than anything already + * in the heap. + */ + if (COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + tuplesort_heap_replace_top(state, &state->memtuples[i]); + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + Assert(SERIAL(state)); + + /* + * We can unheapify in place because each delete-top call will remove the + * largest entry, which we can promptly store in the newly freed slot at + * the end. Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_delete_top(state); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. 
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts, and external sort runs. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + Assert(!LEADER(state)); + + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: For some callers, tuple points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples; + int j; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (COMPARETUP(state, tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * Remove the tuple at state->memtuples[0] from the heap. Decrement + * memtupcount, and sift up to maintain the heap invariant. + * + * The caller has already free'd the tuple the top node points to, + * if necessary. 
+ */ +static void +tuplesort_heap_delete_top(Tuplesortstate *state) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + + if (--state->memtupcount <= 0) + return; + + /* + * Remove the last tuple in the heap, and re-insert it, by replacing the + * current top node with it. + */ + tuple = &memtuples[state->memtupcount]; + tuplesort_heap_replace_top(state, tuple); +} + +/* + * Replace the tuple at state->memtuples[0] with a new tuple. Sift up to + * maintain the heap invariant. + * + * This corresponds to Knuth's "sift-up" algorithm (Algorithm 5.2.3H, + * Heapsort, steps H3-H8). + */ +static void +tuplesort_heap_replace_top(Tuplesortstate *state, SortTuple *tuple) +{ + SortTuple *memtuples = state->memtuples; + unsigned int i, + n; + + Assert(state->memtupcount >= 1); + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (COMPARETUP(state, tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(LogicalTape *tape, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(tape, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(LogicalTape *tape) +{ + unsigned int len = 0; + + LogicalTapeWrite(tape, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. + * + * We use next free slot from the slab allocator, or palloc() if the tuple + * is too large for that. + */ +static void * +readtup_alloc(Tuplesortstate *state, Size tuplen) +{ + SlabSlot *buf; + + /* + * We pre-allocate enough slots in the slab arena that we should never run + * out. 
+ */ + Assert(state->slabFreeHead); + + if (tuplen > SLAB_SLOT_SIZE || !state->slabFreeHead) + return MemoryContextAlloc(state->sortcontext, tuplen); + else + { + buf = state->slabFreeHead; + /* Reuse this slot */ + state->slabFreeHead = buf->nextfree; + + return buf; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed 
"tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(tape, (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); + } +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(tape, tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_IndexAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, attno, tupDesc, 
&isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. + */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreHeapTuple(ltup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreHeapTuple(rtup, ecxt_scantuple, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + 
state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); + LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); + } +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(tape, &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_IndexAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. 
+ */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. 
+ */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This is required for + * btree indexes, since heap TID is treated as an implicit last key + * attribute in order to ensure that all keys in the index are physically + * unique. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + Bucket bucket1; + Bucket bucket2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. 
+ */ + Assert(!a->isnull1); + bucket1 = _hash_hashkey2bucket(DatumGetUInt32(a->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + Assert(!b->isnull1); + bucket2 = _hash_hashkey2bucket(DatumGetUInt32(b->datum1), + state->max_buckets, state->high_mask, + state->low_mask); + if (bucket1 > bucket2) + return 1; + else if (bucket1 < bucket2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_index() should not be called"); +} + +static void +writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); + + if (!state->slabAllocatorUsed) + { + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); + } +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); + + LogicalTapeReadExact(tape, tuple, tuplen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + 
LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(tape, waddr, tuplen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); + + if (!state->slabAllocatorUsed && stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(tape, &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tuplen); + + LogicalTapeReadExact(tape, raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); +} + +/* + * Parallel sort routines + */ + +/* + * tuplesort_estimate_shared - estimate required shared memory allocation + * + * nWorkers is an estimate of the number of workers (it's the number that + * will be requested). + */ +Size +tuplesort_estimate_shared(int nWorkers) +{ + Size tapesSize; + + Assert(nWorkers > 0); + + /* Make sure that BufFile shared state is MAXALIGN'd */ + tapesSize = mul_size(sizeof(TapeShare), nWorkers); + tapesSize = MAXALIGN(add_size(tapesSize, offsetof(Sharedsort, tapes))); + + return tapesSize; +} + +/* + * tuplesort_initialize_shared - initialize shared tuplesort state + * + * Must be called from leader process before workers are launched, to + * establish state needed up-front for worker tuplesortstates. nWorkers + * should match the argument passed to tuplesort_estimate_shared(). 
+ */ +void +tuplesort_initialize_shared(Sharedsort *shared, int nWorkers, dsm_segment *seg) +{ + int i; + + Assert(nWorkers > 0); + + SpinLockInit(&shared->mutex); + shared->currentWorker = 0; + shared->workersFinished = 0; + SharedFileSetInit(&shared->fileset, seg); + shared->nTapes = nWorkers; /* one tapes[] slot is reset per worker below */ + for (i = 0; i < nWorkers; i++) + { + shared->tapes[i].firstblocknumber = 0L; + } +} + +/* + * tuplesort_attach_shared - attach to shared tuplesort state + * + * Must be called by all worker processes. + */ +void +tuplesort_attach_shared(Sharedsort *shared, dsm_segment *seg) +{ + /* Attach to SharedFileSet */ + SharedFileSetAttach(&shared->fileset, seg); +} + +/* + * worker_get_identifier - Assign and return ordinal identifier for worker + * + * The order in which these are assigned is not well defined, and should not + * matter; worker numbers across parallel sort participants need only be + * distinct and gapless. logtape.c requires this. + * + * Note that the identifiers assigned from here have no relation to + * ParallelWorkerNumber, to avoid making any assumption about + * caller's requirements. However, we do follow the ParallelWorkerNumber + * convention of representing a non-worker with worker number -1. This + * includes the leader, as well as serial Tuplesort processes. + */ +static int +worker_get_identifier(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int worker; + + Assert(WORKER(state)); + + SpinLockAcquire(&shared->mutex); + worker = shared->currentWorker++; /* read and bumped while holding the mutex */ + SpinLockRelease(&shared->mutex); + + return worker; +} + +/* + * worker_freeze_result_tape - freeze worker's result tape for leader + * + * This is called by workers just after the result tape has been determined, + * instead of calling LogicalTapeFreeze() directly. They do so because + * workers require a few additional steps over similar serial + * TSS_SORTEDONTAPE external sort cases, which also happen here. 
The extra + * steps are around freeing now unneeded resources, and representing to + * leader that worker's input run is available for its merge. + * + * There should only be one final output run for each worker, which consists + * of all tuples that were originally input into worker. + */ +static void +worker_freeze_result_tape(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + TapeShare output; + + Assert(WORKER(state)); + Assert(state->result_tape != NULL); + Assert(state->memtupcount == 0); + + /* + * Free most remaining memory, in case caller is sensitive to our holding + * on to it. memtuples may not be a tiny merge heap at this point. + */ + pfree(state->memtuples); + /* Be tidy */ + state->memtuples = NULL; + state->memtupsize = 0; + + /* + * Parallel worker requires result tape metadata, which is to be stored in + * shared memory for leader. + */ + LogicalTapeFreeze(state->result_tape, &output); + + /* Store properties of output tape, and update finished worker count */ + SpinLockAcquire(&shared->mutex); + shared->tapes[state->worker] = output; + shared->workersFinished++; + SpinLockRelease(&shared->mutex); +} + +/* + * worker_nomergeruns - dump memtuples in worker, without merging + * + * This is called as an alternative to mergeruns() with a worker when no + * merging is required. + */ +static void +worker_nomergeruns(Tuplesortstate *state) +{ + Assert(WORKER(state)); + Assert(state->result_tape == NULL); + Assert(state->nOutputRuns == 1); + + state->result_tape = state->destTape; + worker_freeze_result_tape(state); +} + +/* + * leader_takeover_tapes - create tapeset for leader from worker tapes + * + * So far, leader Tuplesortstate has performed no actual sorting. By now, all + * sorting has occurred in workers, all of which must have already returned + * from tuplesort_performsort(). 
+ * + * When this returns, leader process is left in a state that is virtually + * indistinguishable from it having generated runs as a serial external sort + * might have. + */ +static void +leader_takeover_tapes(Tuplesortstate *state) +{ + Sharedsort *shared = state->shared; + int nParticipants = state->nParticipants; + int workersFinished; + int j; + + Assert(LEADER(state)); + Assert(nParticipants >= 1); + + SpinLockAcquire(&shared->mutex); + workersFinished = shared->workersFinished; + SpinLockRelease(&shared->mutex); + + if (nParticipants != workersFinished) + elog(ERROR, "cannot take over tapes before all workers finish"); + + /* + * Create the tapeset from worker tapes, including a leader-owned tape at + * the end. Parallel workers are far more expensive than logical tapes, + * so the number of tapes allocated here should never be excessive. + */ + inittapestate(state, nParticipants); + state->tapeset = LogicalTapeSetCreate(false, &shared->fileset, -1); + + /* + * Set currentRun to reflect the number of runs we will merge (it's not + * used for anything, this is just pro forma) + */ + state->currentRun = nParticipants; + + /* + * Initialize the state to look the same as after building the initial + * runs. + * + * There will always be exactly 1 run per worker, and exactly one input + * tape per run, because workers always output exactly 1 run, even when + * there were no input tuples for workers to sort. 
+ */ + state->inputTapes = NULL; + state->nInputTapes = 0; + state->nInputRuns = 0; + + state->outputTapes = palloc0(nParticipants * sizeof(LogicalTape *)); + state->nOutputTapes = nParticipants; + state->nOutputRuns = nParticipants; + + for (j = 0; j < nParticipants; j++) + { + state->outputTapes[j] = LogicalTapeImport(state->tapeset, j, &shared->tapes[j]); + } + + state->status = TSS_BUILDRUNS; +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} diff --git a/src/tuplesort96.c b/src/tuplesort96.c new file mode 100644 index 0000000000..d1c29b2f04 --- /dev/null +++ b/src/tuplesort96.c @@ -0,0 +1,4836 @@ +/*------------------------------------------------------------------------- + * + * tuplesort.c + * Generalized tuple sorting routines. + * + * This module handles sorting of heap tuples, index tuples, or single + * Datums (and could easily support other kinds of sortable objects, + * if necessary). It works efficiently for both small and large amounts + * of data. Small amounts are sorted in-memory using qsort(). Large + * amounts are sorted using temporary files and a standard external sort + * algorithm. + * + * See Knuth, volume 3, for more than you want to know about the external + * sorting algorithm. Historically, we divided the input into sorted runs + * using replacement selection, in the form of a priority tree implemented + * as a heap (essentially his Algorithm 5.2.3H), but now we only do that + * for the first run, and only if the run would otherwise end up being very + * short. We merge the runs using polyphase merge, Knuth's Algorithm + * 5.4.2D. The logical "tapes" used by Algorithm D are implemented by + * logtape.c, which avoids space wastage by recycling disk space as soon + * as each block is read from its "tape". 
+ * + * We do not use Knuth's recommended data structure (Algorithm 5.4.1R) for + * the replacement selection, because it uses a fixed number of records + * in memory at all times. Since we are dealing with tuples that may vary + * considerably in size, we want to be able to vary the number of records + * kept in memory to ensure full utilization of the allowed sort memory + * space. So, we keep the tuples in a variable-size heap, with the next + * record to go out at the top of the heap. Like Algorithm 5.4.1R, each + * record is stored with the run number that it must go into, and we use + * (run number, key) as the ordering key for the heap. When the run number + * at the top of the heap changes, we know that no more records of the prior + * run are left in the heap. Note that there are in practice only ever two + * distinct run numbers, because since PostgreSQL 9.6, we only use + * replacement selection to form the first run. + * + * In PostgreSQL 9.6, a heap (based on Knuth's Algorithm H, with some small + * customizations) is only used with the aim of producing just one run, + * thereby avoiding all merging. Only the first run can use replacement + * selection, which is why there are now only two possible valid run + * numbers, and why heapification is customized to not distinguish between + * tuples in the second run (those will be quicksorted). We generally + * prefer a simple hybrid sort-merge strategy, where runs are sorted in much + * the same way as the entire input of an internal sort is sorted (using + * qsort()). The replacement_sort_tuples GUC controls the limited remaining + * use of replacement selection for the first run. + * + * There are several reasons to favor a hybrid sort-merge strategy. + * Maintaining a priority tree/heap has poor CPU cache characteristics. 
+ * Furthermore, the growth in main memory sizes has greatly diminished the + * value of having runs that are larger than available memory, even in the + * case where there is partially sorted input and runs can be made far + * larger by using a heap. In most cases, a single-pass merge step is all + * that is required even when runs are no larger than available memory. + * Avoiding multiple merge passes was traditionally considered to be the + * major advantage of using replacement selection. + * + * The approximate amount of memory allowed for any one sort operation + * is specified in kilobytes by the caller (most pass work_mem). Initially, + * we absorb tuples and simply store them in an unsorted array as long as + * we haven't exceeded workMem. If we reach the end of the input without + * exceeding workMem, we sort the array using qsort() and subsequently return + * tuples just by scanning the tuple array sequentially. If we do exceed + * workMem, we begin to emit tuples into sorted runs in temporary tapes. + * When tuples are dumped in batch after quicksorting, we begin a new run + * with a new output tape (selected per Algorithm D). After the end of the + * input is reached, we dump out remaining tuples in memory into a final run + * (or two, when replacement selection is still used), then merge the runs + * using Algorithm D. + * + * When merging runs, we use a heap containing just the frontmost tuple from + * each source run; we repeatedly output the smallest tuple and insert the + * next tuple from its source tape (if any). When the heap empties, the merge + * is complete. The basic merge algorithm thus needs very little memory --- + * only M tuples for an M-way merge, and M is constrained to a small number. + * However, we can still make good use of our full workMem allocation by + * pre-reading additional tuples from each source tape. 
Without prereading, + * our access pattern to the temporary file would be very erratic; on average + * we'd read one block from each of M source tapes during the same time that + * we're writing M blocks to the output tape, so there is no sequentiality of + * access at all, defeating the read-ahead methods used by most Unix kernels. + * Worse, the output tape gets written into a very random sequence of blocks + * of the temp file, ensuring that things will be even worse when it comes + * time to read that tape. A straightforward merge pass thus ends up doing a + * lot of waiting for disk seeks. We can improve matters by prereading from + * each source tape sequentially, loading about workMem/M bytes from each tape + * in turn. Then we run the merge algorithm, writing but not reading until + * one of the preloaded tuple series runs out. Then we switch back to preread + * mode, fill memory again, and repeat. This approach helps to localize both + * read and write accesses. + * + * When the caller requests random access to the sort result, we form + * the final sorted run on a logical tape which is then "frozen", so + * that we can access it randomly. When the caller does not need random + * access, we return from tuplesort_performsort() as soon as we are down + * to one run per logical tape. The final merge is then performed + * on-the-fly as the caller repeatedly calls tuplesort_getXXX; this + * saves one cycle of writing all the data out to disk and reading it in. + * + * Before Postgres 8.2, we always used a seven-tape polyphase merge, on the + * grounds that 7 is the "sweet spot" on the tapes-to-passes curve according + * to Knuth's figure 70 (section 5.4.2). However, Knuth is assuming that + * tape drives are expensive beasts, and in particular that there will always + * be many more runs than tape drives. In our implementation a "tape drive" + * doesn't cost much more than a few Kb of memory buffers, so we can afford + * to have lots of them. 
In particular, if we can have as many tape drives + * as sorted runs, we can eliminate any repeated I/O at all. In the current + * code we determine the number of tapes M on the basis of workMem: we want + * workMem/M to be large enough that we read a fair amount of data each time + * we preread from a tape, so as to maintain the locality of access described + * above. Nonetheless, with large workMem we can have many tapes. + * + * + * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/sort/tuplesort.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include <limits.h> + +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/index.h" +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "utils/datum.h" +#include "utils/logtape.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/sortsupport.h" +#include "utils/tuplesort.h" + + +/* sort-type codes for sort__start probes */ +#define HEAP_SORT 0 +#define INDEX_SORT 1 +#define DATUM_SORT 2 +#define CLUSTER_SORT 3 + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +#ifdef DEBUG_BOUNDED_SORT +bool optimize_bounded_sort = true; +#endif + + +/* + * The objects we actually sort are SortTuple structs. These contain + * a pointer to the tuple proper (might be a MinimalTuple or IndexTuple), + * which is a separate palloc chunk --- we assume it is just one chunk and + * can be freed by a simple pfree() (except during final on-the-fly merge, + * when memory is used in batch). SortTuples also contain the tuple's + * first key column in Datum/nullflag format, and an index integer. 
+ * + * Storing the first key column lets us save heap_getattr or index_getattr + * calls during tuple comparisons. We could extract and save all the key + * columns not just the first, but this would increase code complexity and + * overhead, and wouldn't actually save any comparison cycles in the common + * case where the first key determines the comparison result. Note that + * for a pass-by-reference datatype, datum1 points into the "tuple" storage. + * + * There is one special case: when the sort support infrastructure provides an + * "abbreviated key" representation, where the key is (typically) a pass by + * value proxy for a pass by reference type. In this case, the abbreviated key + * is stored in datum1 in place of the actual first key column. + * + * When sorting single Datums, the data value is represented directly by + * datum1/isnull1 for pass by value types (or null values). If the datatype is + * pass-by-reference and isnull1 is false, then "tuple" points to a separately + * palloc'd data value, otherwise "tuple" is NULL. The value of datum1 is then + * either the same pointer as "tuple", or is an abbreviated key value as + * described above. Accordingly, "tuple" is always used in preference to + * datum1 as the authoritative value for pass-by-reference cases. + * + * While building initial runs, tupindex holds the tuple's run number. + * Historically, the run number could meaningfully distinguish many runs, but + * it now only distinguishes RUN_FIRST and HEAP_RUN_NEXT, since replacement + * selection is always abandoned after the first run; no other run number + * should be represented here. During merge passes, we re-use it to hold the + * input tape number that each tuple in the heap was read from, or to hold the + * index of the next tuple pre-read from the same tape in the case of pre-read + * entries. tupindex goes unused if the sort occurs entirely in memory. 
+ */ +typedef struct +{ + void *tuple; /* the tuple itself */ + Datum datum1; /* value of first key column */ + bool isnull1; /* is first key column NULL? */ + int tupindex; /* see notes above */ +} SortTuple; + + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 3 blocks + * worth of buffer space (which is an underestimate for very large data + * volumes, but it's probably close enough --- see logtape.c). + * + * MERGE_BUFFER_SIZE is how much data we'd like to read from each input + * tape during a preread cycle (see discussion at top of file). + */ +#define MINORDER 6 /* minimum merge order */ +#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3) +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + + /* + * Run numbers, used during external sort operations. + * + * HEAP_RUN_NEXT is only used for SortTuple.tupindex, never state.currentRun. + */ +#define RUN_FIRST 0 +#define HEAP_RUN_NEXT INT_MAX +#define RUN_SECOND 1 + +typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TupSortStatus status; /* enumerated value as shown above */ + int nKeys; /* number of columns in sort key */ + bool randomAccess; /* did caller request random access? 
*/ + bool bounded; /* did caller specify a maximum number of + * tuples to return? */ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + bool tuples; /* Can SortTuple.tuple ever be set? */ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* number of tapes (Knuth's T) */ + int tapeRange; /* maxTapes-1 (Knuth's P) */ + MemoryContext sortcontext; /* memory context holding most sort data */ + MemoryContext tuplecontext; /* sub-context of sortcontext for tuple data */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * These function pointers decouple the routines that must know what kind + * of tuple we are sorting from the routines that don't need to know it. + * They are set up by the tuplesort_begin_xxx routines. + * + * Function to compare two tuples; result is per qsort() convention, ie: + * <0, 0, >0 according as a<b, a=b, a>b. The API must match + * qsort_arg_comparator. + */ + SortTupleComparator comparetup; + + /* + * Function to copy a supplied input tuple into palloc'd space and set up + * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, + * state->availMem must be decreased by the amount of space used for the + * tuple copy (note the SortTuple struct itself is not counted). + */ + void (*copytup) (Tuplesortstate *state, SortTuple *stup, void *tup); + + /* + * Function to write a stored tuple onto tape. The representation of the + * tuple on tape need not be the same as it is in memory; requirements on + * the tape representation are given below. After writing the tuple, + * pfree() the out-of-line data (not the SortTuple struct!), and increase + * state->availMem by the amount of memory space thereby released. + */ + void (*writetup) (Tuplesortstate *state, int tapenum, + SortTuple *stup); + + /* + * Function to read a stored tuple from tape back into memory. 
'len' is + * the already-read length of the stored tuple. Create a palloc'd copy, + * initialize tuple/datum1/isnull1 in the target SortTuple struct, and + * decrease state->availMem by the amount of memory space consumed. (See + * batchUsed notes for details on how memory is handled when incremental + * accounting is abandoned.) + */ + void (*readtup) (Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); + + /* + * Function to move a caller tuple. This is usually implemented as a + * memmove() shim, but function may also perform additional fix-up of + * caller tuple where needed. Batch memory support requires the movement + * of caller tuples from one location in memory to another. + */ + void (*movetup) (void *dest, void *src, unsigned int len); + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. (Note that memtupcount only counts the tuples that are part of the + * heap --- during merge passes, memtuples[] entries beyond tapeRange are + * never in the heap and are used to hold pre-read tuples.) In state + * SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated in batch, rather than + * incrementally. This implies that incremental memory accounting has + * been abandoned. Currently, this only happens for the final on-the-fly + * merge step. Large batch allocations can store tuples (e.g. + * IndexTuples) without palloc() fragmentation and other overhead. 
+ */ + bool batchUsed; + + /* + * While building initial runs, this indicates if the replacement + * selection strategy is in use. When it isn't, then a simple hybrid + * sort-merge strategy is in use instead (runs are quicksorted). + */ + bool replaceActive; + + /* + * While building initial runs, this is the current output run number + * (starting at RUN_FIRST). Afterwards, it is the number of initial runs + * we made. + */ + int currentRun; + + /* + * Unless otherwise noted, all pointer variables below are pointers to + * arrays of length maxTapes, holding per-tape data. + */ + + /* + * These variables are only used during merge passes. mergeactive[i] is + * true if we are reading an input run from (actual) tape number i and + * have not yet exhausted that run. mergenext[i] is the memtuples index + * of the next pre-read tuple (next to be loaded into the heap) for tape + * i, or 0 if we are out of pre-read tuples. mergelast[i] similarly + * points to the last pre-read tuple from each tape. mergeavailslots[i] + * is the number of unused memtuples[] slots reserved for tape i, and + * mergeavailmem[i] is the amount of unused space allocated for tape i. + * mergefreelist and mergefirstfree keep track of unused locations in the + * memtuples[] array. The memtuples[].tupindex fields link together + * pre-read tuples for each tape as well as recycled locations in + * mergefreelist. It is OK to use 0 as a null link in these lists, because + * memtuples[0] is part of the merge heap and is never a pre-read tuple. + */ + bool *mergeactive; /* active input run source? 
*/ + int *mergenext; /* first preread tuple for each source */ + int *mergelast; /* last preread tuple for each source */ + int *mergeavailslots; /* slots left for prereading each tape */ + int64 *mergeavailmem; /* availMem for prereading each tape */ + int mergefreelist; /* head of freelist of recycled slots */ + int mergefirstfree; /* first slot never used in this merge */ + + /* + * Per-tape batch state, when final on-the-fly merge consumes memory from + * just a few large allocations. + * + * Aside from the general benefits of performing fewer individual retail + * palloc() calls, this also helps make merging more cache efficient, + * since each tape's tuples must naturally be accessed sequentially (in + * sorted order). + */ + int64 spacePerTape; /* Space (memory) for tuples (not slots) */ + char **mergetuples; /* Each tape's memory allocation */ + char **mergecurrent; /* Current offset into each tape's memory */ + char **mergetail; /* Last item's start point for each tape */ + char **mergeoverflow; /* Retail palloc() "overflow" for each tape */ + + /* + * Variables for Algorithm D. Note that destTape is a "logical" tape + * number, ie, an index into the tp_xxx[] arrays. Be careful to keep + * "logical" and "actual" tape numbers straight! + */ + int Level; /* Knuth's l */ + int destTape; /* current output tape (Knuth's j, less 1) */ + int *tp_fib; /* Target Fibonacci run counts (A[]) */ + int *tp_runs; /* # of real runs on each tape */ + int *tp_dummy; /* # of dummy runs for each tape (D[]) */ + int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ + int activeTapes; /* # of active input tapes in merge pass */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) 
+ */ + int result_tape; /* actual tape number of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * The sortKeys variable is used by every case other than the hash index + * case; it is set by tuplesort_begin_xxx. tupDesc is only used by the + * MinimalTuple and CLUSTER routines, though. + */ + TupleDesc tupDesc; + SortSupport sortKeys; /* array of length nKeys */ + + /* + * This variable is shared by the single-key MinimalTuple case and the + * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + */ + SortSupport onlyKey; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * These variables are specific to the CLUSTER case; they are set by + * tuplesort_begin_cluster. + */ + IndexInfo *indexInfo; /* info about index being used for reference */ + EState *estate; /* for evaluating index expressions */ + + /* + * These variables are specific to the IndexTuple case; they are set by + * tuplesort_begin_index_xxx and used only by the IndexTuple routines. 
+ */ + Relation heapRel; /* table the index is being built on */ + Relation indexRel; /* index being built */ + + /* These are specific to the index_btree subcase: */ + bool enforceUnique; /* complain if we find duplicate tuples */ + + /* These are specific to the index_hash subcase: */ + uint32 hash_mask; /* mask for sortable part of hash code */ + + /* + * These variables are specific to the Datum case; they are set by + * tuplesort_begin_datum and used only by the DatumTuple routines. + */ + Oid datumType; + /* we need typelen in order to know how to copy the Datums. */ + int datumTypeLen; + + /* + * Resource snapshot for time of sort start. + */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) +#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) +#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) +#define READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) +#define MOVETUP(dest,src,len) ((*(state)->movetup) (dest, src, len)) /* NB: state is not a macro argument here; the expansion picks up a variable of that name from the calling scope */ +#define LACKMEM(state) ((state)->availMem < 0 && !(state)->batchUsed) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define FREEMEM(state,amt) ((state)->availMem += (amt)) + +/* + * NOTES about on-tape representation of tuples: + * + * We require the first "unsigned int" of a stored tuple to be the total size + * on-tape of the tuple, including itself (so it is never zero; an all-zero + * unsigned int is used to delimit runs). The remainder of the stored tuple + * may or may not match the in-memory representation of the tuple --- + * any conversion needed is the job of the writetup and readtup routines. + * + * If state->randomAccess is true, then the stored representation of the + * tuple must be followed by another "unsigned int" that is a copy of the + * length --- so the total tape space used is actually sizeof(unsigned int) + * more than the stored length value. 
This allows read-backwards. When + * randomAccess is not true, the write/read routines may omit the extra + * length word. + * + * writetup is expected to write both length words as well as the tuple + * data. When readtup is called, the tape is positioned just after the + * front length word; readtup must read the tuple data and advance past + * the back length word (if present). + * + * The write/read routines can make use of the tuple description data + * stored in the Tuplesortstate record, if needed. They are also expected + * to adjust state->availMem by the amount of memory space (not tape space!) + * released or consumed. There is no error return from either writetup + * or readtup; they should ereport() on failure. + * + * + * NOTES about memory consumption calculations: + * + * We count space allocated for tuples against the workMem limit, plus + * the space used by the variable-size memtuples array. Fixed-size space + * is not counted; it's small enough to not be interesting. + * + * Note that we count actual space used (as shown by GetMemoryChunkSpace) + * rather than the originally-requested size. This is important since + * palloc can add substantial overhead. It's not a complete answer since + * we won't count any wasted space in palloc allocation blocks, but it's + * a lot better than what we were doing before 7.3. As of 9.6, a + * separate memory context is used for caller passed tuples. Resetting + * it at certain key increments significantly ameliorates fragmentation. + * Note that this places a responsibility on readtup and copytup routines + * to use the right memory context for these tuples (and to not use the + * reset context for anything whose lifetime needs to span multiple + * external sort runs). 
+ */ + +/* When using this macro, beware of double evaluation of len */ +#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ + do { \ + if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ + elog(ERROR, "unexpected end of data"); \ + } while(0) + + +static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess); +static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); +static bool consider_abort_common(Tuplesortstate *state); +static bool useselection(Tuplesortstate *state); +static void inittapes(Tuplesortstate *state); +static void selectnewtape(Tuplesortstate *state); +static void mergeruns(Tuplesortstate *state); +static void mergeonerun(Tuplesortstate *state); +static void beginmerge(Tuplesortstate *state, bool finalMergeBatch); +static void batchmemtuples(Tuplesortstate *state); +static void mergebatch(Tuplesortstate *state, int64 spacePerTape); +static void mergebatchone(Tuplesortstate *state, int srcTape, + SortTuple *stup, bool *should_free); +static void mergebatchfreetape(Tuplesortstate *state, int srcTape, + SortTuple *rtup, bool *should_free); +static void *mergebatchalloc(Tuplesortstate *state, int tapenum, Size tuplen); +static void mergepreread(Tuplesortstate *state); +static void mergeprereadone(Tuplesortstate *state, int srcTape); +static void dumptuples(Tuplesortstate *state, bool alltuples); +static void dumpbatch(Tuplesortstate *state, bool alltuples); +static void make_bounded_heap(Tuplesortstate *state); +static void sort_bounded_heap(Tuplesortstate *state); +static void tuplesort_sort_memtuples(Tuplesortstate *state); +static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, + int tupleindex, bool checkIndex); +static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex); +static void reversedirection(Tuplesortstate *state); +static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK); +static void markrunend(Tuplesortstate *state, int tapenum); 
+static void *readtup_alloc(Tuplesortstate *state, int tapenum, Size tuplen); +static int comparetup_heap(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_heap(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static void movetup_heap(void *dest, void *src, unsigned int len); +static int comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_cluster(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static void movetup_cluster(void *dest, void *src, unsigned int len); +static int comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static int comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_index(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static void movetup_index(void *dest, void *src, unsigned int len); +static int comparetup_datum(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); +static void copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup); +static void writetup_datum(Tuplesortstate *state, int tapenum, + SortTuple *stup); +static void readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len); +static void movetup_datum(void *dest, void *src, unsigned int len); +static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); + +/* + * Special versions 
of qsort just for SortTuple objects. qsort_tuple() sorts + * any variant of SortTuples, using the appropriate comparetup function. + * qsort_ssup() is specialized for the case where the comparetup function + * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts + * and Datum sorts. + */ +#include "qsort_tuple.c" + + +/* + * tuplesort_begin_xxx + * + * Initialize for a tuple sort operation. + * + * After calling tuplesort_begin, the caller should call tuplesort_putXXX + * zero or more times, then call tuplesort_performsort when all the tuples + * have been supplied. After performsort, retrieve the tuples in sorted + * order by calling tuplesort_getXXX until it returns false/NULL. (If random + * access was requested, rescan, markpos, and restorepos can also be called.) + * Call tuplesort_end to terminate the operation and release memory/disk space. + * + * Each variant of tuplesort_begin has a workMem parameter specifying the + * maximum number of kilobytes of RAM to use before spilling data to disk. + * (The normal value of this parameter is work_mem, but some callers use + * other values.) Each variant also has a randomAccess parameter specifying + * whether the caller needs non-sequential access to the sort result. + */ + +static Tuplesortstate * +tuplesort_begin_common(int workMem, bool randomAccess) +{ + Tuplesortstate *state; + MemoryContext sortcontext; + MemoryContext tuplecontext; + MemoryContext oldcontext; + + /* + * Create a working memory context for this sort operation. All data + * needed by the sort will live inside this context. + */ + sortcontext = AllocSetContextCreate(CurrentMemoryContext, + "TupleSort main", + ALLOCSET_DEFAULT_SIZES); + + /* + * Caller tuple (e.g. IndexTuple) memory context. + * + * A dedicated child context used exclusively for caller passed tuples + * eases memory management. Resetting at key points reduces + * fragmentation. 
Note that the memtuples array of SortTuples is allocated + * in the parent context, not this context, because there is no need to + * free memtuples early. + */ + tuplecontext = AllocSetContextCreate(sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + + /* + * Make the Tuplesortstate within the per-sort context. This way, we + * don't need a separate pfree() operation for it at shutdown. + */ + oldcontext = MemoryContextSwitchTo(sortcontext); + + state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate)); + +#ifdef TRACE_SORT + if (trace_sort) + pg_rusage_init(&state->ru_start); +#endif + + state->status = TSS_INITIAL; + state->randomAccess = randomAccess; + state->bounded = false; + state->tuples = true; + state->boundUsed = false; + state->allowedMem = workMem * (int64) 1024; + state->availMem = state->allowedMem; + state->sortcontext = sortcontext; + state->tuplecontext = tuplecontext; + state->tapeset = NULL; + + state->memtupcount = 0; + + /* + * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; + * see comments in grow_memtuples(). 
+ */ + state->memtupsize = Max(1024, + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + + state->growmemtuples = true; + state->batchUsed = false; + state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + + /* workMem must be large enough for the minimal memtuples array */ + if (LACKMEM(state)) + elog(ERROR, "insufficient memory allowed for sort"); + + state->currentRun = RUN_FIRST; + + /* + * maxTapes, tapeRange, and Algorithm D variables will be initialized by + * inittapes(), if needed + */ + + state->result_tape = -1; /* flag that result tape has not been formed */ + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_heap(TupleDesc tupDesc, + int nkeys, AttrNumber *attNums, + Oid *sortOperators, Oid *sortCollations, + bool *nullsFirstFlags, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + + AssertArg(nkeys > 0); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + nkeys, workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = nkeys; + + TRACE_POSTGRESQL_SORT_START(HEAP_SORT, + false, /* no unique check */ + nkeys, + workMem, + randomAccess); + + state->comparetup = comparetup_heap; + state->copytup = copytup_heap; + state->writetup = writetup_heap; + state->readtup = readtup_heap; + state->movetup = movetup_heap; + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + state->abbrevNext = 10; + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData)); + + for (i = 0; i < nkeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + + AssertArg(attNums[i] != 0); + AssertArg(sortOperators[i] != 0); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = sortCollations[i]; + sortKey->ssup_nulls_first = nullsFirstFlags[i]; + sortKey->ssup_attno = attNums[i]; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); + } + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. 
+ */ + if (nkeys == 1 && !state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_cluster(TupleDesc tupDesc, + Relation indexRel, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + Assert(indexRel->rd_rel->relam == BTREE_AM_OID); + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", + RelationGetNumberOfAttributes(indexRel), + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(CLUSTER_SORT, + false, /* no unique check */ + state->nKeys, + workMem, + randomAccess); + + state->comparetup = comparetup_cluster; + state->copytup = copytup_cluster; + state->writetup = writetup_cluster; + state->readtup = readtup_cluster; + state->movetup = movetup_cluster; + state->abbrevNext = 10; + + state->indexInfo = BuildIndexInfo(indexRel); + + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ + + indexScanKey = _bt_mkscankey_nodata(indexRel); + + if (state->indexInfo->ii_Expressions != NULL) + { + TupleTableSlot *slot; + ExprContext *econtext; + + /* + * We will need to use FormIndexDatum to evaluate the index + * expressions. To do that, we need an EState, as well as a + * TupleTableSlot to put the table tuples into. The econtext's + * scantuple has to point to that slot, too. 
+ */ + state->estate = CreateExecutorState(); + slot = MakeSingleTupleTableSlot(tupDesc); + econtext = GetPerTupleExprContext(state->estate); + econtext->ecxt_scantuple = slot; + } + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_btree(Relation heapRel, + Relation indexRel, + bool enforceUnique, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + ScanKey indexScanKey; + MemoryContext oldcontext; + int i; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: unique = %c, workMem = %d, randomAccess = %c", + enforceUnique ? 't' : 'f', + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + TRACE_POSTGRESQL_SORT_START(INDEX_SORT, + enforceUnique, + state->nKeys, + workMem, + randomAccess); + + state->comparetup = comparetup_index_btree; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->movetup = movetup_index; + state->abbrevNext = 10; + + state->heapRel = heapRel; + state->indexRel = indexRel; + state->enforceUnique = enforceUnique; + + indexScanKey = _bt_mkscankey_nodata(indexRel); + state->nKeys = RelationGetNumberOfAttributes(indexRel); + + /* Prepare SortSupport data for each column */ + state->sortKeys = (SortSupport) palloc0(state->nKeys * + sizeof(SortSupportData)); + + for (i = 0; i < state->nKeys; i++) + { + SortSupport sortKey = state->sortKeys + i; + ScanKey scanKey = indexScanKey + i; + int16 strategy; + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = scanKey->sk_collation; + sortKey->ssup_nulls_first = + (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; + sortKey->ssup_attno = scanKey->sk_attno; + /* Convey if abbreviation optimization is applicable in principle */ + sortKey->abbreviate = (i == 0); + + AssertState(sortKey->ssup_attno != 0); + + strategy = (scanKey->sk_flags & SK_BT_DESC) != 0 ? + BTGreaterStrategyNumber : BTLessStrategyNumber; + + PrepareSortSupportFromIndexRel(indexRel, strategy, sortKey); + } + + _bt_freeskey(indexScanKey); + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_index_hash(Relation heapRel, + Relation indexRel, + uint32 hash_mask, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin index sort: hash_mask = 0x%x, workMem = %d, randomAccess = %c", + hash_mask, + workMem, randomAccess ? 
't' : 'f'); +#endif + + state->nKeys = 1; /* Only one sort column, the hash code */ + + state->comparetup = comparetup_index_hash; + state->copytup = copytup_index; + state->writetup = writetup_index; + state->readtup = readtup_index; + state->movetup = movetup_index; + + state->heapRel = heapRel; + state->indexRel = indexRel; + + state->hash_mask = hash_mask; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +Tuplesortstate * +tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, + bool nullsFirstFlag, + int workMem, bool randomAccess) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); + MemoryContext oldcontext; + int16 typlen; + bool typbyval; + + oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, + "begin datum sort: workMem = %d, randomAccess = %c", + workMem, randomAccess ? 't' : 'f'); +#endif + + state->nKeys = 1; /* always a one-column sort */ + + TRACE_POSTGRESQL_SORT_START(DATUM_SORT, + false, /* no unique check */ + 1, + workMem, + randomAccess); + + state->comparetup = comparetup_datum; + state->copytup = copytup_datum; + state->writetup = writetup_datum; + state->readtup = readtup_datum; + state->movetup = movetup_datum; + state->abbrevNext = 10; + + state->datumType = datumType; + + /* lookup necessary attributes of the datum type */ + get_typlenbyval(datumType, &typlen, &typbyval); + state->datumTypeLen = typlen; + state->tuples = !typbyval; + + /* Prepare SortSupport data */ + state->sortKeys = (SortSupport) palloc0(sizeof(SortSupportData)); + + state->sortKeys->ssup_cxt = CurrentMemoryContext; + state->sortKeys->ssup_collation = sortCollation; + state->sortKeys->ssup_nulls_first = nullsFirstFlag; + + /* + * Abbreviation is possible here only for by-reference types. In theory, + * a pass-by-value datatype could have an abbreviated form that is cheaper + * to compare. 
In a tuple sort, we could support that, because we can + * always extract the original datum from the tuple if needed. Here, we + * can't, because a datum sort only stores a single copy of the datum; the + * "tuple" field of each sortTuple is NULL. + */ + state->sortKeys->abbreviate = !typbyval; + + PrepareSortSupportFromOrderingOp(sortOperator, state->sortKeys); + + /* + * The "onlyKey" optimization cannot be used with abbreviated keys, since + * tie-breaker comparisons may be required. Typically, the optimization + * is only of value to pass-by-value types anyway, whereas abbreviated + * keys are typically only of value to pass-by-reference types. + */ + if (!state->sortKeys->abbrev_converter) + state->onlyKey = state->sortKeys; + + MemoryContextSwitchTo(oldcontext); + + return state; +} + +/* + * tuplesort_set_bound + * + * Advise tuplesort that at most the first N result tuples are required. + * + * Must be called before inserting any tuples. (Actually, we could allow it + * as long as the sort hasn't spilled to disk, but there seems no need for + * delayed calls at the moment.) + * + * This is a hint only. The tuplesort may still return more tuples than + * requested. + */ +void +tuplesort_set_bound(Tuplesortstate *state, int64 bound) +{ + /* Assert we're called before loading any tuples */ + Assert(state->status == TSS_INITIAL); + Assert(state->memtupcount == 0); + Assert(!state->bounded); + +#ifdef DEBUG_BOUNDED_SORT + /* Honor GUC setting that disables the feature (for easy testing) */ + if (!optimize_bounded_sort) + return; +#endif + + /* We want to be able to compute bound * 2, so limit the setting */ + if (bound > (int64) (INT_MAX / 2)) + return; + + state->bounded = true; + state->bound = (int) bound; + + /* + * Bounded sorts are not an effective target for abbreviated key + * optimization. Disable by setting state to be consistent with no + * abbreviation support.
+ */ + state->sortKeys->abbrev_converter = NULL; + if (state->sortKeys->abbrev_full_comparator) + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; +} + +/* + * tuplesort_end + * + * Release resources and clean up. + * + * NOTE: after calling this, any pointers returned by tuplesort_getXXX are + * pointing to garbage. Be careful not to attempt to use or free such + * pointers afterwards! + */ +void +tuplesort_end(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "external sort ended, %ld disk blocks used: %s", + spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "internal sort ended, %ld KB used: %s", + spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. 
+ */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + /* Free any execution state created for CLUSTER case */ + if (state->estate != NULL) + { + ExprContext *econtext = GetPerTupleExprContext(state->estate); + + ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); + FreeExecutorState(state->estate); + } + + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory, + * including the Tuplesortstate struct itself. + */ + MemoryContextDelete(state->sortcontext); +} + +/* + * Grow the memtuples[] array, if possible within our memory constraint. We + * must not exceed INT_MAX tuples in memory or the caller-provided memory + * limit. Return true if we were able to enlarge the array, false if not. + * + * Normally, at each increment we double the size of the array. When doing + * that would exceed a limit, we attempt one last, smaller increase (and then + * clear the growmemtuples flag so we don't try any more). That allows us to + * use memory as fully as permitted; sticking to the pure doubling rule could + * result in almost half going unused. Because availMem moves around with + * tuple addition/removal, we need some rule to prevent making repeated small + * increases in memtupsize, which would just be useless thrashing. The + * growmemtuples flag accomplishes that and also prevents useless + * recalculations in this function. + */ +static bool +grow_memtuples(Tuplesortstate *state) +{ + int newmemtupsize; + int memtupsize = state->memtupsize; + int64 memNowUsed = state->allowedMem - state->availMem; + + /* Forget it if we've already maxed out memtuples, per comment above */ + if (!state->growmemtuples) + return false; + + /* Select new value of memtupsize */ + if (memNowUsed <= state->availMem) + { + /* + * We've used no more than half of allowedMem; double our usage, + * clamping at INT_MAX tuples.
+ */ + if (memtupsize < INT_MAX / 2) + newmemtupsize = memtupsize * 2; + else + { + newmemtupsize = INT_MAX; + state->growmemtuples = false; + } + } + else + { + /* + * This will be the last increment of memtupsize. Abandon doubling + * strategy and instead increase as much as we safely can. + * + * To stay within allowedMem, we can't increase memtupsize by more + * than availMem / sizeof(SortTuple) elements. In practice, we want + * to increase it by considerably less, because we need to leave some + * space for the tuples to which the new array slots will refer. We + * assume the new tuples will be about the same size as the tuples + * we've already seen, and thus we can extrapolate from the space + * consumption so far to estimate an appropriate new size for the + * memtuples array. The optimal value might be higher or lower than + * this estimate, but it's hard to know that in advance. We again + * clamp at INT_MAX tuples. + * + * This calculation is safe against enlarging the array so much that + * LACKMEM becomes true, because the memory currently used includes + * the present array; thus, there would be enough allowedMem for the + * new array elements even if no other memory were currently used. + * + * We do the arithmetic in float8, because otherwise the product of + * memtupsize and allowedMem could overflow. Any inaccuracy in the + * result should be insignificant; but even if we computed a + * completely insane result, the checks below will prevent anything + * really bad from happening. 
+ */ + double grow_ratio; + + grow_ratio = (double) state->allowedMem / (double) memNowUsed; + if (memtupsize * grow_ratio < INT_MAX) + newmemtupsize = (int) (memtupsize * grow_ratio); + else + newmemtupsize = INT_MAX; + + /* We won't make any further enlargement attempts */ + state->growmemtuples = false; + } + + /* Must enlarge array by at least one element, else report failure */ + if (newmemtupsize <= memtupsize) + goto noalloc; + + /* + * On a 32-bit machine, allowedMem could exceed MaxAllocHugeSize. Clamp + * to ensure our request won't be rejected. Note that we can easily + * exhaust address space before facing this outcome. (This is presently + * impossible due to guc.c's MAX_KILOBYTES limitation on work_mem, but + * don't rely on that at this distance.) + */ + if ((Size) newmemtupsize >= MaxAllocHugeSize / sizeof(SortTuple)) + { + newmemtupsize = (int) (MaxAllocHugeSize / sizeof(SortTuple)); + state->growmemtuples = false; /* can't grow any more */ + } + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the space management algorithm will go nuts. The code above should + * never generate a dangerous request, but to be safe, check explicitly + * that the array growth fits within availMem. (We could still cause + * LACKMEM if the memory chunk overhead associated with the memtuples + * array were to increase. That shouldn't happen because we chose the + * initial array size large enough to ensure that palloc will be treating + * both old and new arrays as separate chunks. But we'll check LACKMEM + * explicitly below just in case.) 
+ */ + if (state->availMem < (int64) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) + goto noalloc; + + /* OK, do it */ + FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); + state->memtupsize = newmemtupsize; + state->memtuples = (SortTuple *) + repalloc_huge(state->memtuples, + state->memtupsize * sizeof(SortTuple)); + USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + return true; + +noalloc: + /* If for any reason we didn't realloc, shut off future attempts */ + state->growmemtuples = false; + return false; +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_puttupleslot(Tuplesortstate *state, TupleTableSlot *slot) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) slot); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one tuple while collecting input data for sort. + * + * Note that the input data is always copied; the caller need not save it. + */ +void +tuplesort_putheaptuple(Tuplesortstate *state, HeapTuple tup) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + /* + * Copy the given tuple into memory we control, and decrease availMem. + * Then call the common code. + */ + COPYTUP(state, &stup, (void *) tup); + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Collect one index tuple while collecting input data for sort, building + * it from caller-supplied values. 
+ */ +void +tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, + ItemPointer self, Datum *values, + bool *isnull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + Datum original; + IndexTuple tuple; + + stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull); + tuple = ((IndexTuple) stup.tuple); + tuple->t_tid = *self; + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + /* set up first-column key value */ + original = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup.isnull1); + + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys || !state->sortKeys->abbrev_converter || stup.isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). 
+ */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Accept one Datum while collecting input data for sort. + * + * If the Datum is pass-by-ref type, the value will be copied. + */ +void +tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + SortTuple stup; + + /* + * Pass-by-value types or null values are just stored directly in + * stup.datum1 (and stup.tuple is not used and set to NULL). + * + * Non-null pass-by-reference values need to be copied into memory we + * control, and possibly abbreviated. The copied value is pointed to by + * stup.tuple and is treated as the canonical copy (e.g. to return via + * tuplesort_getdatum or when writing to tape); stup.datum1 gets the + * abbreviated value if abbreviation is happening, otherwise it's + * identical to stup.tuple. + */ + + if (isNull || !state->tuples) + { + /* + * Set datum1 to zeroed representation for NULLs (to be consistent, + * and to support cheap inequality tests for NULL abbreviated keys). + */ + stup.datum1 = !isNull ? 
val : (Datum) 0; + stup.isnull1 = isNull; + stup.tuple = NULL; /* no separate storage */ + MemoryContextSwitchTo(state->sortcontext); + } + else + { + Datum original = datumCopy(val, false, state->datumTypeLen); + + stup.isnull1 = false; + stup.tuple = DatumGetPointer(original); + USEMEM(state, GetMemoryChunkSpace(stup.tuple)); + MemoryContextSwitchTo(state->sortcontext); + + if (!state->sortKeys->abbrev_converter) + { + stup.datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup.datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup.datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any + * case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + mtup->datum1 = PointerGetDatum(mtup->tuple); + } + } + } + + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Shared code for tuple and datum cases. + */ +static void +puttuple_common(Tuplesortstate *state, SortTuple *tuple) +{ + switch (state->status) + { + case TSS_INITIAL: + + /* + * Save the tuple into the unsorted array. First, grow the array + * as needed. Note that we try to grow the array when there is + * still one free slot remaining --- if we fail, there'll still be + * room to store the incoming tuple, and then we'll switch to + * tape-based operation. 
+ */ + if (state->memtupcount >= state->memtupsize - 1) + { + (void) grow_memtuples(state); + Assert(state->memtupcount < state->memtupsize); + } + state->memtuples[state->memtupcount++] = *tuple; + + /* + * Check if it's time to switch over to a bounded heapsort. We do + * so if the input tuple count exceeds twice the desired tuple + * count (this is a heuristic for where heapsort becomes cheaper + * than a quicksort), or if we've just filled workMem and have + * enough tuples to meet the bound. + * + * Note that once we enter TSS_BOUNDED state we will always try to + * complete the sort that way. In the worst case, if later input + * tuples are larger than earlier ones, this might cause us to + * exceed workMem significantly. + */ + if (state->bounded && + (state->memtupcount > state->bound * 2 || + (state->memtupcount > state->bound && LACKMEM(state)))) + { +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to bounded heapsort at %d tuples: %s", + state->memtupcount, + pg_rusage_show(&state->ru_start)); +#endif + make_bounded_heap(state); + return; + } + + /* + * Done if we still fit in available memory and have array slots. + */ + if (state->memtupcount < state->memtupsize && !LACKMEM(state)) + return; + + /* + * Nope; time to switch to tape-based operation. + */ + inittapes(state); + + /* + * Dump tuples until we are back under the limit. + */ + dumptuples(state, false); + break; + + case TSS_BOUNDED: + + /* + * We don't want to grow the array here, so check whether the new + * tuple can be discarded before putting it in. This should be a + * good speed optimization, too, since when there are many more + * input tuples than the bound, most input tuples can be discarded + * with just this one comparison. Note that because we currently + * have the sort direction reversed, we must check for <= not >=. 
+ */ + if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) + { + /* new tuple <= top of the heap, so we can discard it */ + free_sort_tuple(state, tuple); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* discard top of heap, sift up, insert new tuple */ + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_siftup(state, false); + tuplesort_heap_insert(state, tuple, 0, false); + } + break; + + case TSS_BUILDRUNS: + + /* + * Insert the tuple into the heap, with run number currentRun if + * it can go into the current run, else HEAP_RUN_NEXT. The tuple + * can go into the current run if it is >= the first + * not-yet-output tuple. (Actually, it could go into the current + * run if it is >= the most recently output tuple ... but that + * would require keeping around the tuple we last output, and it's + * simplest to let writetup free each tuple as soon as it's + * written.) + * + * Note that this only applies when: + * + * - currentRun is RUN_FIRST + * + * - Replacement selection is in use (typically it is never used). + * + * When these two conditions are not both true, all tuples are + * appended indifferently, much like the TSS_INITIAL case. + * + * There should always be room to store the incoming tuple. + */ + Assert(!state->replaceActive || state->memtupcount > 0); + if (state->replaceActive && + COMPARETUP(state, tuple, &state->memtuples[0]) >= 0) + { + Assert(state->currentRun == RUN_FIRST); + + /* + * Insert tuple into first, fully heapified run. + * + * Unlike classic replacement selection, which this module was + * previously based on, only RUN_FIRST tuples are fully + * heapified. Any second/next run tuples are appended + * indifferently. While HEAP_RUN_NEXT tuples may be sifted + * out of the way of first run tuples, COMPARETUP() will never + * be called for the run's tuples during sifting (only our + * initial COMPARETUP() call is required for the tuple, to + * determine that the tuple does not belong in RUN_FIRST). 
+ */ + tuplesort_heap_insert(state, tuple, state->currentRun, true); + } + else + { + /* + * Tuple was determined to not belong to heapified RUN_FIRST, + * or replacement selection not in play. Append the tuple to + * memtuples indifferently. + * + * dumptuples() does not trust that the next run's tuples are + * heapified. Anything past the first run will always be + * quicksorted even when replacement selection is initially + * used. (When it's never used, every tuple still takes this + * path.) + */ + tuple->tupindex = HEAP_RUN_NEXT; + state->memtuples[state->memtupcount++] = *tuple; + } + + /* + * If we are over the memory limit, dump tuples till we're under. + */ + dumptuples(state, false); + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } +} + +static bool +consider_abort_common(Tuplesortstate *state) +{ + Assert(state->sortKeys[0].abbrev_converter != NULL); + Assert(state->sortKeys[0].abbrev_abort != NULL); + Assert(state->sortKeys[0].abbrev_full_comparator != NULL); + + /* + * Check effectiveness of abbreviation optimization. Consider aborting + * when still within memory limit. + */ + if (state->status == TSS_INITIAL && + state->memtupcount >= state->abbrevNext) + { + state->abbrevNext *= 2; + + /* + * Check opclass-supplied abbreviation abort routine. It may indicate + * that abbreviation should not proceed. 
+ */ + if (!state->sortKeys->abbrev_abort(state->memtupcount, + state->sortKeys)) + return false; + + /* + * Finally, restore authoritative comparator, and indicate that + * abbreviation is not in play by setting abbrev_converter to NULL + */ + state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator; + state->sortKeys[0].abbrev_converter = NULL; + /* Not strictly necessary, but be tidy */ + state->sortKeys[0].abbrev_abort = NULL; + state->sortKeys[0].abbrev_full_comparator = NULL; + + /* Give up - expect original pass-by-value representation */ + return true; + } + + return false; +} + +/* + * All tuples have been provided; finish the sort. + */ +void +tuplesort_performsort(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "performsort starting: %s", + pg_rusage_show(&state->ru_start)); +#endif + + switch (state->status) + { + case TSS_INITIAL: + + /* + * We were able to accumulate all the tuples within the allowed + * amount of memory. Just qsort 'em and we're done. + */ + tuplesort_sort_memtuples(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BOUNDED: + + /* + * We were able to accumulate all the tuples required for output + * in memory, using a heap to eliminate excess tuples. Now we + * have to transform the heap to a properly-sorted array. + */ + sort_bounded_heap(state); + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + state->status = TSS_SORTEDINMEM; + break; + + case TSS_BUILDRUNS: + + /* + * Finish tape-based sort. First, flush all tuples remaining in + * memory out to tape; then merge until we have a single remaining + * run (or, if !randomAccess, one run per tape). Note that + * mergeruns sets the correct state->status. 
+ */ + dumptuples(state, true); + mergeruns(state); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->status == TSS_FINALMERGE) + elog(LOG, "performsort done (except %d-way final merge): %s", + state->activeTapes, + pg_rusage_show(&state->ru_start)); + else + elog(LOG, "performsort done: %s", + pg_rusage_show(&state->ru_start)); + } +#endif + + MemoryContextSwitchTo(oldcontext); +} + +/* + * Internal routine to fetch the next tuple in either forward or back + * direction into *stup. Returns FALSE if no more tuples. + * If *should_free is set, the caller must pfree stup.tuple when done with it. + * Otherwise, caller should not use tuple following next call here. + * + * Note: Public tuplesort fetch routine callers cannot rely on tuple being + * allocated in their own memory context when should_free is TRUE. It may be + * necessary to create a new copy of the tuple to meet the requirements of + * public fetch routine callers. + */ +static bool +tuplesort_gettuple_common(Tuplesortstate *state, bool forward, + SortTuple *stup, bool *should_free) +{ + unsigned int tuplen; + + switch (state->status) + { + case TSS_SORTEDINMEM: + Assert(forward || state->randomAccess); + Assert(!state->batchUsed); + *should_free = false; + if (forward) + { + if (state->current < state->memtupcount) + { + *stup = state->memtuples[state->current++]; + return true; + } + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. 
+ */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + } + else + { + if (state->current <= 0) + return false; + + /* + * if all tuples are fetched already then we return last + * tuple, else - tuple before last returned. + */ + if (state->eof_reached) + state->eof_reached = false; + else + { + state->current--; /* last returned tuple */ + if (state->current <= 0) + return false; + } + *stup = state->memtuples[state->current - 1]; + return true; + } + break; + + case TSS_SORTEDONTAPE: + Assert(forward || state->randomAccess); + Assert(!state->batchUsed); + *should_free = true; + if (forward) + { + if (state->eof_reached) + return false; + if ((tuplen = getlen(state, state->result_tape, true)) != 0) + { + READTUP(state, stup, state->result_tape, tuplen); + return true; + } + else + { + state->eof_reached = true; + return false; + } + } + + /* + * Backward. + * + * if all tuples are fetched already then we return last tuple, + * else - tuple before last returned. + */ + if (state->eof_reached) + { + /* + * Seek position is pointing just past the zero tuplen at the + * end of file; back up to fetch last tuple's ending length + * word. If seek fails we must have a completely empty file. + */ + if (!LogicalTapeBackspace(state->tapeset, + state->result_tape, + 2 * sizeof(unsigned int))) + return false; + state->eof_reached = false; + } + else + { + /* + * Back up and fetch previously-returned tuple's ending length + * word. If seek fails, assume we are at start of file. + */ + if (!LogicalTapeBackspace(state->tapeset, + state->result_tape, + sizeof(unsigned int))) + return false; + tuplen = getlen(state, state->result_tape, false); + + /* + * Back up to get ending length word of tuple before it. + */ + if (!LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + 2 * sizeof(unsigned int))) + { + /* + * If that fails, presumably the prev tuple is the first + * in the file. 
Back up so that it becomes next to read + * in forward direction (not obviously right, but that is + * what in-memory case does). + */ + if (!LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen + sizeof(unsigned int))) + elog(ERROR, "bogus tuple length in backward scan"); + return false; + } + } + + tuplen = getlen(state, state->result_tape, false); + + /* + * Now we have the length of the prior tuple, back up and read it. + * Note: READTUP expects we are positioned after the initial + * length word of the tuple, so back up to that point. + */ + if (!LogicalTapeBackspace(state->tapeset, + state->result_tape, + tuplen)) + elog(ERROR, "bogus tuple length in backward scan"); + READTUP(state, stup, state->result_tape, tuplen); + return true; + + case TSS_FINALMERGE: + Assert(forward); + Assert(state->batchUsed || !state->tuples); + /* For now, assume tuple is stored in tape's batch memory */ + *should_free = false; + + /* + * This code should match the inner loop of mergeonerun(). + */ + if (state->memtupcount > 0) + { + int srcTape = state->memtuples[0].tupindex; + int tupIndex; + SortTuple *newtup; + + /* + * Returned tuple is still counted in our memory space most of + * the time. See mergebatchone() for discussion of why caller + * may occasionally be required to free returned tuple, and + * how preread memory is managed with regard to edge cases + * more generally. + */ + *stup = state->memtuples[0]; + tuplesort_heap_siftup(state, false); + if ((tupIndex = state->mergenext[srcTape]) == 0) + { + /* + * out of preloaded data on this tape, try to read more + * + * Unlike mergeonerun(), we only preload from the single + * tape that's run dry, though not before preparing its + * batch memory for a new round of sequential consumption. + * See mergepreread() comments. 
+ */ + if (state->batchUsed) + mergebatchone(state, srcTape, stup, should_free); + + mergeprereadone(state, srcTape); + + /* + * if still no data, we've reached end of run on this tape + */ + if ((tupIndex = state->mergenext[srcTape]) == 0) + { + /* Free tape's buffer, avoiding dangling pointer */ + if (state->batchUsed) + mergebatchfreetape(state, srcTape, stup, should_free); + return true; + } + } + /* pull next preread tuple from list, insert in heap */ + newtup = &state->memtuples[tupIndex]; + state->mergenext[srcTape] = newtup->tupindex; + if (state->mergenext[srcTape] == 0) + state->mergelast[srcTape] = 0; + tuplesort_heap_insert(state, newtup, srcTape, false); + /* put the now-unused memtuples entry on the freelist */ + newtup->tupindex = state->mergefreelist; + state->mergefreelist = tupIndex; + state->mergeavailslots[srcTape]++; + return true; + } + return false; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * If successful, put tuple in slot and return TRUE; else, clear the slot + * and return FALSE. + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value in leading attribute will set abbreviated value to zeroed + * representation, which caller may rely on in abbreviated inequality check. + * + * The slot receives a tuple that's been copied into the caller's memory + * context, so that it will stay valid regardless of future manipulations of + * the tuplesort's state (up to and including deleting the tuplesort). + * This differs from similar routines for other types of tuplesorts. 
+ */ +bool +tuplesort_gettupleslot(Tuplesortstate *state, bool forward, + TupleTableSlot *slot, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + bool should_free; + + if (!tuplesort_gettuple_common(state, forward, &stup, &should_free)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + if (stup.tuple) + { + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + /* + * Callers rely on tuple being in their own memory context, which is + * not guaranteed by tuplesort_gettuple_common(), even when should_free + * is set to TRUE. We must always copy here, since our interface does + * not allow callers to opt into arrangement where tuple memory can go + * away on the next call here, or after tuplesort_end() is called. + */ + ExecStoreMinimalTuple(heap_copy_minimal_tuple((MinimalTuple) stup.tuple), + slot, true); + + /* + * Free local copy if needed. It would be very invasive to get + * tuplesort_gettuple_common() to allocate tuple in caller's context + * for us, so we just do this instead. + */ + if (should_free) + pfree(stup.tuple); + + return true; + } + else + { + ExecClearTuple(slot); + return false; + } +} + +/* + * Fetch the next tuple in either forward or back direction. + * Returns NULL if no more tuples. If *should_free is set, the + * caller must pfree the returned tuple when done with it. + * If it is not set, caller should not use tuple following next + * call here. It's never okay to use it after tuplesort_end(). 
+ */ +HeapTuple +tuplesort_getheaptuple(Tuplesortstate *state, bool forward, bool *should_free) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup, should_free)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return stup.tuple; +} + +/* + * Fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. If *should_free is set, the + * caller must pfree the returned tuple when done with it. + * If it is not set, caller should not use tuple following next + * call here. It's never okay to use it after tuplesort_end(). + */ +IndexTuple +tuplesort_getindextuple(Tuplesortstate *state, bool forward, + bool *should_free) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + + if (!tuplesort_gettuple_common(state, forward, &stup, should_free)) + stup.tuple = NULL; + + MemoryContextSwitchTo(oldcontext); + + return (IndexTuple) stup.tuple; +} + +/* + * Fetch the next Datum in either forward or back direction. + * Returns FALSE if no more datums. + * + * If the Datum is pass-by-ref type, the returned value is freshly palloc'd + * in caller's context, and is now owned by the caller (this differs from + * similar routines for other types of tuplesorts). + * + * Caller may optionally be passed back abbreviated value (on TRUE return + * value) when abbreviation was used, which can be used to cheaply avoid + * equality checks that might otherwise be required. Caller can safely make a + * determination of "non-equal tuple" based on simple binary inequality. A + * NULL value will have a zeroed abbreviated value representation, which caller + * may rely on in abbreviated inequality check. 
+ */ +bool +tuplesort_getdatum(Tuplesortstate *state, bool forward, + Datum *val, bool *isNull, Datum *abbrev) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + bool should_free; + + if (!tuplesort_gettuple_common(state, forward, &stup, &should_free)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + + /* Ensure we copy into caller's memory context */ + MemoryContextSwitchTo(oldcontext); + + /* Record abbreviated key for caller */ + if (state->sortKeys->abbrev_converter && abbrev) + *abbrev = stup.datum1; + + if (stup.isnull1 || !state->tuples) + { + *val = stup.datum1; + *isNull = stup.isnull1; + } + else + { + /* + * Callers rely on datum being in their own memory context, which is + * not guaranteed by tuplesort_gettuple_common(), even when should_free + * is set to TRUE. We must always copy here, since our interface does + * not allow callers to opt into arrangement where tuple memory can go + * away on the next call here, or after tuplesort_end() is called. + * + * Use stup.tuple because stup.datum1 may be an abbreviation. + */ + *val = datumCopy(PointerGetDatum(stup.tuple), false, state->datumTypeLen); + *isNull = false; + + /* + * Free local copy if needed. It would be very invasive to get + * tuplesort_gettuple_common() to allocate tuple in caller's context + * for us, so we just do this instead. + */ + if (should_free) + pfree(stup.tuple); + } + + return true; +} + +/* + * Advance over N tuples in either forward or back direction, + * without returning any data. N==0 is a no-op. + * Returns TRUE if successful, FALSE if ran out of tuples. + */ +bool +tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples, bool forward) +{ + MemoryContext oldcontext; + + /* + * We don't actually support backwards skip yet, because no callers need + * it. The API is designed to allow for that later, though. 
+ */ + Assert(forward); + Assert(ntuples >= 0); + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->memtupcount - state->current >= ntuples) + { + state->current += ntuples; + return true; + } + state->current = state->memtupcount; + state->eof_reached = true; + + /* + * Complain if caller tries to retrieve more tuples than + * originally asked for in a bounded sort. This is because + * returning EOF here might be the wrong thing. + */ + if (state->bounded && state->current >= state->bound) + elog(ERROR, "retrieved too many tuples in a bounded sort"); + + return false; + + case TSS_SORTEDONTAPE: + case TSS_FINALMERGE: + + /* + * We could probably optimize these cases better, but for now it's + * not worth the trouble. + */ + oldcontext = MemoryContextSwitchTo(state->sortcontext); + while (ntuples-- > 0) + { + SortTuple stup; + bool should_free; + + if (!tuplesort_gettuple_common(state, forward, + &stup, &should_free)) + { + MemoryContextSwitchTo(oldcontext); + return false; + } + if (should_free && stup.tuple) + pfree(stup.tuple); + CHECK_FOR_INTERRUPTS(); + } + MemoryContextSwitchTo(oldcontext); + return true; + + default: + elog(ERROR, "invalid tuplesort state"); + return false; /* keep compiler quiet */ + } +} + +/* + * tuplesort_merge_order - report merge order we'll use for given memory + * (note: "merge order" just means the number of input tapes in the merge). + * + * This is exported for use by the planner. allowedMem is in bytes. + */ +int +tuplesort_merge_order(int64 allowedMem) +{ + int mOrder; + + /* + * We need one tape for each merge input, plus another one for the output, + * and each of these tapes needs buffer space. In addition we want + * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't + * count). + * + * Note: you might be thinking we need to account for the memtuples[] + * array in this calculation, but we effectively treat that as part of the + * MERGE_BUFFER_SIZE workspace. 
+ */ + mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / + (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); + + /* Even in minimum memory, use at least a MINORDER merge */ + mOrder = Max(mOrder, MINORDER); + + return mOrder; +} + +/* + * useselection - determine algorithm to use to sort first run. + * + * It can sometimes be useful to use the replacement selection algorithm if it + * results in one large run, and there is little available workMem. See + * remarks on RUN_SECOND optimization within dumptuples(). + */ +static bool +useselection(Tuplesortstate *state) +{ + /* + * memtupsize might be noticeably higher than memtupcount here in atypical + * cases. It seems slightly preferable to not allow recent outliers to + * impact this determination. Note that caller's trace_sort output + * reports memtupcount instead. + */ + if (state->memtupsize <= replacement_sort_tuples) + return true; + + return false; +} + +/* + * inittapes - initialize for tape sorting. + * + * This is called only if we have found we don't have room to sort in memory. + */ +static void +inittapes(Tuplesortstate *state) +{ + int maxTapes, + j; + int64 tapeSpace; + + /* Compute number of tapes to use: merge order plus 1 */ + maxTapes = tuplesort_merge_order(state->allowedMem) + 1; + + /* + * We must have at least 2*maxTapes slots in the memtuples[] array, else + * we'd not have room for merge heap plus preread. It seems unlikely that + * this case would ever occur, but be safe. + */ + maxTapes = Min(maxTapes, state->memtupsize / 2); + + state->maxTapes = maxTapes; + state->tapeRange = maxTapes - 1; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "switching to external sort with %d tapes: %s", + maxTapes, pg_rusage_show(&state->ru_start)); +#endif + + /* + * Decrease availMem to reflect the space needed for tape buffers; but + * don't decrease it to the point that we have no room for tuples. 
(That + * case is only likely to occur if sorting pass-by-value Datums; in all + * other scenarios the memtuples[] array is unlikely to occupy more than + * half of allowedMem. In the pass-by-value case it's not important to + * account for tuple space, so we don't care if LACKMEM becomes + * inaccurate.) + */ + tapeSpace = (int64) maxTapes *TAPE_BUFFER_OVERHEAD; + + if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) + USEMEM(state, tapeSpace); + + /* + * Make sure that the temp file(s) underlying the tape set are created in + * suitable temp tablespaces. + */ + PrepareTempTablespaces(); + + /* + * Create the tape set and allocate the per-tape data arrays. + */ + state->tapeset = LogicalTapeSetCreate(maxTapes); + + state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); + state->mergenext = (int *) palloc0(maxTapes * sizeof(int)); + state->mergelast = (int *) palloc0(maxTapes * sizeof(int)); + state->mergeavailslots = (int *) palloc0(maxTapes * sizeof(int)); + state->mergeavailmem = (int64 *) palloc0(maxTapes * sizeof(int64)); + state->mergetuples = (char **) palloc0(maxTapes * sizeof(char *)); + state->mergecurrent = (char **) palloc0(maxTapes * sizeof(char *)); + state->mergetail = (char **) palloc0(maxTapes * sizeof(char *)); + state->mergeoverflow = (char **) palloc0(maxTapes * sizeof(char *)); + state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); + state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); + + /* + * Give replacement selection a try based on user setting. There will be + * a switch to a simple hybrid sort-merge strategy after the first run + * (iff we could not output one long run). + */ + state->replaceActive = useselection(state); + + if (state->replaceActive) + { + /* + * Convert the unsorted contents of memtuples[] into a heap. 
Each + * tuple is marked as belonging to run number zero. + * + * NOTE: we pass false for checkIndex since there's no point in + * comparing indexes in this step, even though we do intend the + * indexes to be part of the sort key... + */ + int ntuples = state->memtupcount; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "replacement selection will sort %d first run tuples", + state->memtupcount); +#endif + state->memtupcount = 0; /* make the heap empty */ + + for (j = 0; j < ntuples; j++) + { + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[j]; + + tuplesort_heap_insert(state, &stup, 0, false); + } + Assert(state->memtupcount == ntuples); + } + + state->currentRun = RUN_FIRST; + + /* + * Initialize variables of Algorithm D (step D1). + */ + for (j = 0; j < maxTapes; j++) + { + state->tp_fib[j] = 1; + state->tp_runs[j] = 0; + state->tp_dummy[j] = 1; + state->tp_tapenum[j] = j; + } + state->tp_fib[state->tapeRange] = 0; + state->tp_dummy[state->tapeRange] = 0; + + state->Level = 1; + state->destTape = 0; + + state->status = TSS_BUILDRUNS; +} + +/* + * selectnewtape -- select new tape for new initial run. + * + * This is called after finishing a run when we know another run + * must be started. This implements steps D3, D4 of Algorithm D. + */ +static void +selectnewtape(Tuplesortstate *state) +{ + int j; + int a; + + /* Step D3: advance j (destTape) */ + if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1]) + { + state->destTape++; + return; + } + if (state->tp_dummy[state->destTape] != 0) + { + state->destTape = 0; + return; + } + + /* Step D4: increase level */ + state->Level++; + a = state->tp_fib[0]; + for (j = 0; j < state->tapeRange; j++) + { + state->tp_dummy[j] = a + state->tp_fib[j + 1] - state->tp_fib[j]; + state->tp_fib[j] = a + state->tp_fib[j + 1]; + } + state->destTape = 0; +} + +/* + * mergeruns -- merge all the completed initial runs. 
+ * + * This implements steps D5, D6 of Algorithm D. All input data has + * already been written to initial runs on tape (see dumptuples). + */ +static void +mergeruns(Tuplesortstate *state) +{ + int tapenum, + svTape, + svRuns, + svDummy; + + Assert(state->status == TSS_BUILDRUNS); + Assert(state->memtupcount == 0); + + if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL) + { + /* + * If there are multiple runs to be merged, when we go to read back + * tuples from disk, abbreviated keys will not have been stored, and + * we don't care to regenerate them. Disable abbreviation from this + * point on. + */ + state->sortKeys->abbrev_converter = NULL; + state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator; + + /* Not strictly necessary, but be tidy */ + state->sortKeys->abbrev_abort = NULL; + state->sortKeys->abbrev_full_comparator = NULL; + } + + /* + * If we produced only one initial run (quite likely if the total data + * volume is between 1X and 2X workMem when replacement selection is used, + * but something we particular count on when input is presorted), we can + * just use that tape as the finished output, rather than doing a useless + * merge. (This obvious optimization is not in Knuth's algorithm.) + */ + if (state->currentRun == RUN_SECOND) + { + state->result_tape = state->tp_tapenum[state->destTape]; + /* must freeze and rewind the finished output tape */ + LogicalTapeFreeze(state->tapeset, state->result_tape); + state->status = TSS_SORTEDONTAPE; + return; + } + + /* End of step D2: rewind all output tapes to prepare for merging */ + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + LogicalTapeRewind(state->tapeset, tapenum, false); + + for (;;) + { + /* + * At this point we know that tape[T] is empty. If there's just one + * (real or dummy) run left on each input tape, then only one merge + * pass remains. 
If we don't have to produce a materialized sorted + * tape, we can stop at this point and do the final merge on-the-fly. + */ + if (!state->randomAccess) + { + bool allOneRun = true; + + Assert(state->tp_runs[state->tapeRange] == 0); + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) + { + allOneRun = false; + break; + } + } + if (allOneRun) + { + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); + /* Initialize for the final merge pass */ + beginmerge(state, state->tuples); + state->status = TSS_FINALMERGE; + return; + } + } + + /* Step D5: merge runs onto tape[T] until tape[P] is empty */ + while (state->tp_runs[state->tapeRange - 1] || + state->tp_dummy[state->tapeRange - 1]) + { + bool allDummy = true; + + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] == 0) + { + allDummy = false; + break; + } + } + + if (allDummy) + { + state->tp_dummy[state->tapeRange]++; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + state->tp_dummy[tapenum]--; + } + else + mergeonerun(state); + } + + /* Step D6: decrease level */ + if (--state->Level == 0) + break; + /* rewind output tape T to use as new input */ + LogicalTapeRewind(state->tapeset, state->tp_tapenum[state->tapeRange], + false); + /* rewind used-up input tape P, and prepare it for write pass */ + LogicalTapeRewind(state->tapeset, state->tp_tapenum[state->tapeRange - 1], + true); + state->tp_runs[state->tapeRange - 1] = 0; + + /* + * reassign tape units per step D6; note we no longer care about A[] + */ + svTape = state->tp_tapenum[state->tapeRange]; + svDummy = state->tp_dummy[state->tapeRange]; + svRuns = state->tp_runs[state->tapeRange]; + for (tapenum = state->tapeRange; tapenum > 0; tapenum--) + { + state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1]; + state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1]; + 
state->tp_runs[tapenum] = state->tp_runs[tapenum - 1]; + } + state->tp_tapenum[0] = svTape; + state->tp_dummy[0] = svDummy; + state->tp_runs[0] = svRuns; + } + + /* + * Done. Knuth says that the result is on TAPE[1], but since we exited + * the loop without performing the last iteration of step D6, we have not + * rearranged the tape unit assignment, and therefore the result is on + * TAPE[T]. We need to do it this way so that we can freeze the final + * output tape while rewinding it. The last iteration of step D6 would be + * a waste of cycles anyway... + */ + state->result_tape = state->tp_tapenum[state->tapeRange]; + LogicalTapeFreeze(state->tapeset, state->result_tape); + state->status = TSS_SORTEDONTAPE; +} + +/* + * Merge one run from each input tape, except ones with dummy runs. + * + * This is the inner loop of Algorithm D step D5. We know that the + * output tape is TAPE[T]. + */ +static void +mergeonerun(Tuplesortstate *state) +{ + int destTape = state->tp_tapenum[state->tapeRange]; + int srcTape; + int tupIndex; + SortTuple *tup; + int64 priorAvail, + spaceFreed; + + /* + * Start the merge by loading one tuple from each active source tape into + * the heap. We can also decrease the input run/dummy run counts. + */ + beginmerge(state, false); + + /* + * Execute merge by repeatedly extracting lowest tuple in heap, writing it + * out, and replacing it with next tuple from same tape (if there is + * another one). 
+ */ + while (state->memtupcount > 0) + { + /* write the tuple to destTape */ + priorAvail = state->availMem; + srcTape = state->memtuples[0].tupindex; + WRITETUP(state, destTape, &state->memtuples[0]); + /* writetup adjusted total free space, now fix per-tape space */ + spaceFreed = state->availMem - priorAvail; + state->mergeavailmem[srcTape] += spaceFreed; + /* compact the heap */ + tuplesort_heap_siftup(state, false); + if ((tupIndex = state->mergenext[srcTape]) == 0) + { + /* out of preloaded data on this tape, try to read more */ + mergepreread(state); + /* if still no data, we've reached end of run on this tape */ + if ((tupIndex = state->mergenext[srcTape]) == 0) + continue; + } + /* pull next preread tuple from list, insert in heap */ + tup = &state->memtuples[tupIndex]; + state->mergenext[srcTape] = tup->tupindex; + if (state->mergenext[srcTape] == 0) + state->mergelast[srcTape] = 0; + tuplesort_heap_insert(state, tup, srcTape, false); + /* put the now-unused memtuples entry on the freelist */ + tup->tupindex = state->mergefreelist; + state->mergefreelist = tupIndex; + state->mergeavailslots[srcTape]++; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated, but AllocSetFree will have put those chunks of memory on + * particular free lists, bucketed by size class. Thus, although all of + * that memory is free, it is effectively fragmented. Resetting the + * context gets us out from under that problem. + */ + MemoryContextReset(state->tuplecontext); + + /* + * When the heap empties, we're done. Write an end-of-run marker on the + * output tape, and increment its count of real runs. 
+ */ + markrunend(state, destTape); + state->tp_runs[state->tapeRange]++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished %d-way merge step: %s", state->activeTapes, + pg_rusage_show(&state->ru_start)); +#endif +} + +/* + * beginmerge - initialize for a merge pass + * + * We decrease the counts of real and dummy runs for each tape, and mark + * which tapes contain active input runs in mergeactive[]. Then, load + * as many tuples as we can from each active input tape, and finally + * fill the merge heap with the first tuple from each active tape. + * + * finalMergeBatch indicates if this is the beginning of a final on-the-fly + * merge where a batched allocation of tuple memory is required. + */ +static void +beginmerge(Tuplesortstate *state, bool finalMergeBatch) +{ + int activeTapes; + int tapenum; + int srcTape; + int slotsPerTape; + int64 spacePerTape; + + /* Heap should be empty here */ + Assert(state->memtupcount == 0); + + /* Adjust run counts and mark the active tapes */ + memset(state->mergeactive, 0, + state->maxTapes * sizeof(*state->mergeactive)); + activeTapes = 0; + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] > 0) + state->tp_dummy[tapenum]--; + else + { + Assert(state->tp_runs[tapenum] > 0); + state->tp_runs[tapenum]--; + srcTape = state->tp_tapenum[tapenum]; + state->mergeactive[srcTape] = true; + activeTapes++; + } + } + state->activeTapes = activeTapes; + + /* Clear merge-pass state variables */ + memset(state->mergenext, 0, + state->maxTapes * sizeof(*state->mergenext)); + memset(state->mergelast, 0, + state->maxTapes * sizeof(*state->mergelast)); + state->mergefreelist = 0; /* nothing in the freelist */ + state->mergefirstfree = activeTapes; /* 1st slot avail for preread */ + + if (finalMergeBatch) + { + /* Free outright buffers for tape never actually allocated */ + FREEMEM(state, (state->maxTapes - activeTapes) * TAPE_BUFFER_OVERHEAD); + + /* + * Grow memtuples one last time, since 
the palloc() overhead no longer + * incurred can make a big difference + */ + batchmemtuples(state); + } + + /* + * Initialize space allocation to let each active input tape have an equal + * share of preread space. + */ + Assert(activeTapes > 0); + slotsPerTape = (state->memtupsize - state->mergefirstfree) / activeTapes; + Assert(slotsPerTape > 0); + spacePerTape = MAXALIGN_DOWN(state->availMem / activeTapes); + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + if (state->mergeactive[srcTape]) + { + state->mergeavailslots[srcTape] = slotsPerTape; + state->mergeavailmem[srcTape] = spacePerTape; + } + } + + /* + * Preallocate tuple batch memory for each tape. This is the memory used + * for tuples themselves (not SortTuples), so it's never used by + * pass-by-value datum sorts. Memory allocation is performed here at most + * once per sort, just in advance of the final on-the-fly merge step. + */ + if (finalMergeBatch) + mergebatch(state, spacePerTape); + + /* + * Preread as many tuples as possible (and at least one) from each active + * tape + */ + mergepreread(state); + + /* Load the merge heap with the first tuple from each input tape */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + int tupIndex = state->mergenext[srcTape]; + SortTuple *tup; + + if (tupIndex) + { + tup = &state->memtuples[tupIndex]; + state->mergenext[srcTape] = tup->tupindex; + if (state->mergenext[srcTape] == 0) + state->mergelast[srcTape] = 0; + tuplesort_heap_insert(state, tup, srcTape, false); + /* put the now-unused memtuples entry on the freelist */ + tup->tupindex = state->mergefreelist; + state->mergefreelist = tupIndex; + state->mergeavailslots[srcTape]++; + +#ifdef TRACE_SORT + if (trace_sort && finalMergeBatch) + { + int64 perTapeKB = (spacePerTape + 1023) / 1024; + int64 usedSpaceKB; + int usedSlots; + + /* + * Report how effective batchmemtuples() was in balancing the + * number of slots against the need for memory for the + * underlying tuples (e.g. 
IndexTuples). The big preread of + * all tapes when switching to FINALMERGE state should be + * fairly representative of memory utilization during the + * final merge step, and in any case is the only point at + * which all tapes are guaranteed to have depleted either + * their batch memory allowance or slot allowance. Ideally, + * both will be completely depleted for every tape by now. + */ + usedSpaceKB = (state->mergecurrent[srcTape] - + state->mergetuples[srcTape] + 1023) / 1024; + usedSlots = slotsPerTape - state->mergeavailslots[srcTape]; + + elog(LOG, "tape %d initially used " INT64_FORMAT " KB of " + INT64_FORMAT " KB batch (%2.3f) and %d out of %d slots " + "(%2.3f)", srcTape, + usedSpaceKB, perTapeKB, + (double) usedSpaceKB / (double) perTapeKB, + usedSlots, slotsPerTape, + (double) usedSlots / (double) slotsPerTape); + } +#endif + } + } +} + +/* + * batchmemtuples - grow memtuples without palloc overhead + * + * When called, availMem should be approximately the amount of memory we'd + * require to allocate memtupsize - memtupcount tuples (not SortTuples/slots) + * that were allocated with palloc() overhead, and in doing so use up all + * allocated slots. However, though slots and tuple memory is in balance + * following the last grow_memtuples() call, that's predicated on the observed + * average tuple size for the "final" grow_memtuples() call, which includes + * palloc overhead. During the final merge pass, where we will arrange to + * squeeze out the palloc overhead, we might need more slots in the memtuples + * array. + * + * To make that happen, arrange for the amount of remaining memory to be + * exactly equal to the palloc overhead multiplied by the current size of + * the memtuples array, force the grow_memtuples flag back to true (it's + * probably but not necessarily false on entry to this routine), and then + * call grow_memtuples. 
This simulates loading enough tuples to fill the + * whole memtuples array and then having some space left over because of the + * elided palloc overhead. We expect that grow_memtuples() will conclude that + * it can't double the size of the memtuples array but that it can increase + * it by some percentage; but if it does decide to double it, that just means + * that we've never managed to use many slots in the memtuples array, in which + * case doubling it shouldn't hurt anything anyway. + */ +static void +batchmemtuples(Tuplesortstate *state) +{ + int64 refund; + int64 availMemLessRefund; + int memtupsize = state->memtupsize; + + /* Caller error if we have no tapes */ + Assert(state->activeTapes > 0); + + /* For simplicity, assume no memtuples are actually currently counted */ + Assert(state->memtupcount == 0); + + /* + * Refund STANDARDCHUNKHEADERSIZE per tuple. + * + * This sometimes fails to make memory use perfectly balanced, but it + * should never make the situation worse. Note that Assert-enabled builds + * get a larger refund, due to a varying STANDARDCHUNKHEADERSIZE. + */ + refund = memtupsize * STANDARDCHUNKHEADERSIZE; + availMemLessRefund = state->availMem - refund; + + /* + * We need to be sure that we do not cause LACKMEM to become true, else + * the batch allocation size could be calculated as negative, causing + * havoc. Hence, if availMemLessRefund is negative at this point, we must + * do nothing. Moreover, if it's positive but rather small, there's + * little point in proceeding because we could only increase memtuples by + * a small amount, not worth the cost of the repalloc's. We somewhat + * arbitrarily set the threshold at ALLOCSET_DEFAULT_INITSIZE per tape. + * (Note that this does not represent any assumption about tuple sizes.) 
+ */ + if (availMemLessRefund <= + (int64) state->activeTapes * ALLOCSET_DEFAULT_INITSIZE) + return; + + /* + * To establish balanced memory use after refunding palloc overhead, + * temporarily have our accounting indicate that we've allocated all + * memory we're allowed to less that refund, and call grow_memtuples() to + * have it increase the number of slots. + */ + state->growmemtuples = true; + USEMEM(state, availMemLessRefund); + (void) grow_memtuples(state); + state->growmemtuples = false; + /* availMem must stay accurate for spacePerTape calculation */ + FREEMEM(state, availMemLessRefund); + if (LACKMEM(state)) + elog(ERROR, "unexpected out-of-memory situation in tuplesort"); + +#ifdef TRACE_SORT + if (trace_sort) + { + Size OldKb = (memtupsize * sizeof(SortTuple) + 1023) / 1024; + Size NewKb = (state->memtupsize * sizeof(SortTuple) + 1023) / 1024; + + elog(LOG, "grew memtuples %1.2fx from %d (%zu KB) to %d (%zu KB) for final merge", + (double) NewKb / (double) OldKb, + memtupsize, OldKb, + state->memtupsize, NewKb); + } +#endif +} + +/* + * mergebatch - initialize tuple memory in batch + * + * This allows sequential access to sorted tuples buffered in memory from + * tapes/runs on disk during a final on-the-fly merge step. Note that the + * memory is not used for SortTuples, but for the underlying tuples (e.g. + * MinimalTuples). + * + * Note that when batch memory is used, there is a simple division of space + * into large buffers (one per active tape). The conventional incremental + * memory accounting (calling USEMEM() and FREEMEM()) is abandoned. Instead, + * when each tape's memory budget is exceeded, a retail palloc() "overflow" is + * performed, which is then immediately detected in a way that is analogous to + * LACKMEM(). This keeps each tape's use of memory fair, which is always a + * goal. 
+ */ +static void +mergebatch(Tuplesortstate *state, int64 spacePerTape) +{ + int srcTape; + + Assert(state->activeTapes > 0); + Assert(state->tuples); + + /* + * For the purposes of tuplesort's memory accounting, the batch allocation + * is special, and regular memory accounting through USEMEM() calls is + * abandoned (see mergeprereadone()). + */ + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + { + char *mergetuples; + + if (!state->mergeactive[srcTape]) + continue; + + /* Allocate buffer for each active tape */ + mergetuples = MemoryContextAllocHuge(state->tuplecontext, + spacePerTape); + + /* Initialize state for tape */ + state->mergetuples[srcTape] = mergetuples; + state->mergecurrent[srcTape] = mergetuples; + state->mergetail[srcTape] = mergetuples; + state->mergeoverflow[srcTape] = NULL; + } + + state->batchUsed = true; + state->spacePerTape = spacePerTape; +} + +/* + * mergebatchone - prepare batch memory for one merge input tape + * + * This is called following the exhaustion of preread tuples for one input + * tape. All that actually occurs is that the state for the source tape is + * reset to indicate that all memory may be reused. + * + * This routine must deal with fixing up the tuple that is about to be returned + * to the client, due to "overflow" allocations. + */ +static void +mergebatchone(Tuplesortstate *state, int srcTape, SortTuple *rtup, + bool *should_free) +{ + Assert(state->batchUsed); + + /* + * Tuple about to be returned to caller (*rtup) is final preread tuple + * from tape, just removed from the top of the heap. Special steps around + * memory management must be performed for that tuple, to make sure it + * isn't overwritten early.
+ */ + if (!state->mergeoverflow[srcTape]) + { + Size tupLen; + + /* + * Mark tuple buffer range for reuse, but be careful to move final, + * tail tuple to start of space for next run so that it's available to + * caller when *rtup is returned, and remains available at least until + * the next tuple is requested. + */ + tupLen = state->mergecurrent[srcTape] - state->mergetail[srcTape]; + state->mergecurrent[srcTape] = state->mergetuples[srcTape]; + MOVETUP(state->mergecurrent[srcTape], state->mergetail[srcTape], + tupLen); + + /* Make SortTuple at top of the merge heap point to new tuple */ + rtup->tuple = (void *) state->mergecurrent[srcTape]; + + state->mergetail[srcTape] = state->mergecurrent[srcTape]; + state->mergecurrent[srcTape] += tupLen; + } + else + { + /* + * Handle an "overflow" retail palloc. + * + * This is needed when we run out of tuple memory for the tape. + */ + state->mergecurrent[srcTape] = state->mergetuples[srcTape]; + state->mergetail[srcTape] = state->mergetuples[srcTape]; + + if (rtup->tuple) + { + Assert(rtup->tuple == (void *) state->mergeoverflow[srcTape]); + /* Caller should free palloc'd tuple */ + *should_free = true; + } + state->mergeoverflow[srcTape] = NULL; + } +} + +/* + * mergebatchfreetape - handle final clean-up for batch memory once tape is + * about to become exhausted + * + * All tuples are returned from tape, but a single final tuple, *rtup, is to be + * passed back to caller. Free tape's batch allocation buffer while ensuring + * that the final tuple is managed appropriately. + */ +static void +mergebatchfreetape(Tuplesortstate *state, int srcTape, SortTuple *rtup, + bool *should_free) +{ + Assert(state->batchUsed); + Assert(state->status == TSS_FINALMERGE); + + /* + * Tuple may or may not already be an overflow allocation from + * mergebatchone() + */ + if (!*should_free && rtup->tuple) + { + /* + * Final tuple still in tape's batch allocation.
+ * + * Return palloc()'d copy to caller, and have it freed in a similar + * manner to overflow allocation. Otherwise, we'd free batch memory + * and pass back a pointer to garbage. Note that we deliberately + * allocate this in the parent tuplesort context, to be on the safe + * side. + */ + Size tuplen; + void *oldTuple = rtup->tuple; + + tuplen = state->mergecurrent[srcTape] - state->mergetail[srcTape]; + rtup->tuple = MemoryContextAlloc(state->sortcontext, tuplen); + MOVETUP(rtup->tuple, oldTuple, tuplen); + *should_free = true; + } + + /* Free spacePerTape-sized buffer */ + pfree(state->mergetuples[srcTape]); +} + +/* + * mergebatchalloc - allocate memory for one tuple using a batch memory + * "logical allocation". + * + * This is used for the final on-the-fly merge phase only. READTUP() routines + * receive memory from here in place of palloc() and USEMEM() calls. + * + * Tuple tapenum is passed, ensuring each tape's tuples are stored in sorted, + * contiguous order (while allowing safe reuse of memory made available to + * each tape). This maximizes locality of access as tuples are returned by + * final merge. + * + * Caller must not subsequently attempt to free memory returned here. In + * general, only mergebatch* functions know about how memory returned from + * here should be freed, and this function's caller must ensure that batch + * memory management code will definitely have the opportunity to do the right + * thing during the final on-the-fly merge.
+ */ +static void * +mergebatchalloc(Tuplesortstate *state, int tapenum, Size tuplen) +{ + Size reserve_tuplen = MAXALIGN(tuplen); + char *ret; + + /* Should overflow at most once before mergebatchone() call: */ + Assert(state->mergeoverflow[tapenum] == NULL); + Assert(state->batchUsed); + + /* It should be possible to use precisely spacePerTape memory at once */ + if (state->mergecurrent[tapenum] + reserve_tuplen <= + state->mergetuples[tapenum] + state->spacePerTape) + { + /* + * Usual case -- caller is returned pointer into its tape's buffer, + * and an offset from that point is recorded as where tape has + * consumed up to for current round of preloading. + */ + ret = state->mergetail[tapenum] = state->mergecurrent[tapenum]; + state->mergecurrent[tapenum] += reserve_tuplen; + } + else + { + /* + * Allocate memory, and record as tape's overflow allocation. This + * will be detected quickly, in a similar fashion to a LACKMEM() + * condition, and should not happen again before a new round of + * preloading for caller's tape. Note that we deliberately allocate + * this in the parent tuplesort context, to be on the safe side. + * + * Sometimes, this does not happen because merging runs out of slots + * before running out of memory. + */ + ret = state->mergeoverflow[tapenum] = + MemoryContextAlloc(state->sortcontext, tuplen); + } + + return ret; +} + +/* + * mergepreread - load tuples from merge input tapes + * + * This routine exists to improve sequentiality of reads during a merge pass, + * as explained in the header comments of this file. Load tuples from each + * active source tape until the tape's run is exhausted or it has used up + * its fair share of available memory. In any case, we guarantee that there + * is at least one preread tuple available from each unexhausted input tape. + * + * We invoke this routine at the start of a merge pass for initial load, + * and then whenever any tape's preread data runs out. 
Note that we load + * as much data as possible from all tapes, not just the one that ran out. + * This is because logtape.c works best with a usage pattern that alternates + * between reading a lot of data and writing a lot of data, so whenever we + * are forced to read, we should fill working memory completely. + * + * In FINALMERGE state, we *don't* use this routine, but instead just preread + * from the single tape that ran dry. There's no read/write alternation in + * that state and so no point in scanning through all the tapes to fix one. + * (Moreover, there may be quite a lot of inactive tapes in that state, since + * we might have had many fewer runs than tapes. In a regular tape-to-tape + * merge we can expect most of the tapes to be active. Plus, only + * FINALMERGE state has to consider memory management for a batch + * allocation.) + */ +static void +mergepreread(Tuplesortstate *state) +{ + int srcTape; + + for (srcTape = 0; srcTape < state->maxTapes; srcTape++) + mergeprereadone(state, srcTape); +} + +/* + * mergeprereadone - load tuples from one merge input tape + * + * Read tuples from the specified tape until it has used up its free memory + * or array slots; but ensure that we have at least one tuple, if any are + * to be had. + */ +static void +mergeprereadone(Tuplesortstate *state, int srcTape) +{ + unsigned int tuplen; + SortTuple stup; + int tupIndex; + int64 priorAvail, + spaceUsed; + + if (!state->mergeactive[srcTape]) + return; /* tape's run is already exhausted */ + + /* + * Manage per-tape availMem. Only actually matters when batch memory not + * in use. + */ + priorAvail = state->availMem; + state->availMem = state->mergeavailmem[srcTape]; + + /* + * When batch memory is used in final on-the-fly merge, only mergeoverflow + * test is relevant; otherwise, only LACKMEM() test is relevant.
+ */ + while ((state->mergeavailslots[srcTape] > 0 && + state->mergeoverflow[srcTape] == NULL && !LACKMEM(state)) || + state->mergenext[srcTape] == 0) + { + /* read next tuple, if any */ + if ((tuplen = getlen(state, srcTape, true)) == 0) + { + state->mergeactive[srcTape] = false; + break; + } + READTUP(state, &stup, srcTape, tuplen); + /* find a free slot in memtuples[] for it */ + tupIndex = state->mergefreelist; + if (tupIndex) + state->mergefreelist = state->memtuples[tupIndex].tupindex; + else + { + tupIndex = state->mergefirstfree++; + Assert(tupIndex < state->memtupsize); + } + state->mergeavailslots[srcTape]--; + /* store tuple, append to list for its tape */ + stup.tupindex = 0; + state->memtuples[tupIndex] = stup; + if (state->mergelast[srcTape]) + state->memtuples[state->mergelast[srcTape]].tupindex = tupIndex; + else + state->mergenext[srcTape] = tupIndex; + state->mergelast[srcTape] = tupIndex; + } + /* update per-tape and global availmem counts */ + spaceUsed = state->mergeavailmem[srcTape] - state->availMem; + state->mergeavailmem[srcTape] = state->availMem; + state->availMem = priorAvail - spaceUsed; +} + +/* + * dumptuples - remove tuples from memtuples and write to tape + * + * This is used during initial-run building, but not during merging. + * + * When alltuples = false and replacement selection is still active, dump + * only enough tuples to get under the availMem limit (and leave at least + * one tuple in memtuples, since puttuple will then assume it is a heap that + * has a tuple to compare to). We always insist there be at least one free + * slot in the memtuples[] array. + * + * When alltuples = true, dump everything currently in memory. (This + * case is only used at end of input data, although in practice only the + * first run could fail to dump all tuples when we LACKMEM(), and only + * when replacement selection is active.)
+ * + * If, when replacement selection is active, we see that the tuple run + * number at the top of the heap has changed, start a new run. This must be + * the first run, because replacement selection is always abandoned for all + * further runs. + */ +static void +dumptuples(Tuplesortstate *state, bool alltuples) +{ + while (alltuples || + (LACKMEM(state) && state->memtupcount > 1) || + state->memtupcount >= state->memtupsize) + { + if (state->replaceActive) + { + /* + * Still holding out for a case favorable to replacement + * selection. Still incrementally spilling using heap. + * + * Dump the heap's frontmost entry, and sift up to remove it from + * the heap. + */ + Assert(state->memtupcount > 0); + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[0]); + tuplesort_heap_siftup(state, true); + } + else + { + /* + * Once committed to quicksorting runs, never incrementally spill + */ + dumpbatch(state, alltuples); + break; + } + + /* + * If top run number has changed, we've finished the current run (this + * can only be the first run), and will no longer spill incrementally. + */ + if (state->memtupcount == 0 || + state->memtuples[0].tupindex == HEAP_RUN_NEXT) + { + markrunend(state, state->tp_tapenum[state->destTape]); + Assert(state->currentRun == RUN_FIRST); + state->currentRun++; + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished incrementally writing %s run %d to tape %d: %s", + (state->memtupcount == 0) ? "only" : "first", + state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + /* + * Done if heap is empty, which is possible when there is only one + * long run. + */ + Assert(state->currentRun == RUN_SECOND); + if (state->memtupcount == 0) + { + /* + * Replacement selection best case; no final merge required, + * because there was only one initial run (second run has no + * tuples). 
See RUN_SECOND case in mergeruns(). + */ + break; + } + + /* + * Abandon replacement selection for second run (as well as any + * subsequent runs). + */ + state->replaceActive = false; + + /* + * First tuple of next run should not be heapified, and so will + * bear placeholder run number. In practice this must actually be + * the second run, which just became the currentRun, so we're + * clear to quicksort and dump the tuples in batch next time + * memtuples becomes full. + */ + Assert(state->memtuples[0].tupindex == HEAP_RUN_NEXT); + selectnewtape(state); + } + } +} + +/* + * dumpbatch - sort and dump all memtuples, forming one run on tape + * + * Second or subsequent runs are never heapified by this module (although + * heapification still respects run number differences between the first and + * second runs), and a heap (replacement selection priority queue) is often + * avoided in the first place. + */ +static void +dumpbatch(Tuplesortstate *state, bool alltuples) +{ + int memtupwrite; + int i; + + /* + * Final call might require no sorting, in rare cases where we just so + * happen to have previously LACKMEM()'d at the point where exactly all + * remaining tuples are loaded into memory, just before input was + * exhausted. + * + * In general, short final runs are quite possible. Rather than allowing + * a special case where there was a superfluous selectnewtape() call (i.e. + * a call with no subsequent run actually written to destTape), we prefer + * to write out a 0 tuple run. + * + * mergepreread()/mergeprereadone() are prepared for 0 tuple runs, and + * will reliably mark the tape inactive for the merge when called from + * beginmerge(). This case is therefore similar to the case where + * mergeonerun() finds a dummy run for the tape, and so doesn't need to + * merge a run from the tape (or conceptually "merges" the dummy run, if + * you prefer). 
According to Knuth, Algorithm D "isn't strictly optimal" + * in its method of distribution and dummy run assignment; this edge case + * seems very unlikely to make that appreciably worse. + */ + Assert(state->status == TSS_BUILDRUNS); + + /* + * It seems unlikely that this limit will ever be exceeded, but take no + * chances + */ + if (state->currentRun == INT_MAX) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("cannot have more than %d runs for an external sort", + INT_MAX))); + + state->currentRun++; + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "starting quicksort of run %d: %s", + state->currentRun, pg_rusage_show(&state->ru_start)); +#endif + + /* + * Sort all tuples accumulated within the allowed amount of memory for + * this run using quicksort + */ + tuplesort_sort_memtuples(state); + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished quicksort of run %d: %s", + state->currentRun, pg_rusage_show(&state->ru_start)); +#endif + + memtupwrite = state->memtupcount; + for (i = 0; i < memtupwrite; i++) + { + WRITETUP(state, state->tp_tapenum[state->destTape], + &state->memtuples[i]); + state->memtupcount--; + } + + /* + * Reset tuple memory. We've freed all of the tuples that we previously + * allocated. It's important to avoid fragmentation when there is a stark + * change in allocation patterns due to the use of batch memory. + * Fragmentation due to AllocSetFree's bucketing by size class might be + * particularly bad if this step wasn't taken. 
+ */ + MemoryContextReset(state->tuplecontext); + + markrunend(state, state->tp_tapenum[state->destTape]); + state->tp_runs[state->destTape]++; + state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ + +#ifdef TRACE_SORT + if (trace_sort) + elog(LOG, "finished writing run %d to tape %d: %s", + state->currentRun, state->destTape, + pg_rusage_show(&state->ru_start)); +#endif + + if (!alltuples) + selectnewtape(state); +} + +/* + * tuplesort_rescan - rewind and replay the scan + */ +void +tuplesort_rescan(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = 0; + state->eof_reached = false; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + case TSS_SORTEDONTAPE: + LogicalTapeRewind(state->tapeset, + state->result_tape, + false); + state->eof_reached = false; + state->markpos_block = 0L; + state->markpos_offset = 0; + state->markpos_eof = false; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_markpos - saves current position in the merged sort file + */ +void +tuplesort_markpos(Tuplesortstate *state) +{ + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->markpos_offset = state->current; + state->markpos_eof = state->eof_reached; + break; + case TSS_SORTEDONTAPE: + LogicalTapeTell(state->tapeset, + state->result_tape, + &state->markpos_block, + &state->markpos_offset); + state->markpos_eof = state->eof_reached; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_restorepos - restores current position in merged sort file to + * last saved position + */ +void +tuplesort_restorepos(Tuplesortstate *state) +{ + 
MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + + Assert(state->randomAccess); + + switch (state->status) + { + case TSS_SORTEDINMEM: + state->current = state->markpos_offset; + state->eof_reached = state->markpos_eof; + break; + case TSS_SORTEDONTAPE: + if (!LogicalTapeSeek(state->tapeset, + state->result_tape, + state->markpos_block, + state->markpos_offset)) + elog(ERROR, "tuplesort_restorepos failed"); + state->eof_reached = state->markpos_eof; + break; + default: + elog(ERROR, "invalid tuplesort state"); + break; + } + + MemoryContextSwitchTo(oldcontext); +} + +/* + * tuplesort_get_stats - extract summary statistics + * + * This can be called after tuplesort_performsort() finishes to obtain + * printable summary information about how the sort was performed. + * spaceUsed is measured in kilobytes. + */ +void +tuplesort_get_stats(Tuplesortstate *state, + const char **sortMethod, + const char **spaceType, + long *spaceUsed) +{ + /* + * Note: it might seem we should provide both memory and disk usage for a + * disk-based sort. However, the current code doesn't track memory space + * accurately once we have begun to return tuples to the caller (since we + * don't account for pfree's the caller is expected to do), so we cannot + * rely on availMem in a disk sort. This does not seem worth the overhead + * to fix. Is it worth creating an API for the memory context code to + * tell us how much is actually used in sortcontext? 
+ */ + if (state->tapeset) + { + *spaceType = "Disk"; + *spaceUsed = LogicalTapeSetBlocks(state->tapeset) * (BLCKSZ / 1024); + } + else + { + *spaceType = "Memory"; + *spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; + } + + switch (state->status) + { + case TSS_SORTEDINMEM: + if (state->boundUsed) + *sortMethod = "top-N heapsort"; + else + *sortMethod = "quicksort"; + break; + case TSS_SORTEDONTAPE: + *sortMethod = "external sort"; + break; + case TSS_FINALMERGE: + *sortMethod = "external merge"; + break; + default: + *sortMethod = "still in progress"; + break; + } +} + + +/* + * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. + * + * Compare two SortTuples. If checkIndex is true, use the tuple index + * as the front of the sort key; otherwise, no. + * + * Note that for checkIndex callers, the heap invariant is never + * maintained beyond the first run, and so there are no COMPARETUP() + * calls needed to distinguish tuples in HEAP_RUN_NEXT. + */ + +#define HEAPCOMPARE(tup1,tup2) \ + (checkIndex && ((tup1)->tupindex != (tup2)->tupindex || \ + (tup1)->tupindex == HEAP_RUN_NEXT) ? \ + ((tup1)->tupindex) - ((tup2)->tupindex) : \ + COMPARETUP(state, tup1, tup2)) + +/* + * Convert the existing unordered array of SortTuples to a bounded heap, + * discarding all but the smallest "state->bound" tuples. + * + * When working with a bounded heap, we want to keep the largest entry + * at the root (array entry zero), instead of the smallest as in the normal + * sort case. This allows us to discard the largest entry cheaply. + * Therefore, we temporarily reverse the sort direction. + * + * We assume that all entries in a bounded heap will always have tupindex + * zero; it therefore doesn't matter that HEAPCOMPARE() doesn't reverse + * the direction of comparison for tupindexes. 
+ */ +static void +make_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + int i; + + Assert(state->status == TSS_INITIAL); + Assert(state->bounded); + Assert(tupcount >= state->bound); + + /* Reverse sort direction so largest entry will be at root */ + reversedirection(state); + + state->memtupcount = 0; /* make the heap empty */ + for (i = 0; i < tupcount; i++) + { + if (state->memtupcount >= state->bound && + COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) + { + /* New tuple would just get thrown out, so skip it */ + free_sort_tuple(state, &state->memtuples[i]); + CHECK_FOR_INTERRUPTS(); + } + else + { + /* Insert next tuple into heap */ + /* Must copy source tuple to avoid possible overwrite */ + SortTuple stup = state->memtuples[i]; + + tuplesort_heap_insert(state, &stup, 0, false); + + /* If heap too full, discard largest entry */ + if (state->memtupcount > state->bound) + { + free_sort_tuple(state, &state->memtuples[0]); + tuplesort_heap_siftup(state, false); + } + } + } + + Assert(state->memtupcount == state->bound); + state->status = TSS_BOUNDED; +} + +/* + * Convert the bounded heap to a properly-sorted array + */ +static void +sort_bounded_heap(Tuplesortstate *state) +{ + int tupcount = state->memtupcount; + + Assert(state->status == TSS_BOUNDED); + Assert(state->bounded); + Assert(tupcount == state->bound); + + /* + * We can unheapify in place because each sift-up will remove the largest + * entry, which we can promptly store in the newly freed slot at the end. + * Once we're down to a single-entry heap, we're done. + */ + while (state->memtupcount > 1) + { + SortTuple stup = state->memtuples[0]; + + /* this sifts-up the next-largest entry and decreases memtupcount */ + tuplesort_heap_siftup(state, false); + state->memtuples[state->memtupcount] = stup; + } + state->memtupcount = tupcount; + + /* + * Reverse sort direction back to the original state. 
This is not + * actually necessary but seems like a good idea for tidiness. + */ + reversedirection(state); + + state->status = TSS_SORTEDINMEM; + state->boundUsed = true; +} + +/* + * Sort all memtuples using specialized qsort() routines. + * + * Quicksort is used for small in-memory sorts. Quicksort is also generally + * preferred to replacement selection for generating runs during external sort + * operations, although replacement selection is sometimes used for the first + * run. + */ +static void +tuplesort_sort_memtuples(Tuplesortstate *state) +{ + if (state->memtupcount > 1) + { + /* Can we use the single-key sort function? */ + if (state->onlyKey != NULL) + qsort_ssup(state->memtuples, state->memtupcount, + state->onlyKey); + else + qsort_tuple(state->memtuples, + state->memtupcount, + state->comparetup, + state); + } +} + +/* + * Insert a new tuple into an empty or existing heap, maintaining the + * heap invariant. Caller is responsible for ensuring there's room. + * + * Note: we assume *tuple is a temporary variable that can be scribbled on. + * For some callers, tuple actually points to a memtuples[] entry above the + * end of the heap. This is safe as long as it's not immediately adjacent + * to the end of the heap (ie, in the [memtupcount] array entry) --- if it + * is, it might get overwritten before being moved into the heap! + */ +static void +tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple, + int tupleindex, bool checkIndex) +{ + SortTuple *memtuples; + int j; + + /* + * Save the tupleindex --- see notes above about writing on *tuple. It's a + * historical artifact that tupleindex is passed as a separate argument + * and not in *tuple, but it's notationally convenient so let's leave it + * that way. 
+ */ + tuple->tupindex = tupleindex; + + memtuples = state->memtuples; + Assert(state->memtupcount < state->memtupsize); + Assert(!checkIndex || tupleindex == RUN_FIRST); + + CHECK_FOR_INTERRUPTS(); + + /* + * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is + * using 1-based array indexes, not 0-based. + */ + j = state->memtupcount++; + while (j > 0) + { + int i = (j - 1) >> 1; + + if (HEAPCOMPARE(tuple, &memtuples[i]) >= 0) + break; + memtuples[j] = memtuples[i]; + j = i; + } + memtuples[j] = *tuple; +} + +/* + * The tuple at state->memtuples[0] has been removed from the heap. + * Decrement memtupcount, and sift up to maintain the heap invariant. + */ +static void +tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex) +{ + SortTuple *memtuples = state->memtuples; + SortTuple *tuple; + unsigned int i, + n; + + Assert(!checkIndex || state->currentRun == RUN_FIRST); + if (--state->memtupcount <= 0) + return; + + CHECK_FOR_INTERRUPTS(); + + /* + * state->memtupcount is "int", but we use "unsigned int" for i, j, n. + * This prevents overflow in the "2 * i + 1" calculation, since at the top + * of the loop we must have i < n <= INT_MAX <= UINT_MAX/2. 
+ */ + n = state->memtupcount; + tuple = &memtuples[n]; /* tuple that must be reinserted */ + i = 0; /* i is where the "hole" is */ + for (;;) + { + unsigned int j = 2 * i + 1; + + if (j >= n) + break; + if (j + 1 < n && + HEAPCOMPARE(&memtuples[j], &memtuples[j + 1]) > 0) + j++; + if (HEAPCOMPARE(tuple, &memtuples[j]) <= 0) + break; + memtuples[i] = memtuples[j]; + i = j; + } + memtuples[i] = *tuple; +} + +/* + * Function to reverse the sort direction from its current state + * + * It is not safe to call this when performing hash tuplesorts + */ +static void +reversedirection(Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + int nkey; + + for (nkey = 0; nkey < state->nKeys; nkey++, sortKey++) + { + sortKey->ssup_reverse = !sortKey->ssup_reverse; + sortKey->ssup_nulls_first = !sortKey->ssup_nulls_first; + } +} + + +/* + * Tape interface routines + */ + +static unsigned int +getlen(Tuplesortstate *state, int tapenum, bool eofOK) +{ + unsigned int len; + + if (LogicalTapeRead(state->tapeset, tapenum, + &len, sizeof(len)) != sizeof(len)) + elog(ERROR, "unexpected end of tape"); + if (len == 0 && !eofOK) + elog(ERROR, "unexpected end of data"); + return len; +} + +static void +markrunend(Tuplesortstate *state, int tapenum) +{ + unsigned int len = 0; + + LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); +} + +/* + * Get memory for tuple from within READTUP() routine. Allocate + * memory and account for that, or consume from tape's batch + * allocation. + * + * Memory returned here in the final on-the-fly merge case is recycled + * from tape's batch allocation. Otherwise, callers must pfree() or + * reset tuple child memory context, and account for that with a + * FREEMEM(). Currently, this only ever needs to happen in WRITETUP() + * routines. 
+ */ +static void * +readtup_alloc(Tuplesortstate *state, int tapenum, Size tuplen) +{ + if (state->batchUsed) + { + /* + * No USEMEM() call, because during final on-the-fly merge accounting + * is based on tape-private state. ("Overflow" allocations are + * detected as an indication that a new round or preloading is + * required. Preloading marks existing contents of tape's batch buffer + * for reuse.) + */ + return mergebatchalloc(state, tapenum, tuplen); + } + else + { + char *ret; + + /* Batch allocation yet to be performed */ + ret = MemoryContextAlloc(state->tuplecontext, tuplen); + USEMEM(state, GetMemoryChunkSpace(ret)); + return ret; + } +} + + +/* + * Routines specialized for HeapTuple (actually MinimalTuple) case + */ + +static int +comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTupleData ltup; + HeapTupleData rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + AttrNumber attno; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + ltup.t_len = ((MinimalTuple) a->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + ltup.t_data = (HeapTupleHeader) ((char *) a->tuple - MINIMAL_TUPLE_OFFSET); + rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET; + rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET); + tupDesc = state->tupDesc; + + if (sortKey->abbrev_converter) + { + attno = sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + sortKey++; + for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++) + { + attno = 
sortKey->ssup_attno; + + datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + return 0; +} + +static void +copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* + * We expect the passed "tup" to be a TupleTableSlot, and form a + * MinimalTuple using the exported interface for that. + */ + TupleTableSlot *slot = (TupleTableSlot *) tup; + Datum original; + MinimalTuple tuple; + HeapTupleData htup; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = ExecCopySlotMinimalTuple(slot); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + original = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); + + MemoryContextSwitchTo(oldcontext); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. 
+ * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + htup.t_len = ((MinimalTuple) mtup->tuple)->t_len + + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple - + MINIMAL_TUPLE_OFFSET); + + mtup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_heap(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + MinimalTuple tuple = (MinimalTuple) stup->tuple; + + /* the part of the MinimalTuple we'll write: */ + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + unsigned int tupbodylen = tuple->t_len - MINIMAL_TUPLE_DATA_OFFSET; + + /* total on-disk footprint: */ + unsigned int tuplen = tupbodylen + sizeof(int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_free_minimal_tuple(tuple); +} + +static void +readtup_heap(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tupbodylen = len - sizeof(int); + unsigned int tuplen = tupbodylen + MINIMAL_TUPLE_DATA_OFFSET; + MinimalTuple tuple = (MinimalTuple) readtup_alloc(state, tapenum, tuplen); + char *tupbody = (char *) tuple + MINIMAL_TUPLE_DATA_OFFSET; + HeapTupleData htup; + + /* read in the tuple proper */ + tuple->t_len = tuplen; + LogicalTapeReadExact(state->tapeset, tapenum, + tupbody, tupbodylen); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET; + htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET); + stup->datum1 = heap_getattr(&htup, + state->sortKeys[0].ssup_attno, + state->tupDesc, + &stup->isnull1); +} + +static void +movetup_heap(void *dest, void *src, unsigned int len) +{ + memmove(dest, src, len); +} + +/* + * Routines specialized for the CLUSTER case (HeapTuple data, with + * comparisons per a btree index definition) + */ + +static int +comparetup_cluster(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + SortSupport sortKey = state->sortKeys; + HeapTuple ltup; + HeapTuple rtup; + TupleDesc tupDesc; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + AttrNumber leading = state->indexInfo->ii_KeyAttrNumbers[0]; + + /* Be prepared to compare additional sort keys */ + ltup = (HeapTuple) a->tuple; + rtup = (HeapTuple) b->tuple; + tupDesc = state->tupDesc; + + /* Compare the leading sort key, if it's simple */ + if (leading != 0) + { + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + 
sortKey); + if (compare != 0) + return compare; + + if (sortKey->abbrev_converter) + { + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + } + if (compare != 0 || state->nKeys == 1) + return compare; + /* Compare additional columns the hard way */ + sortKey++; + nkey = 1; + } + else + { + /* Must compare all keys the hard way */ + nkey = 0; + } + + if (state->indexInfo->ii_Expressions == NULL) + { + /* If not expression index, just compare the proper heap attrs */ + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + AttrNumber attno = state->indexInfo->ii_KeyAttrNumbers[nkey]; + + datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1); + datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + } + else + { + /* + * In the expression index case, compute the whole index tuple and + * then compare values. It would perhaps be faster to compute only as + * many columns as we need to compare, but that would require + * duplicating all the logic in FormIndexDatum. 
+ */ + Datum l_index_values[INDEX_MAX_KEYS]; + bool l_index_isnull[INDEX_MAX_KEYS]; + Datum r_index_values[INDEX_MAX_KEYS]; + bool r_index_isnull[INDEX_MAX_KEYS]; + TupleTableSlot *ecxt_scantuple; + + /* Reset context each time to prevent memory leakage */ + ResetPerTupleExprContext(state->estate); + + ecxt_scantuple = GetPerTupleExprContext(state->estate)->ecxt_scantuple; + + ExecStoreTuple(ltup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + l_index_values, l_index_isnull); + + ExecStoreTuple(rtup, ecxt_scantuple, InvalidBuffer, false); + FormIndexDatum(state->indexInfo, ecxt_scantuple, state->estate, + r_index_values, r_index_isnull); + + for (; nkey < state->nKeys; nkey++, sortKey++) + { + compare = ApplySortComparator(l_index_values[nkey], + l_index_isnull[nkey], + r_index_values[nkey], + r_index_isnull[nkey], + sortKey); + if (compare != 0) + return compare; + } + } + + return 0; +} + +static void +copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + HeapTuple tuple = (HeapTuple) tup; + Datum original; + MemoryContext oldcontext = MemoryContextSwitchTo(state->tuplecontext); + + /* copy the tuple into sort storage */ + tuple = heap_copytuple(tuple); + stup->tuple = (void *) tuple; + USEMEM(state, GetMemoryChunkSpace(tuple)); + + MemoryContextSwitchTo(oldcontext); + + /* + * set up first-column key value, and potentially abbreviate, if it's a + * simple column + */ + if (state->indexInfo->ii_KeyAttrNumbers[0] == 0) + return; + + original = heap_getattr(tuple, + state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. 
If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). + */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (HeapTuple) mtup->tuple; + mtup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &mtup->isnull1); + } + } +} + +static void +writetup_cluster(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + HeapTuple tuple = (HeapTuple) stup->tuple; + unsigned int tuplen = tuple->t_len + sizeof(ItemPointerData) + sizeof(int); + + /* We need to store t_self, but not other fields of HeapTupleData */ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + LogicalTapeWrite(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + + FREEMEM(state, GetMemoryChunkSpace(tuple)); + heap_freetuple(tuple); +} + +static void +readtup_cluster(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int tuplen) +{ + unsigned int t_len = tuplen - sizeof(ItemPointerData) - sizeof(int); + HeapTuple tuple = (HeapTuple) readtup_alloc(state, + tapenum, + t_len + HEAPTUPLESIZE); + + /* Reconstruct the HeapTupleData header */ + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); + tuple->t_len = t_len; + LogicalTapeReadExact(state->tapeset, tapenum, + &tuple->t_self, sizeof(ItemPointerData)); + /* We don't currently bother to reconstruct t_tableOid */ + tuple->t_tableOid = InvalidOid; + /* Read in the tuple body */ + LogicalTapeReadExact(state->tapeset, tapenum, + tuple->t_data, tuple->t_len); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value, if it's a simple column */ + if (state->indexInfo->ii_KeyAttrNumbers[0] != 0) + stup->datum1 = heap_getattr(tuple, + state->indexInfo->ii_KeyAttrNumbers[0], + state->tupDesc, + &stup->isnull1); +} + +static void +movetup_cluster(void *dest, void *src, unsigned int len) +{ + HeapTuple tuple; + + memmove(dest, src, len); + + /* Repoint the HeapTupleData header */ + tuple = (HeapTuple) dest; + tuple->t_data = (HeapTupleHeader) ((char *) tuple + HEAPTUPLESIZE); +} + + +/* + * Routines specialized for IndexTuple case + * + * The btree and hash cases require separate comparison functions, but the + * IndexTuple representation is the same so the copy/write/read support + * functions can be shared. + */ + +static int +comparetup_index_btree(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + /* + * This is similar to comparetup_heap(), but expects index tuples. 
There + * is also special handling for enforcing uniqueness, and special + * treatment for equal keys at the end. + */ + SortSupport sortKey = state->sortKeys; + IndexTuple tuple1; + IndexTuple tuple2; + int keysz; + TupleDesc tupDes; + bool equal_hasnull = false; + int nkey; + int32 compare; + Datum datum1, + datum2; + bool isnull1, + isnull2; + + + /* Compare the leading sort key */ + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + sortKey); + if (compare != 0) + return compare; + + /* Compare additional sort keys */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + keysz = state->nKeys; + tupDes = RelationGetDescr(state->indexRel); + + if (sortKey->abbrev_converter) + { + datum1 = index_getattr(tuple1, 1, tupDes, &isnull1); + datum2 = index_getattr(tuple2, 1, tupDes, &isnull2); + + compare = ApplySortAbbrevFullComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; + } + + /* they are equal, so we only need to examine one null flag */ + if (a->isnull1) + equal_hasnull = true; + + sortKey++; + for (nkey = 2; nkey <= keysz; nkey++, sortKey++) + { + datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1); + datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2); + + compare = ApplySortComparator(datum1, isnull1, + datum2, isnull2, + sortKey); + if (compare != 0) + return compare; /* done when we find unequal attributes */ + + /* they are equal, so we only need to examine one null flag */ + if (isnull1) + equal_hasnull = true; + } + + /* + * If btree has asked us to enforce uniqueness, complain if two equal + * tuples are detected (unless there was at least one NULL field). + * + * It is sufficient to make the test here, because if two tuples are equal + * they *must* get compared at some stage of the sort --- otherwise the + * sort algorithm wouldn't have checked whether one must appear before the + * other. 
+ */ + if (state->enforceUnique && !equal_hasnull) + { + Datum values[INDEX_MAX_KEYS]; + bool isnull[INDEX_MAX_KEYS]; + char *key_desc; + + /* + * Some rather brain-dead implementations of qsort (such as the one in + * QNX 4) will sometimes call the comparison routine to compare a + * value to itself, but we always use our own implementation, which + * does not. + */ + Assert(tuple1 != tuple2); + + index_deform_tuple(tuple1, tupDes, values, isnull); + + key_desc = BuildIndexValueDescription(state->indexRel, values, isnull); + + ereport(ERROR, + (errcode(ERRCODE_UNIQUE_VIOLATION), + errmsg("could not create unique index \"%s\"", + RelationGetRelationName(state->indexRel)), + key_desc ? errdetail("Key %s is duplicated.", key_desc) : + errdetail("Duplicate keys exist."), + errtableconstraint(state->heapRel, + RelationGetRelationName(state->indexRel)))); + } + + /* + * If key values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static int +comparetup_index_hash(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state) +{ + uint32 hash1; + uint32 hash2; + IndexTuple tuple1; + IndexTuple tuple2; + + /* + * Fetch hash keys and mask off bits we don't want to sort by. We know + * that the first column of the index tuple is the hash key. 
+ */ + Assert(!a->isnull1); + hash1 = DatumGetUInt32(a->datum1) & state->hash_mask; + Assert(!b->isnull1); + hash2 = DatumGetUInt32(b->datum1) & state->hash_mask; + + if (hash1 > hash2) + return 1; + else if (hash1 < hash2) + return -1; + + /* + * If hash values are equal, we sort on ItemPointer. This does not affect + * validity of the finished index, but it may be useful to have index + * scans in physical order. + */ + tuple1 = (IndexTuple) a->tuple; + tuple2 = (IndexTuple) b->tuple; + + { + BlockNumber blk1 = ItemPointerGetBlockNumber(&tuple1->t_tid); + BlockNumber blk2 = ItemPointerGetBlockNumber(&tuple2->t_tid); + + if (blk1 != blk2) + return (blk1 < blk2) ? -1 : 1; + } + { + OffsetNumber pos1 = ItemPointerGetOffsetNumber(&tuple1->t_tid); + OffsetNumber pos2 = ItemPointerGetOffsetNumber(&tuple2->t_tid); + + if (pos1 != pos2) + return (pos1 < pos2) ? -1 : 1; + } + + /* ItemPointer values should never be equal */ + Assert(false); + + return 0; +} + +static void +copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + IndexTuple tuple = (IndexTuple) tup; + unsigned int tuplen = IndexTupleSize(tuple); + IndexTuple newtuple; + Datum original; + + /* copy the tuple into sort storage */ + newtuple = (IndexTuple) MemoryContextAlloc(state->tuplecontext, tuplen); + memcpy(newtuple, tuple, tuplen); + USEMEM(state, GetMemoryChunkSpace(newtuple)); + stup->tuple = (void *) newtuple; + /* set up first-column key value */ + original = index_getattr(newtuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); + + if (!state->sortKeys->abbrev_converter || stup->isnull1) + { + /* + * Store ordinary Datum representation, or NULL value. If there is a + * converter it won't expect NULL values, and cost model is not + * required to account for NULL, so in that case we avoid calling + * converter and just set datum1 to zeroed representation (to be + * consistent, and to support cheap inequality tests for NULL + * abbreviated keys). 
+ */ + stup->datum1 = original; + } + else if (!consider_abort_common(state)) + { + /* Store abbreviated key representation */ + stup->datum1 = state->sortKeys->abbrev_converter(original, + state->sortKeys); + } + else + { + /* Abort abbreviation */ + int i; + + stup->datum1 = original; + + /* + * Set state to be consistent with never trying abbreviation. + * + * Alter datum1 representation in already-copied tuples, so as to + * ensure a consistent representation (current tuple was just + * handled). It does not matter if some dumped tuples are already + * sorted on tape, since serialized tuples lack abbreviated keys + * (TSS_BUILDRUNS state prevents control reaching here in any case). + */ + for (i = 0; i < state->memtupcount; i++) + { + SortTuple *mtup = &state->memtuples[i]; + + tuple = (IndexTuple) mtup->tuple; + mtup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &mtup->isnull1); + } + } +} + +static void +writetup_index(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + IndexTuple tuple = (IndexTuple) stup->tuple; + unsigned int tuplen; + + tuplen = IndexTupleSize(tuple) + sizeof(tuplen); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + LogicalTapeWrite(state->tapeset, tapenum, + (void *) tuple, IndexTupleSize(tuple)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &tuplen, sizeof(tuplen)); + + FREEMEM(state, GetMemoryChunkSpace(tuple)); + pfree(tuple); +} + +static void +readtup_index(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + IndexTuple tuple = (IndexTuple) readtup_alloc(state, tapenum, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + tuple, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); + stup->tuple = (void *) tuple; + /* set up first-column key value */ + stup->datum1 = index_getattr(tuple, + 1, + RelationGetDescr(state->indexRel), + &stup->isnull1); +} + +static void +movetup_index(void *dest, void *src, unsigned int len) +{ + memmove(dest, src, len); +} + +/* + * Routines specialized for DatumTuple case + */ + +static int +comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + state->sortKeys); + if (compare != 0) + return compare; + + /* if we have abbreviations, then "tuple" has the original value */ + + if (state->sortKeys->abbrev_converter) + compare = ApplySortAbbrevFullComparator(PointerGetDatum(a->tuple), a->isnull1, + PointerGetDatum(b->tuple), b->isnull1, + state->sortKeys); + + return compare; +} + +static void +copytup_datum(Tuplesortstate *state, SortTuple *stup, void *tup) +{ + /* Not currently needed */ + elog(ERROR, "copytup_datum() should not be called"); +} + +static void +writetup_datum(Tuplesortstate *state, int tapenum, SortTuple *stup) +{ + void *waddr; + unsigned int tuplen; + unsigned int writtenlen; + + if (stup->isnull1) + { + waddr = NULL; + tuplen = 0; + } + else if (!state->tuples) + { + waddr = &stup->datum1; + tuplen = sizeof(Datum); + } + else + { + waddr = stup->tuple; + tuplen = datumGetSize(PointerGetDatum(stup->tuple), false, state->datumTypeLen); + Assert(tuplen != 0); + } + + writtenlen = tuplen + sizeof(unsigned int); + + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(state->tapeset, tapenum, + waddr, tuplen); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(state->tapeset, tapenum, + (void *) &writtenlen, sizeof(writtenlen)); + + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + stup->tuple = NULL; + } +} + +static void +readtup_datum(Tuplesortstate *state, SortTuple *stup, + int tapenum, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + + if (tuplen == 0) + { + /* it's NULL */ + stup->datum1 = (Datum) 0; + stup->isnull1 = true; + stup->tuple = NULL; + } + else if (!state->tuples) + { + Assert(tuplen == sizeof(Datum)); + LogicalTapeReadExact(state->tapeset, tapenum, + &stup->datum1, tuplen); + stup->isnull1 = false; + stup->tuple = NULL; + } + else + { + void *raddr = readtup_alloc(state, tapenum, tuplen); + + LogicalTapeReadExact(state->tapeset, tapenum, + raddr, tuplen); + stup->datum1 = PointerGetDatum(raddr); + stup->isnull1 = false; + stup->tuple = raddr; + } + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact(state->tapeset, tapenum, + &tuplen, sizeof(tuplen)); +} + +static void +movetup_datum(void *dest, void *src, unsigned int len) +{ + memmove(dest, src, len); +} + +/* + * Convenience routine to free a tuple previously loaded into sort memory + */ +static void +free_sort_tuple(Tuplesortstate *state, SortTuple *stup) +{ + if (stup->tuple) + { + FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); + pfree(stup->tuple); + } +} From be040ee33ed53e214611353717e574a6cbecb424 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 10 Nov 2021 14:33:33 +0400 Subject: [PATCH 115/182] Make rumsort use vanilla tuplesort functions, part 2: - Make rumsort use vanilla tuplesort functions. 
They are included in tuplesortXX.c which are copied without change from vanilla XX sources (src/backend/utils/sort/tuplesort.c) - Add compatibility with PG15 logtape changes --- src/rumsort.c | 2131 ++++--------------------------------------------- src/rumsort.h | 2 +- 2 files changed, 153 insertions(+), 1980 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 37f9e5760c..a78e2b931b 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -23,19 +23,31 @@ #include "executor/executor.h" #include "utils/logtape.h" #include "utils/pg_rusage.h" - -#include "rum.h" /* RumItem */ - -/* sort-type codes for sort__start probes */ -#define HEAP_SORT 0 -#define INDEX_SORT 1 -#define DATUM_SORT 2 -#define CLUSTER_SORT 3 +#include "utils/tuplesort.h" + +#include "rum.h" /* RumItem */ + +#if PG_VERSION_NUM >= 150000 +#include "tuplesort15.c" +#elif PG_VERSION_NUM >= 140000 +#include "tuplesort14.c" +#elif PG_VERSION_NUM >= 130000 +#include "tuplesort13.c" +#elif PG_VERSION_NUM >= 120000 +#include "tuplesort12.c" +#elif PG_VERSION_NUM >= 110000 +#include "tuplesort11.c" +#elif PG_VERSION_NUM >= 100000 +#include "tuplesort10.c" +#elif PG_VERSION_NUM >= 90600 +#include "tuplesort96.c" +#endif #if PG_VERSION_NUM < 100000 /* Provide fallback for old version of tape interface for 9.6 */ #define LogicalTapeRewindForRead(x, y, z) LogicalTapeRewind((x), (y), false) #define LogicalTapeRewindForWrite(x, y) LogicalTapeRewind((x), (y), true) +#define tuplesort_gettuple_common(x, y, z) tuplesort_gettuple_common((x), (y), (z), should_free) #endif #if PG_VERSION_NUM >= 110000 @@ -47,9 +59,13 @@ #define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) #endif -/* - * Below are copied definitions from src/backend/utils/sort/tuplesort.c. - */ +#if PG_VERSION_NUM >= 150000 +#define TAPE(state, tapenum) state->result_tape +#define LogicalTapeReadExact_compat(state, tapenum, args...) 
LogicalTapeReadExact(state->result_tape, ##args) +#else +#define TAPE(state, tapenum) state->tapeset, tapenum +#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->tapeset, tapenum, ##args) +#endif /* For PGPRO since v.13 trace_sort is imported from backend by including its * declaration in guc.h (guc.h contains added Windows export/import magic to be done @@ -60,579 +76,52 @@ #ifdef TRACE_SORT #if ( !defined (_MSC_VER) || (PG_VERSION_NUM >= 130000 && defined (PGPRO_VERSION)) ) #include "utils/guc.h" -#else -bool trace_sort = false; #endif #endif -typedef struct -{ - void *tuple; /* the tuple proper */ - Datum datum1; /* value of first key column */ - bool isnull1; /* is first key column NULL? */ - int tupindex; /* see notes above */ -} SortTuple; - -typedef enum -{ - TSS_INITIAL, /* Loading tuples; still within memory limit */ - TSS_BOUNDED, /* Loading tuples into bounded-size heap */ - TSS_BUILDRUNS, /* Loading tuples; writing to tape */ - TSS_SORTEDINMEM, /* Sort completed entirely in memory */ - TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ - TSS_FINALMERGE /* Performing final merge on-the-fly */ -} TupSortStatus; - -#define MINORDER 6 /* minimum merge order */ -#define TAPE_BUFFER_OVERHEAD (BLCKSZ * 3) -#define MERGE_BUFFER_SIZE (BLCKSZ * 32) - -typedef int (*SortTupleComparator) (const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); - /* - * Renamed copy of Tuplesortstate. + * We need extra field in a state structure but we should not modify struct RumTuplesortstate + * which is inherited from Tuplesortstate core function. */ -struct RumTuplesortstate -{ - TupSortStatus status; /* enumerated value as shown above */ - int nKeys; /* number of columns in sort key */ - bool randomAccess; /* did caller request random access? */ - bool bounded; /* did caller specify a maximum number of - * tuples to return? 
*/ - bool boundUsed; /* true if we made use of a bounded heap */ - int bound; /* if bounded, the maximum number of tuples */ - long availMem; /* remaining memory available, in bytes */ - long allowedMem; /* total memory allowed, in bytes */ - int maxTapes; /* number of tapes (Knuth's T) */ - int tapeRange; /* maxTapes-1 (Knuth's P) */ - MemoryContext sortcontext; /* memory context holding all sort data */ - LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ - - /* - * These function pointers decouple the routines that must know what kind - * of tuple we are sorting from the routines that don't need to know it. - * They are set up by the rum_tuplesort_begin_xxx routines. - * - * Function to compare two tuples; result is per qsort() convention, ie: - * <0, 0, >0 according as ab. The API must match - * qsort_arg_comparator. - */ - SortTupleComparator comparetup; - - /* - * Function to copy a supplied input tuple into palloc'd space and set up - * its SortTuple representation (ie, set tuple/datum1/isnull1). Also, - * state->availMem must be decreased by the amount of space used for the - * tuple copy (note the SortTuple struct itself is not counted). - */ - void (*copytup) (RumTuplesortstate *state, SortTuple *stup, void *tup); - - /* - * Function to write a stored tuple onto tape. The representation of the - * tuple on tape need not be the same as it is in memory; requirements on - * the tape representation are given below. After writing the tuple, - * pfree() the out-of-line data (not the SortTuple struct!), and increase - * state->availMem by the amount of memory space thereby released. - */ - void (*writetup) (RumTuplesortstate *state, int tapenum, - SortTuple *stup); - - /* - * Function to read a stored tuple from tape back into memory. 'len' is - * the already-read length of the stored tuple. 
Create a palloc'd copy, - * initialize tuple/datum1/isnull1 in the target SortTuple struct, and - * decrease state->availMem by the amount of memory space consumed. - */ - void (*readtup) (RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); - - /* - * Function to reverse the sort direction from its current state. (We - * could dispense with this if we wanted to enforce that all variants - * represent the sort key information alike.) - */ - void (*reversedirection) (RumTuplesortstate *state); - - /* - * This array holds the tuples now in sort memory. If we are in state - * INITIAL, the tuples are in no particular order; if we are in state - * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS - * and FINALMERGE, the tuples are organized in "heap" order per Algorithm - * H. (Note that memtupcount only counts the tuples that are part of the - * heap --- during merge passes, memtuples[] entries beyond tapeRange are - * never in the heap and are used to hold pre-read tuples.) In state - * SORTEDONTAPE, the array is not used. - */ - SortTuple *memtuples; /* array of SortTuple structs */ - int memtupcount; /* number of tuples currently present */ - int memtupsize; /* allocated length of memtuples array */ - bool growmemtuples; /* memtuples' growth still underway? */ - - /* Buffer size to use for reading input tapes, during merge. */ - size_t read_buffer_size; - - /* - * While building initial runs, this is the current output run number - * (starting at 0). Afterwards, it is the number of initial runs we made. - */ - int currentRun; - - /* - * Unless otherwise noted, all pointer variables below are pointers to - * arrays of length maxTapes, holding per-tape data. - */ - - /* - * These variables are only used during merge passes. mergeactive[i] is - * true if we are reading an input run from (actual) tape number i and - * have not yet exhausted that run. 
mergenext[i] is the memtuples index - * of the next pre-read tuple (next to be loaded into the heap) for tape - * i, or 0 if we are out of pre-read tuples. mergelast[i] similarly - * points to the last pre-read tuple from each tape. mergeavailslots[i] - * is the number of unused memtuples[] slots reserved for tape i, and - * mergeavailmem[i] is the amount of unused space allocated for tape i. - * mergefreelist and mergefirstfree keep track of unused locations in the - * memtuples[] array. The memtuples[].tupindex fields link together - * pre-read tuples for each tape as well as recycled locations in - * mergefreelist. It is OK to use 0 as a null link in these lists, because - * memtuples[0] is part of the merge heap and is never a pre-read tuple. - */ - bool *mergeactive; /* active input run source? */ - int *mergenext; /* first preread tuple for each source */ - int *mergelast; /* last preread tuple for each source */ - int *mergeavailslots; /* slots left for prereading each tape */ - long *mergeavailmem; /* availMem for prereading each tape */ - int mergefreelist; /* head of freelist of recycled slots */ - int mergefirstfree; /* first slot never used in this merge */ - - /* - * Variables for Algorithm D. Note that destTape is a "logical" tape - * number, ie, an index into the tp_xxx[] arrays. Be careful to keep - * "logical" and "actual" tape numbers straight! - */ - int Level; /* Knuth's l */ - int destTape; /* current output tape (Knuth's j, less 1) */ - int *tp_fib; /* Target Fibonacci run counts (A[]) */ - int *tp_runs; /* # of real runs on each tape */ - int *tp_dummy; /* # of dummy runs for each tape (D[]) */ - int *tp_tapenum; /* Actual tape numbers (TAPE[]) */ - int activeTapes; /* # of active input tapes in merge pass */ - - /* - * These variables are used after completion of sorting to keep track of - * the next tuple to return. (In the tape case, the tape's current read - * position is also critical state.) 
- */ - int result_tape; /* actual tape number of finished output */ - int current; /* array index (only used if SORTEDINMEM) */ - bool eof_reached; /* reached EOF (needed for cursors) */ - - /* markpos_xxx holds marked position for mark and restore */ - long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ - int markpos_offset; /* saved "current", or offset in tape block */ - bool markpos_eof; /* saved "eof_reached" */ - - /* - * These variables are specific to the MinimalTuple case; they are set by - * rum_tuplesort_begin_heap and used only by the MinimalTuple routines. - */ - TupleDesc tupDesc; - SortSupport sortKeys; /* array of length nKeys */ - - /* - * This variable is shared by the single-key MinimalTuple case and the - * Datum case (which both use qsort_ssup()). Otherwise it's NULL. - */ - SortSupport onlyKey; - - /* - * These variables are specific to the CLUSTER case; they are set by - * rum_tuplesort_begin_cluster. Note CLUSTER also uses tupDesc and - * indexScanKey. - */ - IndexInfo *indexInfo; /* info about index being used for reference */ - EState *estate; /* for evaluating index expressions */ - - /* - * These variables are specific to the IndexTuple case; they are set by - * rum_tuplesort_begin_index_xxx and used only by the IndexTuple routines. - */ - Relation heapRel; /* table the index is being built on */ - Relation indexRel; /* index being built */ - - /* These are specific to the index_btree subcase: */ - ScanKey indexScanKey; - bool enforceUnique; /* complain if we find duplicate tuples */ - - /* These are specific to the index_hash subcase: */ - uint32 hash_mask; /* mask for sortable part of hash code */ - - /* - * These variables are specific to the Datum case; they are set by - * rum_tuplesort_begin_datum and used only by the DatumTuple routines. - */ - Oid datumType; - /* we need typelen and byval in order to know how to copy the Datums. 
*/ - int datumTypeLen; - bool datumTypeByVal; - - bool reverse; - - /* Do we need ItemPointer comparison in comparetup_rum()? */ - bool compareItemPointer; - - /* compare_rumitem */ - FmgrInfo *cmp; - - /* - * Resource snapshot for time of sort start. - */ -#ifdef TRACE_SORT - PGRUsage ru_start; +typedef struct RumTuplesortstateExt +{ + RumTuplesortstate ts; + FmgrInfo *cmp; +} RumTuplesortstateExt; + +static RumTuplesortstate * rum_tuplesort_begin_common(int workMem, bool randomAccess); +static int comparetup_rum_true(const SortTuple *a, const SortTuple *b, + RumTuplesortstate * state); +static int comparetup_rum_false(const SortTuple *a, const SortTuple *b, + RumTuplesortstate * state); +static int comparetup_rum(const SortTuple *a, const SortTuple *b, + RumTuplesortstate * state, bool compareItemPointer); + +static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); + +#if PG_VERSION_NUM >= 150000 +static void writetup_rum(RumTuplesortstate * state, LogicalTape *unused, + SortTuple *stup); +static void readtup_rum(RumTuplesortstate * state, SortTuple *stup, + LogicalTape *unused, unsigned int len); +static void writetup_rumitem(RumTuplesortstate * state, LogicalTape *unused, + SortTuple *stup); +static void readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + LogicalTape *unused, unsigned int len); +#else +static void writetup_rum(RumTuplesortstate * state, int tapenum, + SortTuple *stup); +static void readtup_rum(RumTuplesortstate * state, SortTuple *stup, + int tapenum, unsigned int len); +static void writetup_rumitem(RumTuplesortstate * state, int tapenum, + SortTuple *stup); +static void readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + int tapenum, unsigned int len); #endif -}; - -#define COMPARETUP(state,a,b) ((*(state)->comparetup) (a, b, state)) -#define COPYTUP(state,stup,tup) ((*(state)->copytup) (state, stup, tup)) -#define WRITETUP(state,tape,stup) ((*(state)->writetup) (state, tape, stup)) -#define 
READTUP(state,stup,tape,len) ((*(state)->readtup) (state, stup, tape, len)) -#define REVERSEDIRECTION(state) ((*(state)->reversedirection) (state)) -#define LACKMEM(state) ((state)->availMem < 0) -#define USEMEM(state,amt) ((state)->availMem -= (amt)) -#define FREEMEM(state,amt) ((state)->availMem += (amt)) - -/* When using this macro, beware of double evaluation of len */ -#define LogicalTapeReadExact(tapeset, tapenum, ptr, len) \ - do { \ - if (LogicalTapeRead(tapeset, tapenum, ptr, len) != (size_t) (len)) \ - elog(ERROR, "unexpected end of data"); \ - } while(0) - - -static RumTuplesortstate *rum_tuplesort_begin_common(int workMem, bool randomAccess); -static void puttuple_common(RumTuplesortstate *state, SortTuple *tuple); -static void inittapes(RumTuplesortstate *state); -static void selectnewtape(RumTuplesortstate *state); -static void mergeruns(RumTuplesortstate *state); -static void mergeonerun(RumTuplesortstate *state); -static void beginmerge(RumTuplesortstate *state); -static void mergepreread(RumTuplesortstate *state); -static void mergeprereadone(RumTuplesortstate *state, int srcTape); -static void dumptuples(RumTuplesortstate *state, bool alltuples); -static void make_bounded_heap(RumTuplesortstate *state); -static void sort_bounded_heap(RumTuplesortstate *state); -static void rum_tuplesort_heap_insert(RumTuplesortstate *state, SortTuple *tuple, - int tupleindex, bool checkIndex); -static void rum_tuplesort_heap_siftup(RumTuplesortstate *state, bool checkIndex); -static unsigned int getlen(RumTuplesortstate *state, int tapenum, bool eofOK); -static void markrunend(RumTuplesortstate *state, int tapenum); -static void free_sort_tuple(RumTuplesortstate *state, SortTuple *stup); -static int comparetup_rum(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_rum(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void 
readtup_rum(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); -static void reversedirection_rum(RumTuplesortstate *state); -static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, - RumTuplesortstate *state); -static void copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, void *tup); -static void writetup_rumitem(RumTuplesortstate *state, int tapenum, - SortTuple *stup); -static void readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len); - -/* - * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts - * any variant of SortTuples, using the appropriate comparetup function. - * qsort_ssup() is specialized for the case where the comparetup function - * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts - * and Datum sorts. - */ -/* #include "qsort_tuple.c" */ - -static void -swapfunc(SortTuple *a, SortTuple *b, size_t n) -{ - do - { - SortTuple t = *a; - - *a++ = *b; - *b++ = t; - } while (--n > 0); -} - -#define cmp_ssup(a, b, ssup) \ - ApplySortComparator((a)->datum1, (a)->isnull1, \ - (b)->datum1, (b)->isnull1, ssup) - -#define swap(a, b) \ - do { \ - SortTuple t = *(a); \ - *(a) = *(b); \ - *(b) = t; \ - } while (0); - -#define vecswap(a, b, n) if ((n) > 0) swapfunc(a, b, n) - -static SortTuple * -med3_tuple(SortTuple *a, SortTuple *b, SortTuple *c, SortTupleComparator cmp_tuple, RumTuplesortstate *state) -{ - return cmp_tuple(a, b, state) < 0 ? - (cmp_tuple(b, c, state) < 0 ? b : - (cmp_tuple(a, c, state) < 0 ? c : a)) - : (cmp_tuple(b, c, state) > 0 ? b : - (cmp_tuple(a, c, state) < 0 ? a : c)); -} - -static SortTuple * -med3_ssup(SortTuple *a, SortTuple *b, SortTuple *c, SortSupport ssup) -{ - return cmp_ssup(a, b, ssup) < 0 ? - (cmp_ssup(b, c, ssup) < 0 ? b : - (cmp_ssup(a, c, ssup) < 0 ? c : a)) - : (cmp_ssup(b, c, ssup) > 0 ? b : - (cmp_ssup(a, c, ssup) < 0 ? 
a : c)); -} - -static void -qsort_ssup(SortTuple *a, size_t n, SortSupport ssup) -{ - SortTuple *pa, - *pb, - *pc, - *pd, - *pl, - *pm, - *pn; - size_t d1, - d2; - int r, - presorted; - -loop: - CHECK_FOR_INTERRUPTS(); - if (n < 7) - { - for (pm = a + 1; pm < a + n; pm++) - for (pl = pm; pl > a && cmp_ssup(pl - 1, pl, ssup) > 0; pl--) - swap(pl, pl - 1); - return; - } - presorted = 1; - for (pm = a + 1; pm < a + n; pm++) - { - CHECK_FOR_INTERRUPTS(); - if (cmp_ssup(pm - 1, pm, ssup) > 0) - { - presorted = 0; - break; - } - } - if (presorted) - return; - pm = a + (n / 2); - if (n > 7) - { - pl = a; - pn = a + (n - 1); - if (n > 40) - { - size_t d = (n / 8); - - pl = med3_ssup(pl, pl + d, pl + 2 * d, ssup); - pm = med3_ssup(pm - d, pm, pm + d, ssup); - pn = med3_ssup(pn - 2 * d, pn - d, pn, ssup); - } - pm = med3_ssup(pl, pm, pn, ssup); - } - swap(a, pm); - pa = pb = a + 1; - pc = pd = a + (n - 1); - for (;;) - { - while (pb <= pc && (r = cmp_ssup(pb, a, ssup)) <= 0) - { - if (r == 0) - { - swap(pa, pb); - pa++; - } - pb++; - CHECK_FOR_INTERRUPTS(); - } - while (pb <= pc && (r = cmp_ssup(pc, a, ssup)) >= 0) - { - if (r == 0) - { - swap(pc, pd); - pd--; - } - pc--; - CHECK_FOR_INTERRUPTS(); - } - if (pb > pc) - break; - swap(pb, pc); - pb++; - pc--; - } - pn = a + n; - d1 = Min(pa - a, pb - pa); - vecswap(a, pb - d1, d1); - d1 = Min(pd - pc, pn - pd - 1); - vecswap(pb, pn - d1, d1); - d1 = pb - pa; - d2 = pd - pc; - if (d1 <= d2) - { - /* Recurse on left partition, then iterate on right partition */ - if (d1 > 1) - qsort_ssup(a, d1, ssup); - if (d2 > 1) - { - /* Iterate rather than recurse to save stack space */ - /* qsort_ssup(pn - d2, d2, ssup); */ - a = pn - d2; - n = d2; - goto loop; - } - } - else - { - /* Recurse on right partition, then iterate on left partition */ - if (d2 > 1) - qsort_ssup(pn - d2, d2, ssup); - if (d1 > 1) - { - /* Iterate rather than recurse to save stack space */ - /* qsort_ssup(a, d1, ssup); */ - n = d1; - goto loop; - } - } -} - -static 
void -qsort_tuple(SortTuple *a, size_t n, SortTupleComparator cmp_tuple, RumTuplesortstate *state) -{ - SortTuple *pa, - *pb, - *pc, - *pd, - *pl, - *pm, - *pn; - size_t d1, - d2; - int r, - presorted; - -loop: - CHECK_FOR_INTERRUPTS(); - if (n < 7) - { - for (pm = a + 1; pm < a + n; pm++) - for (pl = pm; pl > a && cmp_tuple(pl - 1, pl, state) > 0; pl--) - swap(pl, pl - 1); - return; - } - presorted = 1; - for (pm = a + 1; pm < a + n; pm++) - { - CHECK_FOR_INTERRUPTS(); - if (cmp_tuple(pm - 1, pm, state) > 0) - { - presorted = 0; - break; - } - } - if (presorted) - return; - pm = a + (n / 2); - if (n > 7) - { - pl = a; - pn = a + (n - 1); - if (n > 40) - { - size_t d = (n / 8); - pl = med3_tuple(pl, pl + d, pl + 2 * d, cmp_tuple, state); - pm = med3_tuple(pm - d, pm, pm + d, cmp_tuple, state); - pn = med3_tuple(pn - 2 * d, pn - d, pn, cmp_tuple, state); - } - pm = med3_tuple(pl, pm, pn, cmp_tuple, state); - } - swap(a, pm); - pa = pb = a + 1; - pc = pd = a + (n - 1); - for (;;) - { - while (pb <= pc && (r = cmp_tuple(pb, a, state)) <= 0) - { - if (r == 0) - { - swap(pa, pb); - pa++; - } - pb++; - CHECK_FOR_INTERRUPTS(); - } - while (pb <= pc && (r = cmp_tuple(pc, a, state)) >= 0) - { - if (r == 0) - { - swap(pc, pd); - pd--; - } - pc--; - CHECK_FOR_INTERRUPTS(); - } - if (pb > pc) - break; - swap(pb, pc); - pb++; - pc--; - } - pn = a + n; - d1 = Min(pa - a, pb - pa); - vecswap(a, pb - d1, d1); - d1 = Min(pd - pc, pn - pd - 1); - vecswap(pb, pn - d1, d1); - d1 = pb - pa; - d2 = pd - pc; - if (d1 <= d2) - { - /* Recurse on left partition, then iterate on right partition */ - if (d1 > 1) - qsort_tuple(a, d1, cmp_tuple, state); - if (d2 > 1) - { - /* Iterate rather than recurse to save stack space */ - /* qsort_tuple(pn - d2, d2, cmp_tuple, state); */ - a = pn - d2; - n = d2; - goto loop; - } - } - else - { - /* Recurse on right partition, then iterate on left partition */ - if (d2 > 1) - qsort_tuple(pn - d2, d2, cmp_tuple, state); - if (d1 > 1) - { - /* Iterate rather 
than recurse to save stack space */ - /* qsort_tuple(a, d1, cmp_tuple, state); */ - n = d1; - goto loop; - } - } -} +static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, + RumTuplesortstate * state); +static void copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup); /* * rum_tuplesort_begin_xxx @@ -695,7 +184,7 @@ rum_tuplesort_begin_common(int workMem, bool randomAccess) * see comments in grow_memtuples(). */ state->memtupsize = Max(1024, - ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); state->growmemtuples = true; state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); @@ -713,8 +202,11 @@ rum_tuplesort_begin_common(int workMem, bool randomAccess) * inittapes(), if needed */ +#if PG_VERSION_NUM >= 150000 + state->result_tape = NULL; +#else state->result_tape = -1; /* flag that result tape has not been formed */ - +#endif MemoryContextSwitchTo(oldcontext); return state; @@ -725,7 +217,7 @@ rum_tuplesort_begin_common(int workMem, bool randomAccess) * RumSortItem. */ MemoryContext -rum_tuplesort_get_memorycontext(RumTuplesortstate *state) +rum_tuplesort_get_memorycontext(RumTuplesortstate * state) { return state->sortcontext; } @@ -748,13 +240,10 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, state->nKeys = nKeys; - state->comparetup = comparetup_rum; + state->comparetup = compareItemPointer ? 
comparetup_rum_true : comparetup_rum_false; state->copytup = copytup_rum; state->writetup = writetup_rum; state->readtup = readtup_rum; - state->reversedirection = reversedirection_rum; - state->reverse = false; - state->compareItemPointer = compareItemPointer; MemoryContextSwitchTo(oldcontext); @@ -765,28 +254,30 @@ RumTuplesortstate * rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) { RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, false); + RumTuplesortstateExt *rs; MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->sortcontext); + /* Allocate extended state in the same context as state */ + rs = palloc(sizeof(RumTuplesortstateExt)); + #ifdef TRACE_SORT if (trace_sort) elog(LOG, "begin rumitem sort: workMem = %d", workMem); #endif - state->cmp = cmp; + rs->cmp = cmp; state->comparetup = comparetup_rumitem; state->copytup = copytup_rumitem; state->writetup = writetup_rumitem; state->readtup = readtup_rumitem; - state->reversedirection = reversedirection_rum; - state->reverse = false; - state->compareItemPointer = false; + memcpy(&rs->ts, state, sizeof(RumTuplesortstate)); MemoryContextSwitchTo(oldcontext); - return state; + return (RumTuplesortstate *) rs; } /* @@ -799,7 +290,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) * pointers afterwards! */ void -rum_tuplesort_end(RumTuplesortstate *state) +rum_tuplesort_end(RumTuplesortstate * state) { /* context swap probably not needed, but let's be safe */ MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); @@ -852,128 +343,8 @@ rum_tuplesort_end(RumTuplesortstate *state) MemoryContextDelete(state->sortcontext); } -/* - * Grow the memtuples[] array, if possible within our memory constraint. - * Return true if we were able to enlarge the array, false if not. - * - * Normally, at each increment we double the size of the array. 
When we no - * longer have enough memory to do that, we attempt one last, smaller increase - * (and then clear the growmemtuples flag so we don't try any more). That - * allows us to use allowedMem as fully as possible; sticking to the pure - * doubling rule could result in almost half of allowedMem going unused. - * Because availMem moves around with tuple addition/removal, we need some - * rule to prevent making repeated small increases in memtupsize, which would - * just be useless thrashing. The growmemtuples flag accomplishes that and - * also prevents useless recalculations in this function. - */ -static bool -grow_memtuples(RumTuplesortstate *state) -{ - int newmemtupsize; - int memtupsize = state->memtupsize; - long memNowUsed = state->allowedMem - state->availMem; - - /* Forget it if we've already maxed out memtuples, per comment above */ - if (!state->growmemtuples) - return false; - - /* Select new value of memtupsize */ - if (memNowUsed <= state->availMem) - { - /* - * It is surely safe to double memtupsize if we've used no more than - * half of allowedMem. - * - * Note: it might seem that we need to worry about memtupsize * 2 - * overflowing an int, but the MaxAllocSize clamp applied below - * ensures the existing memtupsize can't be large enough for that. - */ - newmemtupsize = memtupsize * 2; - } - else - { - /* - * This will be the last increment of memtupsize. Abandon doubling - * strategy and instead increase as much as we safely can. - * - * To stay within allowedMem, we can't increase memtupsize by more - * than availMem / sizeof(SortTuple) elements. In practice, we want - * to increase it by considerably less, because we need to leave some - * space for the tuples to which the new array slots will refer. We - * assume the new tuples will be about the same size as the tuples - * we've already seen, and thus we can extrapolate from the space - * consumption so far to estimate an appropriate new size for the - * memtuples array. 
The optimal value might be higher or lower than - * this estimate, but it's hard to know that in advance. - * - * This calculation is safe against enlarging the array so much that - * LACKMEM becomes true, because the memory currently used includes - * the present array; thus, there would be enough allowedMem for the - * new array elements even if no other memory were currently used. - * - * We do the arithmetic in float8, because otherwise the product of - * memtupsize and allowedMem could overflow. (A little algebra shows - * that grow_ratio must be less than 2 here, so we are not risking - * integer overflow this way.) Any inaccuracy in the result should be - * insignificant; but even if we computed a completely insane result, - * the checks below will prevent anything really bad from happening. - */ - double grow_ratio; - - grow_ratio = (double) state->allowedMem / (double) memNowUsed; - newmemtupsize = (int) (memtupsize * grow_ratio); - - /* We won't make any further enlargement attempts */ - state->growmemtuples = false; - } - - /* Must enlarge array by at least one element, else report failure */ - if (newmemtupsize <= memtupsize) - goto noalloc; - - /* - * On a 64-bit machine, allowedMem could be more than MaxAllocSize. Clamp - * to ensure our request won't be rejected by palloc. - */ - if ((Size) newmemtupsize >= MaxAllocSize / sizeof(SortTuple)) - { - newmemtupsize = (int) (MaxAllocSize / sizeof(SortTuple)); - state->growmemtuples = false; /* can't grow any more */ - } - - /* - * We need to be sure that we do not cause LACKMEM to become true, else - * the space management algorithm will go nuts. The code above should - * never generate a dangerous request, but to be safe, check explicitly - * that the array growth fits within availMem. (We could still cause - * LACKMEM if the memory chunk overhead associated with the memtuples - * array were to increase. 
That shouldn't happen because we chose the - * initial array size large enough to ensure that palloc will be treating - * both old and new arrays as separate chunks. But we'll check LACKMEM - * explicitly below just in case.) - */ - if (state->availMem < (long) ((newmemtupsize - memtupsize) * sizeof(SortTuple))) - goto noalloc; - - /* OK, do it */ - FREEMEM(state, GetMemoryChunkSpace(state->memtuples)); - state->memtupsize = newmemtupsize; - state->memtuples = (SortTuple *) - repalloc(state->memtuples, - state->memtupsize * sizeof(SortTuple)); - USEMEM(state, GetMemoryChunkSpace(state->memtuples)); - if (LACKMEM(state)) - elog(ERROR, "unexpected out-of-memory situation in tuplesort"); - return true; - -noalloc: - /* If for any reason we didn't realloc, shut off future attempts */ - state->growmemtuples = false; - return false; -} - void -rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem * item) +rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -990,7 +361,7 @@ rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem * item) } void -rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem * item) +rum_tuplesort_putrumitem(RumTuplesortstate * state, RumScanItem * item) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1006,222 +377,10 @@ rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem * item) MemoryContextSwitchTo(oldcontext); } -/* - * Shared code for tuple and datum cases. - */ -static void -puttuple_common(RumTuplesortstate *state, SortTuple *tuple) -{ - switch (state->status) - { - case TSS_INITIAL: - - /* - * Save the tuple into the unsorted array. First, grow the array - * as needed. 
Note that we try to grow the array when there is - * still one free slot remaining --- if we fail, there'll still be - * room to store the incoming tuple, and then we'll switch to - * tape-based operation. - */ - if (state->memtupcount >= state->memtupsize - 1) - { - (void) grow_memtuples(state); - Assert(state->memtupcount < state->memtupsize); - } - state->memtuples[state->memtupcount++] = *tuple; - - /* - * Check if it's time to switch over to a bounded heapsort. We do - * so if the input tuple count exceeds twice the desired tuple - * count (this is a heuristic for where heapsort becomes cheaper - * than a quicksort), or if we've just filled workMem and have - * enough tuples to meet the bound. - * - * Note that once we enter TSS_BOUNDED state we will always try to - * complete the sort that way. In the worst case, if later input - * tuples are larger than earlier ones, this might cause us to - * exceed workMem significantly. - */ - if (state->bounded && - (state->memtupcount > state->bound * 2 || - (state->memtupcount > state->bound && LACKMEM(state)))) - { -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, "switching to bounded heapsort at %d tuples: %s", - state->memtupcount, - pg_rusage_show(&state->ru_start)); -#endif - make_bounded_heap(state); - return; - } - - /* - * Done if we still fit in available memory and have array slots. - */ - if (state->memtupcount < state->memtupsize && !LACKMEM(state)) - return; - - /* - * Nope; time to switch to tape-based operation. - */ - inittapes(state); - - /* - * Dump tuples until we are back under the limit. - */ - dumptuples(state, false); - break; - - case TSS_BOUNDED: - - /* - * We don't want to grow the array here, so check whether the new - * tuple can be discarded before putting it in. This should be a - * good speed optimization, too, since when there are many more - * input tuples than the bound, most input tuples can be discarded - * with just this one comparison. 
Note that because we currently - * have the sort direction reversed, we must check for <= not >=. - */ - if (COMPARETUP(state, tuple, &state->memtuples[0]) <= 0) - { - /* new tuple <= top of the heap, so we can discard it */ - free_sort_tuple(state, tuple); - CHECK_FOR_INTERRUPTS(); - } - else - { - /* discard top of heap, sift up, insert new tuple */ - free_sort_tuple(state, &state->memtuples[0]); - rum_tuplesort_heap_siftup(state, false); - rum_tuplesort_heap_insert(state, tuple, 0, false); - } - break; - - case TSS_BUILDRUNS: - - /* - * Insert the tuple into the heap, with run number currentRun if - * it can go into the current run, else run number currentRun+1. - * The tuple can go into the current run if it is >= the first - * not-yet-output tuple. (Actually, it could go into the current - * run if it is >= the most recently output tuple ... but that - * would require keeping around the tuple we last output, and it's - * simplest to let writetup free each tuple as soon as it's - * written.) - * - * Note there will always be at least one tuple in the heap at - * this point; see dumptuples. - */ - Assert(state->memtupcount > 0); - if (COMPARETUP(state, tuple, &state->memtuples[0]) >= 0) - rum_tuplesort_heap_insert(state, tuple, state->currentRun, true); - else - rum_tuplesort_heap_insert(state, tuple, state->currentRun + 1, true); - - /* - * If we are over the memory limit, dump tuples till we're under. - */ - dumptuples(state, false); - break; - - default: - elog(ERROR, "invalid tuplesort state"); - break; - } -} - -/* - * All tuples have been provided; finish the sort. 
- */ void -rum_tuplesort_performsort(RumTuplesortstate *state) +rum_tuplesort_performsort(RumTuplesortstate * state) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, "performsort starting: %s", - pg_rusage_show(&state->ru_start)); -#endif - - switch (state->status) - { - case TSS_INITIAL: - - /* - * We were able to accumulate all the tuples within the allowed - * amount of memory. Just qsort 'em and we're done. - */ - if (state->memtupcount > 1) - { - /* Can we use the single-key sort function? */ - if (state->onlyKey != NULL) - qsort_ssup(state->memtuples, state->memtupcount, - state->onlyKey); - else - qsort_tuple(state->memtuples, - state->memtupcount, - state->comparetup, - state); - } - state->current = 0; - state->eof_reached = false; - state->markpos_offset = 0; - state->markpos_eof = false; - state->status = TSS_SORTEDINMEM; - break; - - case TSS_BOUNDED: - - /* - * We were able to accumulate all the tuples required for output - * in memory, using a heap to eliminate excess tuples. Now we - * have to transform the heap to a properly-sorted array. - */ - sort_bounded_heap(state); - state->current = 0; - state->eof_reached = false; - state->markpos_offset = 0; - state->markpos_eof = false; - state->status = TSS_SORTEDINMEM; - break; - - case TSS_BUILDRUNS: - - /* - * Finish tape-based sort. First, flush all tuples remaining in - * memory out to tape; then merge until we have a single remaining - * run (or, if !randomAccess, one run per tape). Note that - * mergeruns sets the correct state->status. 
- */ - dumptuples(state, true); - mergeruns(state); - state->eof_reached = false; - state->markpos_block = 0L; - state->markpos_offset = 0; - state->markpos_eof = false; - break; - - default: - elog(ERROR, "invalid tuplesort state"); - break; - } - -#ifdef TRACE_SORT - if (trace_sort) - { - if (state->status == TSS_FINALMERGE) - elog(LOG, "performsort done (except %d-way final merge): %s", - state->activeTapes, - pg_rusage_show(&state->ru_start)); - else - elog(LOG, "performsort done: %s", - pg_rusage_show(&state->ru_start)); - } -#endif - - MemoryContextSwitchTo(oldcontext); + tuplesort_performsort(state); } /* @@ -1230,203 +389,32 @@ rum_tuplesort_performsort(RumTuplesortstate *state) * If *should_free is set, the caller must pfree stup.tuple when done with it. */ static bool -rum_tuplesort_gettuple_common(RumTuplesortstate *state, bool forward, +rum_tuplesort_gettuple_common(RumTuplesortstate * state, bool forward, SortTuple *stup, bool *should_free) { - unsigned int tuplen; + bool res = tuplesort_gettuple_common(state, forward, stup); switch (state->status) { case TSS_SORTEDINMEM: - Assert(forward || state->randomAccess); *should_free = false; - if (forward) - { - if (state->current < state->memtupcount) - { - *stup = state->memtuples[state->current++]; - return true; - } - state->eof_reached = true; - - /* - * Complain if caller tries to retrieve more tuples than - * originally asked for in a bounded sort. This is because - * returning EOF here might be the wrong thing. - */ - if (state->bounded && state->current >= state->bound) - elog(ERROR, "retrieved too many tuples in a bounded sort"); - - return false; - } - else - { - if (state->current <= 0) - return false; - - /* - * if all tuples are fetched already then we return last - * tuple, else - tuple before last returned. 
- */ - if (state->eof_reached) - state->eof_reached = false; - else - { - state->current--; /* last returned tuple */ - if (state->current <= 0) - return false; - } - *stup = state->memtuples[state->current - 1]; - return true; - } break; case TSS_SORTEDONTAPE: - Assert(forward || state->randomAccess); - *should_free = true; - if (forward) - { - if (state->eof_reached) - return false; - if ((tuplen = getlen(state, state->result_tape, true)) != 0) - { - READTUP(state, stup, state->result_tape, tuplen); - return true; - } - else - { - state->eof_reached = true; - return false; - } - } - - /* - * Backward. - * - * if all tuples are fetched already then we return last tuple, - * else - tuple before last returned. - */ - if (state->eof_reached) - { - /* - * Seek position is pointing just past the zero tuplen at the - * end of file; back up to fetch last tuple's ending length - * word. If seek fails we must have a completely empty file. - */ - if (!LogicalTapeBackspace(state->tapeset, - state->result_tape, - 2 * sizeof(unsigned int))) - return false; - state->eof_reached = false; - } - else - { - /* - * Back up and fetch previously-returned tuple's ending length - * word. If seek fails, assume we are at start of file. - */ - if (!LogicalTapeBackspace(state->tapeset, - state->result_tape, - sizeof(unsigned int))) - return false; - tuplen = getlen(state, state->result_tape, false); - - /* - * Back up to get ending length word of tuple before it. - */ - if (!LogicalTapeBackspace(state->tapeset, - state->result_tape, - tuplen + 2 * sizeof(unsigned int))) - { - /* - * If that fails, presumably the prev tuple is the first - * in the file. Back up so that it becomes next to read - * in forward direction (not obviously right, but that is - * what in-memory case does). 
- */ - if (!LogicalTapeBackspace(state->tapeset, - state->result_tape, - tuplen + sizeof(unsigned int))) - elog(ERROR, "bogus tuple length in backward scan"); - return false; - } - } - - tuplen = getlen(state, state->result_tape, false); - - /* - * Now we have the length of the prior tuple, back up and read it. - * Note: READTUP expects we are positioned after the initial - * length word of the tuple, so back up to that point. - */ - if (!LogicalTapeBackspace(state->tapeset, - state->result_tape, - tuplen)) - elog(ERROR, "bogus tuple length in backward scan"); - READTUP(state, stup, state->result_tape, tuplen); - return true; - case TSS_FINALMERGE: - Assert(forward); *should_free = true; - - /* - * This code should match the inner loop of mergeonerun(). - */ - if (state->memtupcount > 0) - { - int srcTape = state->memtuples[0].tupindex; - Size tuplen; - int tupIndex; - SortTuple *newtup; - - *stup = state->memtuples[0]; - /* returned tuple is no longer counted in our memory space */ - if (stup->tuple) - { - tuplen = GetMemoryChunkSpace(stup->tuple); - state->availMem += tuplen; - state->mergeavailmem[srcTape] += tuplen; - } - rum_tuplesort_heap_siftup(state, false); - if ((tupIndex = state->mergenext[srcTape]) == 0) - { - /* - * out of preloaded data on this tape, try to read more - * - * Unlike mergeonerun(), we only preload from the single - * tape that's run dry. See mergepreread() comments. 
- */ - mergeprereadone(state, srcTape); - - /* - * if still no data, we've reached end of run on this tape - */ - if ((tupIndex = state->mergenext[srcTape]) == 0) - return true; - } - /* pull next preread tuple from list, insert in heap */ - newtup = &state->memtuples[tupIndex]; - state->mergenext[srcTape] = newtup->tupindex; - if (state->mergenext[srcTape] == 0) - state->mergelast[srcTape] = 0; - rum_tuplesort_heap_insert(state, newtup, srcTape, false); - /* put the now-unused memtuples entry on the freelist */ - newtup->tupindex = state->mergefreelist; - state->mergefreelist = tupIndex; - state->mergeavailslots[srcTape]++; - return true; - } - return false; + break; default: elog(ERROR, "invalid tuplesort state"); return false; /* keep compiler quiet */ } + + return res; } RumSortItem * -rum_tuplesort_getrum(RumTuplesortstate *state, bool forward, bool *should_free) +rum_tuplesort_getrum(RumTuplesortstate * state, bool forward, bool *should_free) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1440,7 +428,7 @@ rum_tuplesort_getrum(RumTuplesortstate *state, bool forward, bool *should_free) } RumScanItem * -rum_tuplesort_getrumitem(RumTuplesortstate *state, bool forward, bool *should_free) +rum_tuplesort_getrumitem(RumTuplesortstate * state, bool forward, bool *should_free) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -1483,850 +471,20 @@ rum_tuplesort_merge_order(long allowedMem) return mOrder; } -/* - * inittapes - initialize for tape sorting. - * - * This is called only if we have found we don't have room to sort in memory. 
- */ -static void -inittapes(RumTuplesortstate *state) +static int +comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) { - int maxTapes, - ntuples, - j; - long tapeSpace; - - /* Compute number of tapes to use: merge order plus 1 */ - maxTapes = rum_tuplesort_merge_order(state->allowedMem) + 1; - - /* - * We must have at least 2*maxTapes slots in the memtuples[] array, else - * we'd not have room for merge heap plus preread. It seems unlikely that - * this case would ever occur, but be safe. - */ - maxTapes = Min(maxTapes, state->memtupsize / 2); + return comparetup_rum(a, b, state, true); +} - state->maxTapes = maxTapes; - state->tapeRange = maxTapes - 1; - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, "switching to external sort with %d tapes: %s", - maxTapes, pg_rusage_show(&state->ru_start)); -#endif - - /* - * Decrease availMem to reflect the space needed for tape buffers; but - * don't decrease it to the point that we have no room for tuples. (That - * case is only likely to occur if sorting pass-by-value Datums; in all - * other scenarios the memtuples[] array is unlikely to occupy more than - * half of allowedMem. In the pass-by-value case it's not important to - * account for tuple space, so we don't care if LACKMEM becomes - * inaccurate.) - */ - tapeSpace = (long) maxTapes *TAPE_BUFFER_OVERHEAD; - - if (tapeSpace + GetMemoryChunkSpace(state->memtuples) < state->allowedMem) - USEMEM(state, tapeSpace); - - /* - * Make sure that the temp file(s) underlying the tape set are created in - * suitable temp tablespaces. - */ - PrepareTempTablespaces(); - - /* - * Create the tape set and allocate the per-tape data arrays. 
- */ - state->tapeset = LogicalTapeSetCreate(maxTapes); - - state->mergeactive = (bool *) palloc0(maxTapes * sizeof(bool)); - state->mergenext = (int *) palloc0(maxTapes * sizeof(int)); - state->mergelast = (int *) palloc0(maxTapes * sizeof(int)); - state->mergeavailslots = (int *) palloc0(maxTapes * sizeof(int)); - state->mergeavailmem = (long *) palloc0(maxTapes * sizeof(long)); - state->tp_fib = (int *) palloc0(maxTapes * sizeof(int)); - state->tp_runs = (int *) palloc0(maxTapes * sizeof(int)); - state->tp_dummy = (int *) palloc0(maxTapes * sizeof(int)); - state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int)); - - /* - * Convert the unsorted contents of memtuples[] into a heap. Each tuple is - * marked as belonging to run number zero. - * - * NOTE: we pass false for checkIndex since there's no point in comparing - * indexes in this step, even though we do intend the indexes to be part - * of the sort key... - */ - ntuples = state->memtupcount; - state->memtupcount = 0; /* make the heap empty */ - for (j = 0; j < ntuples; j++) - { - /* Must copy source tuple to avoid possible overwrite */ - SortTuple stup = state->memtuples[j]; - - rum_tuplesort_heap_insert(state, &stup, 0, false); - } - Assert(state->memtupcount == ntuples); - - state->currentRun = 0; - - /* - * Initialize variables of Algorithm D (step D1). - */ - for (j = 0; j < maxTapes; j++) - { - state->tp_fib[j] = 1; - state->tp_runs[j] = 0; - state->tp_dummy[j] = 1; - state->tp_tapenum[j] = j; - } - state->tp_fib[state->tapeRange] = 0; - state->tp_dummy[state->tapeRange] = 0; - - state->Level = 1; - state->destTape = 0; - - state->status = TSS_BUILDRUNS; -} - -/* - * selectnewtape -- select new tape for new initial run. - * - * This is called after finishing a run when we know another run - * must be started. This implements steps D3, D4 of Algorithm D. 
- */ -static void -selectnewtape(RumTuplesortstate *state) -{ - int j; - int a; - - /* Step D3: advance j (destTape) */ - if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1]) - { - state->destTape++; - return; - } - if (state->tp_dummy[state->destTape] != 0) - { - state->destTape = 0; - return; - } - - /* Step D4: increase level */ - state->Level++; - a = state->tp_fib[0]; - for (j = 0; j < state->tapeRange; j++) - { - state->tp_dummy[j] = a + state->tp_fib[j + 1] - state->tp_fib[j]; - state->tp_fib[j] = a + state->tp_fib[j + 1]; - } - state->destTape = 0; -} - -/* - * mergeruns -- merge all the completed initial runs. - * - * This implements steps D5, D6 of Algorithm D. All input data has - * already been written to initial runs on tape (see dumptuples). - */ -static void -mergeruns(RumTuplesortstate *state) -{ - int tapenum, - svTape, - svRuns, - svDummy; - int numTapes; - int numInputTapes; - - Assert(state->status == TSS_BUILDRUNS); - Assert(state->memtupcount == 0); - - /* - * If we produced only one initial run (quite likely if the total data - * volume is between 1X and 2X workMem), we can just use that tape as the - * finished output, rather than doing a useless merge. (This obvious - * optimization is not in Knuth's algorithm.) - */ - if (state->currentRun == 1) - { - state->result_tape = state->tp_tapenum[state->destTape]; - /* must freeze and rewind the finished output tape */ - LogicalTapeFreeze(state->tapeset, state->result_tape); - state->status = TSS_SORTEDONTAPE; - return; - } - - /* - * If we had fewer runs than tapes, refund the memory that we imagined we - * would need for the tape buffers of the unused tapes. - * - * numTapes and numInputTapes reflect the actual number of tapes we will - * use. Note that the output tape's tape number is maxTapes - 1, so the - * tape numbers of the used tapes are not consecutive, and you cannot just - * loop from 0 to numTapes to visit all used tapes! 
- */ - if (state->Level == 1) - { - numInputTapes = state->currentRun; - numTapes = numInputTapes + 1; - FREEMEM(state, (state->maxTapes - numTapes) * TAPE_BUFFER_OVERHEAD); - } - else - { - numInputTapes = state->tapeRange; - } - - state->read_buffer_size = Max(state->availMem / numInputTapes, 0); - USEMEM(state, state->read_buffer_size * numInputTapes); - - /* End of step D2: rewind all output tapes to prepare for merging */ - for (tapenum = 0; tapenum < state->tapeRange; tapenum++) - LogicalTapeRewindForRead(state->tapeset, tapenum, state->read_buffer_size); - - for (;;) - { - /* - * At this point we know that tape[T] is empty. If there's just one - * (real or dummy) run left on each input tape, then only one merge - * pass remains. If we don't have to produce a materialized sorted - * tape, we can stop at this point and do the final merge on-the-fly. - */ - if (!state->randomAccess) - { - bool allOneRun = true; - - Assert(state->tp_runs[state->tapeRange] == 0); - for (tapenum = 0; tapenum < state->tapeRange; tapenum++) - { - if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) - { - allOneRun = false; - break; - } - } - if (allOneRun) - { - /* Tell logtape.c we won't be writing anymore */ - LogicalTapeSetForgetFreeSpace(state->tapeset); - /* Initialize for the final merge pass */ - beginmerge(state); - state->status = TSS_FINALMERGE; - return; - } - } - - /* Step D5: merge runs onto tape[T] until tape[P] is empty */ - while (state->tp_runs[state->tapeRange - 1] || - state->tp_dummy[state->tapeRange - 1]) - { - bool allDummy = true; - - for (tapenum = 0; tapenum < state->tapeRange; tapenum++) - { - if (state->tp_dummy[tapenum] == 0) - { - allDummy = false; - break; - } - } - - if (allDummy) - { - state->tp_dummy[state->tapeRange]++; - for (tapenum = 0; tapenum < state->tapeRange; tapenum++) - state->tp_dummy[tapenum]--; - } - else - mergeonerun(state); - } - - /* Step D6: decrease level */ - if (--state->Level == 0) - break; - /* rewind output tape T to 
use as new input */ - LogicalTapeRewindForRead(state->tapeset, state->tp_tapenum[state->tapeRange], - state->read_buffer_size); - /* rewind used-up input tape P, and prepare it for write pass */ - LogicalTapeRewindForWrite(state->tapeset, state->tp_tapenum[state->tapeRange - 1]); - state->tp_runs[state->tapeRange - 1] = 0; - - /* - * reassign tape units per step D6; note we no longer care about A[] - */ - svTape = state->tp_tapenum[state->tapeRange]; - svDummy = state->tp_dummy[state->tapeRange]; - svRuns = state->tp_runs[state->tapeRange]; - for (tapenum = state->tapeRange; tapenum > 0; tapenum--) - { - state->tp_tapenum[tapenum] = state->tp_tapenum[tapenum - 1]; - state->tp_dummy[tapenum] = state->tp_dummy[tapenum - 1]; - state->tp_runs[tapenum] = state->tp_runs[tapenum - 1]; - } - state->tp_tapenum[0] = svTape; - state->tp_dummy[0] = svDummy; - state->tp_runs[0] = svRuns; - } - - /* - * Done. Knuth says that the result is on TAPE[1], but since we exited - * the loop without performing the last iteration of step D6, we have not - * rearranged the tape unit assignment, and therefore the result is on - * TAPE[T]. We need to do it this way so that we can freeze the final - * output tape while rewinding it. The last iteration of step D6 would be - * a waste of cycles anyway... - */ - state->result_tape = state->tp_tapenum[state->tapeRange]; - LogicalTapeFreeze(state->tapeset, state->result_tape); - state->status = TSS_SORTEDONTAPE; -} - -/* - * Merge one run from each input tape, except ones with dummy runs. - * - * This is the inner loop of Algorithm D step D5. We know that the - * output tape is TAPE[T]. - */ -static void -mergeonerun(RumTuplesortstate *state) -{ - int destTape = state->tp_tapenum[state->tapeRange]; - int srcTape; - int tupIndex; - SortTuple *tup; - long priorAvail, - spaceFreed; - - /* - * Start the merge by loading one tuple from each active source tape into - * the heap. We can also decrease the input run/dummy run counts. 
- */ - beginmerge(state); - - /* - * Execute merge by repeatedly extracting lowest tuple in heap, writing it - * out, and replacing it with next tuple from same tape (if there is - * another one). - */ - while (state->memtupcount > 0) - { - /* write the tuple to destTape */ - priorAvail = state->availMem; - srcTape = state->memtuples[0].tupindex; - WRITETUP(state, destTape, &state->memtuples[0]); - /* writetup adjusted total free space, now fix per-tape space */ - spaceFreed = state->availMem - priorAvail; - state->mergeavailmem[srcTape] += spaceFreed; - /* compact the heap */ - rum_tuplesort_heap_siftup(state, false); - if ((tupIndex = state->mergenext[srcTape]) == 0) - { - /* out of preloaded data on this tape, try to read more */ - mergepreread(state); - /* if still no data, we've reached end of run on this tape */ - if ((tupIndex = state->mergenext[srcTape]) == 0) - continue; - } - /* pull next preread tuple from list, insert in heap */ - tup = &state->memtuples[tupIndex]; - state->mergenext[srcTape] = tup->tupindex; - if (state->mergenext[srcTape] == 0) - state->mergelast[srcTape] = 0; - rum_tuplesort_heap_insert(state, tup, srcTape, false); - /* put the now-unused memtuples entry on the freelist */ - tup->tupindex = state->mergefreelist; - state->mergefreelist = tupIndex; - state->mergeavailslots[srcTape]++; - } - - /* - * When the heap empties, we're done. Write an end-of-run marker on the - * output tape, and increment its count of real runs. - */ - markrunend(state, destTape); - state->tp_runs[state->tapeRange]++; - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, "finished %d-way merge step: %s", state->activeTapes, - pg_rusage_show(&state->ru_start)); -#endif -} - -/* - * beginmerge - initialize for a merge pass - * - * We decrease the counts of real and dummy runs for each tape, and mark - * which tapes contain active input runs in mergeactive[]. 
Then, load - * as many tuples as we can from each active input tape, and finally - * fill the merge heap with the first tuple from each active tape. - */ -static void -beginmerge(RumTuplesortstate *state) -{ - int activeTapes; - int tapenum; - int srcTape; - int slotsPerTape; - long spacePerTape; - - /* Heap should be empty here */ - Assert(state->memtupcount == 0); - - /* Adjust run counts and mark the active tapes */ - memset(state->mergeactive, 0, - state->maxTapes * sizeof(*state->mergeactive)); - activeTapes = 0; - for (tapenum = 0; tapenum < state->tapeRange; tapenum++) - { - if (state->tp_dummy[tapenum] > 0) - state->tp_dummy[tapenum]--; - else - { - Assert(state->tp_runs[tapenum] > 0); - state->tp_runs[tapenum]--; - srcTape = state->tp_tapenum[tapenum]; - state->mergeactive[srcTape] = true; - activeTapes++; - } - } - state->activeTapes = activeTapes; - - /* Clear merge-pass state variables */ - memset(state->mergenext, 0, - state->maxTapes * sizeof(*state->mergenext)); - memset(state->mergelast, 0, - state->maxTapes * sizeof(*state->mergelast)); - state->mergefreelist = 0; /* nothing in the freelist */ - state->mergefirstfree = activeTapes; /* 1st slot avail for preread */ - - /* - * Initialize space allocation to let each active input tape have an equal - * share of preread space. 
- */ - Assert(activeTapes > 0); - slotsPerTape = (state->memtupsize - state->mergefirstfree) / activeTapes; - Assert(slotsPerTape > 0); - spacePerTape = state->availMem / activeTapes; - for (srcTape = 0; srcTape < state->maxTapes; srcTape++) - { - if (state->mergeactive[srcTape]) - { - state->mergeavailslots[srcTape] = slotsPerTape; - state->mergeavailmem[srcTape] = spacePerTape; - } - } - - /* - * Preread as many tuples as possible (and at least one) from each active - * tape - */ - mergepreread(state); - - /* Load the merge heap with the first tuple from each input tape */ - for (srcTape = 0; srcTape < state->maxTapes; srcTape++) - { - int tupIndex = state->mergenext[srcTape]; - SortTuple *tup; - - if (tupIndex) - { - tup = &state->memtuples[tupIndex]; - state->mergenext[srcTape] = tup->tupindex; - if (state->mergenext[srcTape] == 0) - state->mergelast[srcTape] = 0; - rum_tuplesort_heap_insert(state, tup, srcTape, false); - /* put the now-unused memtuples entry on the freelist */ - tup->tupindex = state->mergefreelist; - state->mergefreelist = tupIndex; - state->mergeavailslots[srcTape]++; - } - } -} - -/* - * mergepreread - load tuples from merge input tapes - * - * This routine exists to improve sequentiality of reads during a merge pass, - * as explained in the header comments of this file. Load tuples from each - * active source tape until the tape's run is exhausted or it has used up - * its fair share of available memory. In any case, we guarantee that there - * is at least one preread tuple available from each unexhausted input tape. - * - * We invoke this routine at the start of a merge pass for initial load, - * and then whenever any tape's preread data runs out. Note that we load - * as much data as possible from all tapes, not just the one that ran out. 
- * This is because logtape.c works best with a usage pattern that alternates - * between reading a lot of data and writing a lot of data, so whenever we - * are forced to read, we should fill working memory completely. - * - * In FINALMERGE state, we *don't* use this routine, but instead just preread - * from the single tape that ran dry. There's no read/write alternation in - * that state and so no point in scanning through all the tapes to fix one. - * (Moreover, there may be quite a lot of inactive tapes in that state, since - * we might have had many fewer runs than tapes. In a regular tape-to-tape - * merge we can expect most of the tapes to be active.) - */ -static void -mergepreread(RumTuplesortstate *state) -{ - int srcTape; - - for (srcTape = 0; srcTape < state->maxTapes; srcTape++) - mergeprereadone(state, srcTape); -} - -/* - * mergeprereadone - load tuples from one merge input tape - * - * Read tuples from the specified tape until it has used up its free memory - * or array slots; but ensure that we have at least one tuple, if any are - * to be had. 
- */ -static void -mergeprereadone(RumTuplesortstate *state, int srcTape) -{ - unsigned int tuplen; - SortTuple stup; - int tupIndex; - long priorAvail, - spaceUsed; - - if (!state->mergeactive[srcTape]) - return; /* tape's run is already exhausted */ - priorAvail = state->availMem; - state->availMem = state->mergeavailmem[srcTape]; - while ((state->mergeavailslots[srcTape] > 0 && !LACKMEM(state)) || - state->mergenext[srcTape] == 0) - { - /* read next tuple, if any */ - if ((tuplen = getlen(state, srcTape, true)) == 0) - { - state->mergeactive[srcTape] = false; - break; - } - READTUP(state, &stup, srcTape, tuplen); - /* find a free slot in memtuples[] for it */ - tupIndex = state->mergefreelist; - if (tupIndex) - state->mergefreelist = state->memtuples[tupIndex].tupindex; - else - { - tupIndex = state->mergefirstfree++; - Assert(tupIndex < state->memtupsize); - } - state->mergeavailslots[srcTape]--; - /* store tuple, append to list for its tape */ - stup.tupindex = 0; - state->memtuples[tupIndex] = stup; - if (state->mergelast[srcTape]) - state->memtuples[state->mergelast[srcTape]].tupindex = tupIndex; - else - state->mergenext[srcTape] = tupIndex; - state->mergelast[srcTape] = tupIndex; - } - /* update per-tape and global availmem counts */ - spaceUsed = state->mergeavailmem[srcTape] - state->availMem; - state->mergeavailmem[srcTape] = state->availMem; - state->availMem = priorAvail - spaceUsed; -} - -/* - * dumptuples - remove tuples from heap and write to tape - * - * This is used during initial-run building, but not during merging. - * - * When alltuples = false, dump only enough tuples to get under the - * availMem limit (and leave at least one tuple in the heap in any case, - * since puttuple assumes it always has a tuple to compare to). We also - * insist there be at least one free slot in the memtuples[] array. - * - * When alltuples = true, dump everything currently in memory. - * (This case is only used at end of input data.) 
- * - * If we empty the heap, close out the current run and return (this should - * only happen at end of input data). If we see that the tuple run number - * at the top of the heap has changed, start a new run. - */ -static void -dumptuples(RumTuplesortstate *state, bool alltuples) -{ - while (alltuples || - (LACKMEM(state) && state->memtupcount > 1) || - state->memtupcount >= state->memtupsize) - { - /* - * Dump the heap's frontmost entry, and sift up to remove it from the - * heap. - */ - Assert(state->memtupcount > 0); - WRITETUP(state, state->tp_tapenum[state->destTape], - &state->memtuples[0]); - rum_tuplesort_heap_siftup(state, true); - - /* - * If the heap is empty *or* top run number has changed, we've - * finished the current run. - */ - if (state->memtupcount == 0 || - state->currentRun != state->memtuples[0].tupindex) - { - markrunend(state, state->tp_tapenum[state->destTape]); - state->currentRun++; - state->tp_runs[state->destTape]++; - state->tp_dummy[state->destTape]--; /* per Alg D step D2 */ - -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, "finished writing%s run %d to tape %d: %s", - (state->memtupcount == 0) ? " final" : "", - state->currentRun, state->destTape, - pg_rusage_show(&state->ru_start)); -#endif - - /* - * Done if heap is empty, else prepare for new run. - */ - if (state->memtupcount == 0) - break; - Assert(state->currentRun == state->memtuples[0].tupindex); - selectnewtape(state); - } - } -} - - -/* - * Heap manipulation routines, per Knuth's Algorithm 5.2.3H. - * - * Compare two SortTuples. If checkIndex is true, use the tuple index - * as the front of the sort key; otherwise, no. - */ - -#define HEAPCOMPARE(tup1,tup2) \ - (checkIndex && ((tup1)->tupindex != (tup2)->tupindex) ? \ - ((tup1)->tupindex) - ((tup2)->tupindex) : \ - COMPARETUP(state, tup1, tup2)) - -/* - * Convert the existing unordered array of SortTuples to a bounded heap, - * discarding all but the smallest "state->bound" tuples. 
- * - * When working with a bounded heap, we want to keep the largest entry - * at the root (array entry zero), instead of the smallest as in the normal - * sort case. This allows us to discard the largest entry cheaply. - * Therefore, we temporarily reverse the sort direction. - * - * We assume that all entries in a bounded heap will always have tupindex - * zero; it therefore doesn't matter that HEAPCOMPARE() doesn't reverse - * the direction of comparison for tupindexes. - */ -static void -make_bounded_heap(RumTuplesortstate *state) -{ - int tupcount = state->memtupcount; - int i; - - Assert(state->status == TSS_INITIAL); - Assert(state->bounded); - Assert(tupcount >= state->bound); - - /* Reverse sort direction so largest entry will be at root */ - REVERSEDIRECTION(state); - - state->memtupcount = 0; /* make the heap empty */ - for (i = 0; i < tupcount; i++) - { - if (state->memtupcount >= state->bound && - COMPARETUP(state, &state->memtuples[i], &state->memtuples[0]) <= 0) - { - /* New tuple would just get thrown out, so skip it */ - free_sort_tuple(state, &state->memtuples[i]); - CHECK_FOR_INTERRUPTS(); - } - else - { - /* Insert next tuple into heap */ - /* Must copy source tuple to avoid possible overwrite */ - SortTuple stup = state->memtuples[i]; - - rum_tuplesort_heap_insert(state, &stup, 0, false); - - /* If heap too full, discard largest entry */ - if (state->memtupcount > state->bound) - { - free_sort_tuple(state, &state->memtuples[0]); - rum_tuplesort_heap_siftup(state, false); - } - } - } - - Assert(state->memtupcount == state->bound); - state->status = TSS_BOUNDED; -} - -/* - * Convert the bounded heap to a properly-sorted array - */ -static void -sort_bounded_heap(RumTuplesortstate *state) -{ - int tupcount = state->memtupcount; - - Assert(state->status == TSS_BOUNDED); - Assert(state->bounded); - Assert(tupcount == state->bound); - - /* - * We can unheapify in place because each sift-up will remove the largest - * entry, which we can promptly 
store in the newly freed slot at the end. - * Once we're down to a single-entry heap, we're done. - */ - while (state->memtupcount > 1) - { - SortTuple stup = state->memtuples[0]; - - /* this sifts-up the next-largest entry and decreases memtupcount */ - rum_tuplesort_heap_siftup(state, false); - state->memtuples[state->memtupcount] = stup; - } - state->memtupcount = tupcount; - - /* - * Reverse sort direction back to the original state. This is not - * actually necessary but seems like a good idea for tidiness. - */ - REVERSEDIRECTION(state); - - state->status = TSS_SORTEDINMEM; - state->boundUsed = true; -} - -/* - * Insert a new tuple into an empty or existing heap, maintaining the - * heap invariant. Caller is responsible for ensuring there's room. - * - * Note: we assume *tuple is a temporary variable that can be scribbled on. - * For some callers, tuple actually points to a memtuples[] entry above the - * end of the heap. This is safe as long as it's not immediately adjacent - * to the end of the heap (ie, in the [memtupcount] array entry) --- if it - * is, it might get overwritten before being moved into the heap! - */ -static void -rum_tuplesort_heap_insert(RumTuplesortstate *state, SortTuple *tuple, - int tupleindex, bool checkIndex) -{ - SortTuple *memtuples; - int j; - - /* - * Save the tupleindex --- see notes above about writing on *tuple. It's a - * historical artifact that tupleindex is passed as a separate argument - * and not in *tuple, but it's notationally convenient so let's leave it - * that way. - */ - tuple->tupindex = tupleindex; - - memtuples = state->memtuples; - Assert(state->memtupcount < state->memtupsize); - - CHECK_FOR_INTERRUPTS(); - - /* - * Sift-up the new entry, per Knuth 5.2.3 exercise 16. Note that Knuth is - * using 1-based array indexes, not 0-based. 
- */ - j = state->memtupcount++; - while (j > 0) - { - int i = (j - 1) >> 1; - - if (HEAPCOMPARE(tuple, &memtuples[i]) >= 0) - break; - memtuples[j] = memtuples[i]; - j = i; - } - memtuples[j] = *tuple; -} - -/* - * The tuple at state->memtuples[0] has been removed from the heap. - * Decrement memtupcount, and sift up to maintain the heap invariant. - */ -static void -rum_tuplesort_heap_siftup(RumTuplesortstate *state, bool checkIndex) -{ - SortTuple *memtuples = state->memtuples; - SortTuple *tuple; - int i, - n; - - if (--state->memtupcount <= 0) - return; - - CHECK_FOR_INTERRUPTS(); - - n = state->memtupcount; - tuple = &memtuples[n]; /* tuple that must be reinserted */ - i = 0; /* i is where the "hole" is */ - for (;;) - { - int j = 2 * i + 1; - - if (j >= n) - break; - if (j + 1 < n && - HEAPCOMPARE(&memtuples[j], &memtuples[j + 1]) > 0) - j++; - if (HEAPCOMPARE(tuple, &memtuples[j]) <= 0) - break; - memtuples[i] = memtuples[j]; - i = j; - } - memtuples[i] = *tuple; -} - - -/* - * Tape interface routines - */ - -static unsigned int -getlen(RumTuplesortstate *state, int tapenum, bool eofOK) -{ - unsigned int len; - - if (LogicalTapeRead(state->tapeset, tapenum, - &len, sizeof(len)) != sizeof(len)) - elog(ERROR, "unexpected end of tape"); - if (len == 0 && !eofOK) - elog(ERROR, "unexpected end of data"); - return len; -} - -static void -markrunend(RumTuplesortstate *state, int tapenum) -{ - unsigned int len = 0; - - LogicalTapeWrite(state->tapeset, tapenum, (void *) &len, sizeof(len)); -} - - -/* - * Convenience routine to free a tuple previously loaded into sort memory - */ -static void -free_sort_tuple(RumTuplesortstate *state, SortTuple *stup) -{ - FREEMEM(state, GetMemoryChunkSpace(stup->tuple)); - pfree(stup->tuple); -} +static int +comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +{ + return comparetup_rum(a, b, state, false); +} static int -comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate 
*state) +comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state, bool compareItemPointer) { RumSortItem *i1, *i2; @@ -2349,7 +507,7 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) return 1; } - if (!state->compareItemPointer) + if (!compareItemPointer) return 0; /* @@ -2374,7 +532,7 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) } static void -copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup) +copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup) { RumSortItem *item = (RumSortItem *) tup; @@ -2384,19 +542,24 @@ copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup) USEMEM(state, GetMemoryChunkSpace(tup)); } +#if PG_VERSION_NUM >= 150000 static void -writetup_rum(RumTuplesortstate *state, int tapenum, SortTuple *stup) +writetup_rum(RumTuplesortstate * state, LogicalTape *unused, + SortTuple *stup) +#else +static void +writetup_rum(RumTuplesortstate * state, int tapenum, SortTuple *stup) +#endif { RumSortItem *item = (RumSortItem *) stup->tuple; unsigned int writtenlen = RumSortItemSize(state->nKeys) + sizeof(unsigned int); - - LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) item, RumSortItemSize(state->nKeys)); if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) &writtenlen, sizeof(writtenlen)); FREEMEM(state, GetMemoryChunkSpace(item)); @@ -2404,8 +567,13 @@ writetup_rum(RumTuplesortstate *state, int tapenum, SortTuple *stup) } static void -readtup_rum(RumTuplesortstate *state, SortTuple *stup, +#if PG_VERSION_NUM >= 150000 +readtup_rum(RumTuplesortstate * state, SortTuple *stup, + LogicalTape *unused, unsigned int len) +#else +readtup_rum(RumTuplesortstate * state, SortTuple *stup, int tapenum, unsigned int len) +#endif { unsigned int tuplen = len - sizeof(unsigned int); RumSortItem *item = (RumSortItem *) palloc(RumSortItemSize(state->nKeys)); @@ -2413,33 +581,28 @@ readtup_rum(RumTuplesortstate *state, SortTuple *stup, Assert(tuplen == RumSortItemSize(state->nKeys)); USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact(state->tapeset, tapenum, - (void *) item, RumSortItemSize(state->nKeys)); + LogicalTapeReadExact_compat(state, tapenum, + (void *) item, RumSortItemSize(state->nKeys)); stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); stup->isnull1 = false; stup->tuple = item; if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); -} - -static void -reversedirection_rum(RumTuplesortstate *state) -{ - state->reverse = !state->reverse; + LogicalTapeReadExact_compat(state, tapenum, + &tuplen, sizeof(tuplen)); } static int -comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) +comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) { - RumItem *i1, *i2; + RumItem *i1, + *i2; /* Extract RumItem from RumScanItem */ i1 = (RumItem *) a->tuple; i2 = (RumItem *) b->tuple; - if (state->cmp) + if (((RumTuplesortstateExt *) state)->cmp) { if (i1->addInfoIsNull || i2->addInfoIsNull) { @@ -2449,9 +612,9 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate *st } else { - int r; + int r; - r = DatumGetInt32(FunctionCall2(state->cmp, + r = DatumGetInt32(FunctionCall2(((RumTuplesortstateExt *) state)->cmp, i1->addInfo, i2->addInfo)); @@ -2482,7 +645,7 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate *st } static void -copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, void *tup) +copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) { stup->isnull1 = true; stup->tuple = palloc(sizeof(RumScanItem)); @@ -2490,18 +653,24 @@ copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, void *tup) USEMEM(state, GetMemoryChunkSpace(stup->tuple)); } +#if PG_VERSION_NUM >= 150000 static void -writetup_rumitem(RumTuplesortstate *state, int tapenum, SortTuple *stup) +writetup_rumitem(RumTuplesortstate * state, LogicalTape *unused, + SortTuple *stup) +#else +static void +writetup_rumitem(RumTuplesortstate * state, int tapenum, SortTuple *stup) +#endif { RumScanItem *item = (RumScanItem *) stup->tuple; unsigned int writtenlen = sizeof(*item) + sizeof(unsigned int); - LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) &writtenlen, sizeof(writtenlen)); - 
LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) item, sizeof(*item)); if (state->randomAccess) /* need trailing length word? */ - LogicalTapeWrite(state->tapeset, tapenum, + LogicalTapeWrite(TAPE(state, tapenum), (void *) &writtenlen, sizeof(writtenlen)); FREEMEM(state, GetMemoryChunkSpace(item)); @@ -2509,8 +678,13 @@ writetup_rumitem(RumTuplesortstate *state, int tapenum, SortTuple *stup) } static void -readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, - int tapenum, unsigned int len) +#if PG_VERSION_NUM >= 150000 +readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + LogicalTape *unused, unsigned int len) +#else +readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + int tapenum, unsigned int len) +#endif { unsigned int tuplen = len - sizeof(unsigned int); RumScanItem *item = (RumScanItem *) palloc(sizeof(RumScanItem)); @@ -2518,13 +692,12 @@ readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, Assert(tuplen == sizeof(RumScanItem)); USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact(state->tapeset, tapenum, - (void *) item, tuplen); + LogicalTapeReadExact_compat(state, tapenum, + (void *) item, tuplen); stup->isnull1 = true; stup->tuple = item; if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact(state->tapeset, tapenum, - &tuplen, sizeof(tuplen)); + LogicalTapeReadExact_compat(state, tapenum, + &tuplen, sizeof(tuplen)); } - diff --git a/src/rumsort.h b/src/rumsort.h index 452fdfc596..30faeb9d17 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -25,7 +25,7 @@ /* RumTuplesortstate is an opaque type whose details are not known outside * rumsort.c. 
*/ -typedef struct RumTuplesortstate RumTuplesortstate; +typedef struct Tuplesortstate RumTuplesortstate; struct RumScanItem; typedef struct From 8ff329d661f7727faf53002fa3ce0479e0516666 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 10 Nov 2021 22:27:06 +0400 Subject: [PATCH 116/182] Rumsort refactoring to simplify - remove unused defines - use tuplesort_begin_common instead of rum_tuplesort_begin_common - simplify version-conditional defines - rearrange functions order in rumsort.c --- src/rumsort.c | 552 +++++++++++++++++++------------------------------- 1 file changed, 206 insertions(+), 346 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index a78e2b931b..113b7dc434 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -43,30 +43,6 @@ #include "tuplesort96.c" #endif -#if PG_VERSION_NUM < 100000 -/* Provide fallback for old version of tape interface for 9.6 */ -#define LogicalTapeRewindForRead(x, y, z) LogicalTapeRewind((x), (y), false) -#define LogicalTapeRewindForWrite(x, y) LogicalTapeRewind((x), (y), true) -#define tuplesort_gettuple_common(x, y, z) tuplesort_gettuple_common((x), (y), (z), should_free) -#endif - -#if PG_VERSION_NUM >= 110000 -#if PG_VERSION_NUM >= 130000 -#define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, false, NULL, NULL, 1) -#else -#define LogicalTapeSetCreate(X) LogicalTapeSetCreate(X, NULL, NULL, 1) -#endif -#define LogicalTapeFreeze(X, Y) LogicalTapeFreeze(X, Y, NULL) -#endif - -#if PG_VERSION_NUM >= 150000 -#define TAPE(state, tapenum) state->result_tape -#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->result_tape, ##args) -#else -#define TAPE(state, tapenum) state->tapeset, tapenum -#define LogicalTapeReadExact_compat(state, tapenum, args...) 
LogicalTapeReadExact(state->tapeset, tapenum, ##args) -#endif - /* For PGPRO since v.13 trace_sort is imported from backend by including its * declaration in guc.h (guc.h contains added Windows export/import magic to be done * during postgres.exe compilation). @@ -89,127 +65,234 @@ typedef struct RumTuplesortstateExt FmgrInfo *cmp; } RumTuplesortstateExt; -static RumTuplesortstate * rum_tuplesort_begin_common(int workMem, bool randomAccess); static int comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); static int comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); static int comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state, bool compareItemPointer); - -static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); - -#if PG_VERSION_NUM >= 150000 -static void writetup_rum(RumTuplesortstate * state, LogicalTape *unused, - SortTuple *stup); -static void readtup_rum(RumTuplesortstate * state, SortTuple *stup, - LogicalTape *unused, unsigned int len); -static void writetup_rumitem(RumTuplesortstate * state, LogicalTape *unused, - SortTuple *stup); -static void readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - LogicalTape *unused, unsigned int len); -#else -static void writetup_rum(RumTuplesortstate * state, int tapenum, - SortTuple *stup); -static void readtup_rum(RumTuplesortstate * state, SortTuple *stup, - int tapenum, unsigned int len); -static void writetup_rumitem(RumTuplesortstate * state, int tapenum, - SortTuple *stup); -static void readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - int tapenum, unsigned int len); -#endif - static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); +static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); static void copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup); -/* - * rum_tuplesort_begin_xxx - * 
- * Initialize for a tuple sort operation. - * - * After calling rum_tuplesort_begin, the caller should call rum_tuplesort_putXXX - * zero or more times, then call rum_tuplesort_performsort when all the tuples - * have been supplied. After performsort, retrieve the tuples in sorted - * order by calling rum_tuplesort_getXXX until it returns false/NULL. (If random - * access was requested, rescan, markpos, and restorepos can also be called.) - * Call rum_tuplesort_end to terminate the operation and release memory/disk space. - * - * Each variant of rum_tuplesort_begin has a workMem parameter specifying the - * maximum number of kilobytes of RAM to use before spilling data to disk. - * (The normal value of this parameter is work_mem, but some callers use - * other values.) Each variant also has a randomAccess parameter specifying - * whether the caller needs non-sequential access to the sort result. - */ +static int +comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +{ + return comparetup_rum(a, b, state, true); +} -static RumTuplesortstate * -rum_tuplesort_begin_common(int workMem, bool randomAccess) +static int +comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) { - RumTuplesortstate *state; - MemoryContext sortcontext; - MemoryContext oldcontext; + return comparetup_rum(a, b, state, false); +} - /* - * Create a working memory context for this sort operation. All data - * needed by the sort will live inside this context. 
- */ - sortcontext = RumContextCreate(CurrentMemoryContext, "TupleSort"); +static int +comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state, bool compareItemPointer) +{ + RumSortItem *i1, + *i2; + float8 v1 = DatumGetFloat8(a->datum1); + float8 v2 = DatumGetFloat8(b->datum1); + int i; + + if (v1 < v2) + return -1; + else if (v1 > v2) + return 1; + + i1 = (RumSortItem *) a->tuple; + i2 = (RumSortItem *) b->tuple; + for (i = 1; i < state->nKeys; i++) + { + if (i1->data[i] < i2->data[i]) + return -1; + else if (i1->data[i] > i2->data[i]) + return 1; + } + + if (!compareItemPointer) + return 0; /* - * Make the Tuplesortstate within the per-sort context. This way, we - * don't need a separate pfree() operation for it at shutdown. + * If key values are equal, we sort on ItemPointer. */ - oldcontext = MemoryContextSwitchTo(sortcontext); + if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) + return -1; + else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) + return 1; - state = (RumTuplesortstate *) palloc0(sizeof(RumTuplesortstate)); + if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) + return -1; + else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) + return 1; -#ifdef TRACE_SORT - if (trace_sort) - pg_rusage_init(&state->ru_start); -#endif + if (i1->iptr.ip_posid < i2->iptr.ip_posid) + return -1; + else if (i1->iptr.ip_posid > i2->iptr.ip_posid) + return 1; - state->status = TSS_INITIAL; - state->randomAccess = randomAccess; - state->bounded = false; - state->boundUsed = false; - state->allowedMem = workMem * 1024L; - state->availMem = state->allowedMem; - state->sortcontext = sortcontext; - state->tapeset = NULL; + return 0; +} - state->memtupcount = 0; +static int +comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +{ + RumItem *i1, + *i2; + + /* Extract RumItem from RumScanItem */ + i1 = (RumItem *) a->tuple; + i2 = (RumItem *) b->tuple; + + if (((RumTuplesortstateExt *) 
state)->cmp) + { + if (i1->addInfoIsNull || i2->addInfoIsNull) + { + if (!(i1->addInfoIsNull && i2->addInfoIsNull)) + return (i1->addInfoIsNull) ? 1 : -1; + /* go to itempointer compare */ + } + else + { + int r; + + r = DatumGetInt32(FunctionCall2(((RumTuplesortstateExt *) state)->cmp, + i1->addInfo, + i2->addInfo)); + + if (r != 0) + return r; + } + } /* - * Initial size of array must be more than ALLOCSET_SEPARATE_THRESHOLD; - * see comments in grow_memtuples(). + * If key values are equal, we sort on ItemPointer. */ - state->memtupsize = Max(1024, - ALLOCSET_SEPARATE_THRESHOLD / sizeof(SortTuple) + 1); + if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) + return -1; + else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) + return 1; - state->growmemtuples = true; - state->memtuples = (SortTuple *) palloc(state->memtupsize * sizeof(SortTuple)); + if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) + return -1; + else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) + return 1; - USEMEM(state, GetMemoryChunkSpace(state->memtuples)); + if (i1->iptr.ip_posid < i2->iptr.ip_posid) + return -1; + else if (i1->iptr.ip_posid > i2->iptr.ip_posid) + return 1; - /* workMem must be large enough for the minimal memtuples array */ - if (LACKMEM(state)) - elog(ERROR, "insufficient memory allowed for sort"); + return 0; +} - state->currentRun = 0; +static void +copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup) +{ + RumSortItem *item = (RumSortItem *) tup; - /* - * maxTapes, tapeRange, and Algorithm D variables will be initialized by - * inittapes(), if needed - */ + stup->datum1 = Float8GetDatum(state->nKeys > 0 ? 
item->data[0] : 0); + stup->isnull1 = false; + stup->tuple = tup; + USEMEM(state, GetMemoryChunkSpace(tup)); +} + +static void +copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) +{ + stup->isnull1 = true; + stup->tuple = palloc(sizeof(RumScanItem)); + memcpy(stup->tuple, tup, sizeof(RumScanItem)); + USEMEM(state, GetMemoryChunkSpace(stup->tuple)); +} #if PG_VERSION_NUM >= 150000 - state->result_tape = NULL; +#define LT_DEF LogicalTape *unused +#define TAPE(state, tapenum) state->result_tape +#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->result_tape, ##args) #else - state->result_tape = -1; /* flag that result tape has not been formed */ +#define LT_DEF int tapenum +#define TAPE(state, tapenum) state->tapeset, tapenum +#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->tapeset, tapenum, ##args) #endif - MemoryContextSwitchTo(oldcontext); - return state; +static void +writetup_rum(RumTuplesortstate * state, LT_DEF, SortTuple *stup) +{ + RumSortItem *item = (RumSortItem *) stup->tuple; + unsigned int writtenlen = RumSortItemSize(state->nKeys) + sizeof(unsigned int); + + LogicalTapeWrite(TAPE(state, tapenum), + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(TAPE(state, tapenum), + (void *) item, RumSortItemSize(state->nKeys)); + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeWrite(TAPE(state, tapenum), + (void *) &writtenlen, sizeof(writtenlen)); + + FREEMEM(state, GetMemoryChunkSpace(item)); + pfree(item); +} + +static void +writetup_rumitem(RumTuplesortstate * state, LT_DEF, SortTuple *stup) +{ + RumScanItem *item = (RumScanItem *) stup->tuple; + unsigned int writtenlen = sizeof(*item) + sizeof(unsigned int); + + LogicalTapeWrite(TAPE(state, tapenum), + (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(TAPE(state, tapenum), + (void *) item, sizeof(*item)); + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeWrite(TAPE(state, tapenum), + (void *) &writtenlen, sizeof(writtenlen)); + + FREEMEM(state, GetMemoryChunkSpace(item)); + pfree(item); +} + +static void +readtup_rum(RumTuplesortstate * state, SortTuple *stup, + LT_DEF, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + RumSortItem *item = (RumSortItem *) palloc(RumSortItemSize(state->nKeys)); + + Assert(tuplen == RumSortItemSize(state->nKeys)); + + USEMEM(state, GetMemoryChunkSpace(item)); + LogicalTapeReadExact_compat(state, tapenum, + (void *) item, RumSortItemSize(state->nKeys)); + stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); + stup->isnull1 = false; + stup->tuple = item; + + if (state->randomAccess) /* need trailing length word? */ + LogicalTapeReadExact_compat(state, tapenum, + &tuplen, sizeof(tuplen)); +} + +static void +readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + LT_DEF, unsigned int len) +{ + unsigned int tuplen = len - sizeof(unsigned int); + RumScanItem *item = (RumScanItem *) palloc(sizeof(RumScanItem)); + + Assert(tuplen == sizeof(RumScanItem)); + + USEMEM(state, GetMemoryChunkSpace(item)); + LogicalTapeReadExact_compat(state, tapenum, + (void *) item, tuplen); + stup->isnull1 = true; + stup->tuple = item; + + if (state->randomAccess) /* need trailing length word? 
*/ + LogicalTapeReadExact_compat(state, tapenum, + &tuplen, sizeof(tuplen)); } /* @@ -222,11 +305,15 @@ rum_tuplesort_get_memorycontext(RumTuplesortstate * state) return state->sortcontext; } +#if PG_VERSION_NUM >= 110000 +#define tuplesort_begin_common(x,y) tuplesort_begin_common((x), NULL, (y)) +#endif + RumTuplesortstate * rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer) { - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, randomAccess); + RumTuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->sortcontext); @@ -253,7 +340,7 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, RumTuplesortstate * rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) { - RumTuplesortstate *state = rum_tuplesort_begin_common(workMem, false); + RumTuplesortstate *state = tuplesort_begin_common(workMem, false); RumTuplesortstateExt *rs; MemoryContext oldcontext; @@ -392,7 +479,11 @@ static bool rum_tuplesort_gettuple_common(RumTuplesortstate * state, bool forward, SortTuple *stup, bool *should_free) { - bool res = tuplesort_gettuple_common(state, forward, stup); + bool res = tuplesort_gettuple_common(state, forward, stup +#if PG_VERSION_NUM < 100000 + ,should_free +#endif + ); switch (state->status) { @@ -470,234 +561,3 @@ rum_tuplesort_merge_order(long allowedMem) return mOrder; } - -static int -comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) -{ - return comparetup_rum(a, b, state, true); -} - -static int -comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) -{ - return comparetup_rum(a, b, state, false); -} - -static int -comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state, bool compareItemPointer) -{ - RumSortItem *i1, - *i2; - float8 v1 = DatumGetFloat8(a->datum1); - float8 v2 = DatumGetFloat8(b->datum1); - int i; - - 
if (v1 < v2) - return -1; - else if (v1 > v2) - return 1; - - i1 = (RumSortItem *) a->tuple; - i2 = (RumSortItem *) b->tuple; - for (i = 1; i < state->nKeys; i++) - { - if (i1->data[i] < i2->data[i]) - return -1; - else if (i1->data[i] > i2->data[i]) - return 1; - } - - if (!compareItemPointer) - return 0; - - /* - * If key values are equal, we sort on ItemPointer. - */ - if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) - return -1; - else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) - return 1; - - if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) - return -1; - else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) - return 1; - - if (i1->iptr.ip_posid < i2->iptr.ip_posid) - return -1; - else if (i1->iptr.ip_posid > i2->iptr.ip_posid) - return 1; - - return 0; -} - -static void -copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup) -{ - RumSortItem *item = (RumSortItem *) tup; - - stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); - stup->isnull1 = false; - stup->tuple = tup; - USEMEM(state, GetMemoryChunkSpace(tup)); -} - -#if PG_VERSION_NUM >= 150000 -static void -writetup_rum(RumTuplesortstate * state, LogicalTape *unused, - SortTuple *stup) -#else -static void -writetup_rum(RumTuplesortstate * state, int tapenum, SortTuple *stup) -#endif -{ - RumSortItem *item = (RumSortItem *) stup->tuple; - unsigned int writtenlen = RumSortItemSize(state->nKeys) + sizeof(unsigned int); - - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(TAPE(state, tapenum), - (void *) item, RumSortItemSize(state->nKeys)); - if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); - - FREEMEM(state, GetMemoryChunkSpace(item)); - pfree(item); -} - -static void -#if PG_VERSION_NUM >= 150000 -readtup_rum(RumTuplesortstate * state, SortTuple *stup, - LogicalTape *unused, unsigned int len) -#else -readtup_rum(RumTuplesortstate * state, SortTuple *stup, - int tapenum, unsigned int len) -#endif -{ - unsigned int tuplen = len - sizeof(unsigned int); - RumSortItem *item = (RumSortItem *) palloc(RumSortItemSize(state->nKeys)); - - Assert(tuplen == RumSortItemSize(state->nKeys)); - - USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact_compat(state, tapenum, - (void *) item, RumSortItemSize(state->nKeys)); - stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); - stup->isnull1 = false; - stup->tuple = item; - - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact_compat(state, tapenum, - &tuplen, sizeof(tuplen)); -} - -static int -comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) -{ - RumItem *i1, - *i2; - - /* Extract RumItem from RumScanItem */ - i1 = (RumItem *) a->tuple; - i2 = (RumItem *) b->tuple; - - if (((RumTuplesortstateExt *) state)->cmp) - { - if (i1->addInfoIsNull || i2->addInfoIsNull) - { - if (!(i1->addInfoIsNull && i2->addInfoIsNull)) - return (i1->addInfoIsNull) ? 1 : -1; - /* go to itempointer compare */ - } - else - { - int r; - - r = DatumGetInt32(FunctionCall2(((RumTuplesortstateExt *) state)->cmp, - i1->addInfo, - i2->addInfo)); - - if (r != 0) - return r; - } - } - - /* - * If key values are equal, we sort on ItemPointer. 
- */ - if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) - return -1; - else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) - return 1; - - if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) - return -1; - else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) - return 1; - - if (i1->iptr.ip_posid < i2->iptr.ip_posid) - return -1; - else if (i1->iptr.ip_posid > i2->iptr.ip_posid) - return 1; - - return 0; -} - -static void -copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) -{ - stup->isnull1 = true; - stup->tuple = palloc(sizeof(RumScanItem)); - memcpy(stup->tuple, tup, sizeof(RumScanItem)); - USEMEM(state, GetMemoryChunkSpace(stup->tuple)); -} - -#if PG_VERSION_NUM >= 150000 -static void -writetup_rumitem(RumTuplesortstate * state, LogicalTape *unused, - SortTuple *stup) -#else -static void -writetup_rumitem(RumTuplesortstate * state, int tapenum, SortTuple *stup) -#endif -{ - RumScanItem *item = (RumScanItem *) stup->tuple; - unsigned int writtenlen = sizeof(*item) + sizeof(unsigned int); - - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(TAPE(state, tapenum), - (void *) item, sizeof(*item)); - if (state->randomAccess) /* need trailing length word? 
*/ - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); - - FREEMEM(state, GetMemoryChunkSpace(item)); - pfree(item); -} - -static void -#if PG_VERSION_NUM >= 150000 -readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - LogicalTape *unused, unsigned int len) -#else -readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - int tapenum, unsigned int len) -#endif -{ - unsigned int tuplen = len - sizeof(unsigned int); - RumScanItem *item = (RumScanItem *) palloc(sizeof(RumScanItem)); - - Assert(tuplen == sizeof(RumScanItem)); - - USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact_compat(state, tapenum, - (void *) item, tuplen); - stup->isnull1 = true; - stup->tuple = item; - - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact_compat(state, tapenum, - &tuplen, sizeof(tuplen)); -} From a904b36891fbba3eef0ca12a6f03ca406a582d49 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 11 Nov 2021 18:32:41 +0400 Subject: [PATCH 117/182] Further refactoring tuplesort - Move code duplicates into separate functions - Disable import of trace_sort from backend as it is unconditionally defined in the module (in tuplesortXX.c) --- src/rumsort.c | 310 +++++++++++++++++++------------------------------- src/rumsort.h | 3 - 2 files changed, 115 insertions(+), 198 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 113b7dc434..215496d282 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -43,18 +43,6 @@ #include "tuplesort96.c" #endif -/* For PGPRO since v.13 trace_sort is imported from backend by including its - * declaration in guc.h (guc.h contains added Windows export/import magic to be done - * during postgres.exe compilation). - * For older or non-PGPRO versions on Windows platform trace_sort is not exported by - * backend so it is declared local for this case.
- */ -#ifdef TRACE_SORT -#if ( !defined (_MSC_VER) || (PG_VERSION_NUM >= 130000 && defined (PGPRO_VERSION)) ) -#include "utils/guc.h" -#endif -#endif - /* * We need extra field in a state structure but we should not modify struct RumTuplesortstate * which is inherited from Tuplesortstate core function. @@ -65,27 +53,39 @@ typedef struct RumTuplesortstateExt FmgrInfo *cmp; } RumTuplesortstateExt; +static int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2); +static int comparetup_rum(const SortTuple *a, const SortTuple *b, + RumTuplesortstate * state, bool compareItemPointer); static int comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); static int comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); -static int comparetup_rum(const SortTuple *a, const SortTuple *b, - RumTuplesortstate * state, bool compareItemPointer); static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state); static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); static void copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup); +static void *rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free); +static void rum_tuplesort_putrum_internal(RumTuplesortstate * state, void *item); static int -comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2) { - return comparetup_rum(a, b, state, true); -} + if (p1.ip_blkid.bi_hi < p2.ip_blkid.bi_hi) + return -1; + else if (p1.ip_blkid.bi_hi > p2.ip_blkid.bi_hi) + return 1; -static int -comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) -{ - return comparetup_rum(a, b, state, false); + if (p1.ip_blkid.bi_lo < p2.ip_blkid.bi_lo) + return -1; + else if (p1.ip_blkid.bi_lo > p2.ip_blkid.bi_lo) + return 1; + + if (p1.ip_posid < 
p2.ip_posid) + return -1; + else if (p1.ip_posid > p2.ip_posid) + return 1; + + return 0; } static int @@ -104,6 +104,7 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state i1 = (RumSortItem *) a->tuple; i2 = (RumSortItem *) b->tuple; + for (i = 1; i < state->nKeys; i++) { if (i1->data[i] < i2->data[i]) @@ -118,22 +119,19 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state /* * If key values are equal, we sort on ItemPointer. */ - if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) - return -1; - else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) - return 1; - - if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) - return -1; - else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) - return 1; + return compare_rum_itempointer(i1->iptr, i2->iptr); +} - if (i1->iptr.ip_posid < i2->iptr.ip_posid) - return -1; - else if (i1->iptr.ip_posid > i2->iptr.ip_posid) - return 1; +static int +comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +{ + return comparetup_rum(a, b, state, true); +} - return 0; +static int +comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +{ + return comparetup_rum(a, b, state, false); } static int @@ -170,22 +168,7 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * s /* * If key values are equal, we sort on ItemPointer. 
*/ - if (i1->iptr.ip_blkid.bi_hi < i2->iptr.ip_blkid.bi_hi) - return -1; - else if (i1->iptr.ip_blkid.bi_hi > i2->iptr.ip_blkid.bi_hi) - return 1; - - if (i1->iptr.ip_blkid.bi_lo < i2->iptr.ip_blkid.bi_lo) - return -1; - else if (i1->iptr.ip_blkid.bi_lo > i2->iptr.ip_blkid.bi_lo) - return 1; - - if (i1->iptr.ip_posid < i2->iptr.ip_posid) - return -1; - else if (i1->iptr.ip_posid > i2->iptr.ip_posid) - return 1; - - return 0; + return compare_rum_itempointer(i1->iptr, i2->iptr); } static void @@ -209,27 +192,30 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) } #if PG_VERSION_NUM >= 150000 -#define LT_DEF LogicalTape *unused -#define TAPE(state, tapenum) state->result_tape -#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->result_tape, ##args) +#define LT_TYPE LogicalTape * +#define LT_ARG unused +#define TAPE(state, LT_ARG) state->result_tape +#define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->result_tape, ##args) #else -#define LT_DEF int tapenum -#define TAPE(state, tapenum) state->tapeset, tapenum -#define LogicalTapeReadExact_compat(state, tapenum, args...) LogicalTapeReadExact(state->tapeset, tapenum, ##args) +#define LT_TYPE int +#define LT_ARG tapenum +#define TAPE(state, LT_ARG) state->tapeset, LT_ARG +#define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->tapeset, LT_ARG, ##args) #endif static void -writetup_rum(RumTuplesortstate * state, LT_DEF, SortTuple *stup) +writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup, bool is_item) { RumSortItem *item = (RumSortItem *) stup->tuple; - unsigned int writtenlen = RumSortItemSize(state->nKeys) + sizeof(unsigned int); + size_t size = is_item ? 
sizeof(*item) : RumSortItemSize(state->nKeys); + unsigned int writtenlen = size + sizeof(unsigned int); - LogicalTapeWrite(TAPE(state, tapenum), + LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(TAPE(state, tapenum), - (void *) item, RumSortItemSize(state->nKeys)); + LogicalTapeWrite(TAPE(state, LT_ARG), + (void *) item, size); if (state->randomAccess) /* need trailing length word? */ - LogicalTapeWrite(TAPE(state, tapenum), + LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); FREEMEM(state, GetMemoryChunkSpace(item)); @@ -237,72 +223,52 @@ writetup_rum(RumTuplesortstate * state, LT_DEF, SortTuple *stup) } static void -writetup_rumitem(RumTuplesortstate * state, LT_DEF, SortTuple *stup) +writetup_rum(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) { - RumScanItem *item = (RumScanItem *) stup->tuple; - unsigned int writtenlen = sizeof(*item) + sizeof(unsigned int); - - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); - LogicalTapeWrite(TAPE(state, tapenum), - (void *) item, sizeof(*item)); - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeWrite(TAPE(state, tapenum), - (void *) &writtenlen, sizeof(writtenlen)); + writetup_rum_internal(state, LT_ARG, stup, false); +} - FREEMEM(state, GetMemoryChunkSpace(item)); - pfree(item); +static void +writetup_rumitem(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) +{ + writetup_rum_internal(state, LT_ARG, stup, true); } static void -readtup_rum(RumTuplesortstate * state, SortTuple *stup, - LT_DEF, unsigned int len) +readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, + LT_TYPE LT_ARG, unsigned int len, bool is_item) { unsigned int tuplen = len - sizeof(unsigned int); - RumSortItem *item = (RumSortItem *) palloc(RumSortItemSize(state->nKeys)); + size_t size = is_item ? 
sizeof(RumScanItem) : RumSortItemSize(state->nKeys); + void *item = palloc(size); Assert(tuplen == RumSortItemSize(state->nKeys)); USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact_compat(state, tapenum, - (void *) item, RumSortItemSize(state->nKeys)); - stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); - stup->isnull1 = false; + LogicalTapeReadExact_compat(state, LT_ARG, item, size); + stup->tuple = item; + stup->isnull1 = is_item; + + if (!is_item) + stup->datum1 = Float8GetDatum(state->nKeys > 0 ? ((RumSortItem *) item)->data[0] : 0); if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact_compat(state, tapenum, - &tuplen, sizeof(tuplen)); + LogicalTapeReadExact_compat(state, LT_ARG, &tuplen, sizeof(tuplen)); } static void -readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - LT_DEF, unsigned int len) +readtup_rum(RumTuplesortstate * state, SortTuple *stup, + LT_TYPE LT_ARG, unsigned int len) { - unsigned int tuplen = len - sizeof(unsigned int); - RumScanItem *item = (RumScanItem *) palloc(sizeof(RumScanItem)); - - Assert(tuplen == sizeof(RumScanItem)); - - USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact_compat(state, tapenum, - (void *) item, tuplen); - stup->isnull1 = true; - stup->tuple = item; - - if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact_compat(state, tapenum, - &tuplen, sizeof(tuplen)); + readtup_rum_internal(state, stup, LT_ARG, len, false); } -/* - * Get sort state memory context. Currently it is used only to allocate - * RumSortItem. 
- */ -MemoryContext -rum_tuplesort_get_memorycontext(RumTuplesortstate * state) +static void +readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, + LT_TYPE LT_ARG, unsigned int len) { - return state->sortcontext; + readtup_rum_internal(state, stup, LT_ARG, len, true); } #if PG_VERSION_NUM >= 110000 @@ -347,7 +313,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) oldcontext = MemoryContextSwitchTo(state->sortcontext); /* Allocate extended state in the same context as state */ - rs = palloc(sizeof(RumTuplesortstateExt)); + rs = palloc(sizeof(*rs)); #ifdef TRACE_SORT if (trace_sort) @@ -361,6 +327,8 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) state->writetup = writetup_rumitem; state->readtup = readtup_rumitem; memcpy(&rs->ts, state, sizeof(RumTuplesortstate)); + pfree(state); /* just to be sure *state isn't used anywhere + * else */ MemoryContextSwitchTo(oldcontext); @@ -430,8 +398,18 @@ rum_tuplesort_end(RumTuplesortstate * state) MemoryContextDelete(state->sortcontext); } -void -rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) +/* + * Get sort state memory context. Currently it is used only to allocate + * RumSortItem. + */ +MemoryContext +rum_tuplesort_get_memorycontext(RumTuplesortstate * state) +{ + return state->sortcontext; +} + +static void +rum_tuplesort_putrum_internal(RumTuplesortstate * state, void *item) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); SortTuple stup; @@ -440,7 +418,7 @@ rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) * Copy the given tuple into memory we control, and decrease availMem. * Then call the common code. 
*/ - COPYTUP(state, &stup, (void *) item); + COPYTUP(state, &stup, item); puttuple_common(state, &stup); @@ -448,20 +426,15 @@ rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) } void -rum_tuplesort_putrumitem(RumTuplesortstate * state, RumScanItem * item) +rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * Copy the given tuple into memory we control, and decrease availMem. - * Then call the common code. - */ - COPYTUP(state, &stup, (void *) item); - - puttuple_common(state, &stup); + rum_tuplesort_putrum_internal(state, item); +} - MemoryContextSwitchTo(oldcontext); +void +rum_tuplesort_putrumitem(RumTuplesortstate * state, RumScanItem * item) +{ + rum_tuplesort_putrum_internal(state, item); } void @@ -474,90 +447,37 @@ rum_tuplesort_performsort(RumTuplesortstate * state) * Internal routine to fetch the next tuple in either forward or back * direction into *stup. Returns false if no more tuples. * If *should_free is set, the caller must pfree stup.tuple when done with it. + * + * NOTE: in PG 10 and newer tuplesort_gettuple_common allocates tuple in tuplesort + * context and it should not be freed by caller. 
*/ -static bool -rum_tuplesort_gettuple_common(RumTuplesortstate * state, bool forward, - SortTuple *stup, bool *should_free) +static void * +rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free) { - bool res = tuplesort_gettuple_common(state, forward, stup -#if PG_VERSION_NUM < 100000 - ,should_free + MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); + SortTuple stup; + bool res; + +#if PG_VERSION_NUM >= 100000 + res = tuplesort_gettuple_common(state, forward, &stup); + *should_free = false; +#else + res = tuplesort_gettuple_common(state, forward, &stup, should_free); #endif - ); - switch (state->status) - { - case TSS_SORTEDINMEM: - *should_free = false; - break; - - case TSS_SORTEDONTAPE: - case TSS_FINALMERGE: - *should_free = true; - break; - - default: - elog(ERROR, "invalid tuplesort state"); - return false; /* keep compiler quiet */ - } + MemoryContextSwitchTo(oldcontext); - return res; + return res ? stup.tuple : NULL; } RumSortItem * rum_tuplesort_getrum(RumTuplesortstate * state, bool forward, bool *should_free) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, should_free)) - stup.tuple = NULL; - - MemoryContextSwitchTo(oldcontext); - - return (RumSortItem *) stup.tuple; + return (RumSortItem *) rum_tuplesort_getrum_internal(state, forward, should_free); } RumScanItem * rum_tuplesort_getrumitem(RumTuplesortstate * state, bool forward, bool *should_free) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - if (!rum_tuplesort_gettuple_common(state, forward, &stup, should_free)) - stup.tuple = NULL; - - MemoryContextSwitchTo(oldcontext); - - return (RumScanItem *) stup.tuple; -} - -/* - * rum_tuplesort_merge_order - report merge order we'll use for given memory - * (note: "merge order" just means the number of input tapes in the merge). 
- * - * This is exported for use by the planner. allowedMem is in bytes. - */ -int -rum_tuplesort_merge_order(long allowedMem) -{ - int mOrder; - - /* - * We need one tape for each merge input, plus another one for the output, - * and each of these tapes needs buffer space. In addition we want - * MERGE_BUFFER_SIZE workspace per input tape (but the output tape doesn't - * count). - * - * Note: you might be thinking we need to account for the memtuples[] - * array in this calculation, but we effectively treat that as part of the - * MERGE_BUFFER_SIZE workspace. - */ - mOrder = (allowedMem - TAPE_BUFFER_OVERHEAD) / - (MERGE_BUFFER_SIZE + TAPE_BUFFER_OVERHEAD); - - /* Even in minimum memory, use at least a MINORDER merge */ - mOrder = Max(mOrder, MINORDER); - - return mOrder; + return (RumScanItem *) rum_tuplesort_getrum_internal(state, forward, should_free); } diff --git a/src/rumsort.h b/src/rumsort.h index 30faeb9d17..8b6c9a645f 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -38,7 +38,6 @@ typedef struct #define RumSortItemSize(nKeys) (offsetof(RumSortItem,data)+(nKeys)*sizeof(float8)) extern MemoryContext rum_tuplesort_get_memorycontext(RumTuplesortstate *state); - extern RumTuplesortstate *rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer); extern RumTuplesortstate *rum_tuplesort_begin_rumitem(int workMem, @@ -56,6 +55,4 @@ extern struct RumScanItem *rum_tuplesort_getrumitem(RumTuplesortstate *state, bo extern void rum_tuplesort_end(RumTuplesortstate *state); -extern int rum_tuplesort_merge_order(long allowedMem); - #endif /* RUMSORT_H */ From 0eddfba18caf632c41adaa9faefb9efdca10ccf7 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 22 Nov 2021 17:43:59 +0300 Subject: [PATCH 118/182] Fix varlena allocation to avoid garbage contents. 
--- src/rum_ts_utils.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 5a335daaf5..9729f13f1f 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -2271,7 +2271,8 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) count2 = count_pos(in2, VARSIZE_ANY_EXHDR(addInfo2)), countRes = 0; int i1 = 0, i2 = 0; - Size size; + Size size, + size_compressed; WordEntryPos pos1 = 0, pos2 = 0, *pos; @@ -2343,10 +2344,11 @@ rum_ts_join_pos(PG_FUNCTION_ARGS) * uncompressed positions. So allocate memory with a margin. */ size = VARHDRSZ + 2 * sizeof(WordEntryPos) * countRes; - result = palloc(size); + result = palloc0(size); - size = compress_pos(result->vl_dat, pos, countRes) + VARHDRSZ; - SET_VARSIZE(result, size); + size_compressed = compress_pos(result->vl_dat, pos, countRes) + VARHDRSZ; + Assert(size >= size_compressed); + SET_VARSIZE(result, size_compressed); PG_RETURN_BYTEA_P(result); } From af616a0dd1fea1ec19a2c48a310dba74409341fa Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 26 Nov 2021 18:00:33 +0300 Subject: [PATCH 119/182] Fix item addinfo corruption. Copy item additional information from leaf data page. It is necessary when additional information is used after the data page is unlocked. In particular when sorting of items should be done.
--- src/rumget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/rumget.c b/src/rumget.c index 571b9cf7e5..ae28084979 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -464,11 +464,12 @@ collectMatchBitmap(RumBtreeData * btree, RumBtreeStack * stack, char *ptr = RumGetPosting(itup); RumScanItem item; + MemSet(&item, 0, sizeof(item)); ItemPointerSetMin(&item.item.iptr); for (i = 0; i < RumGetNPosting(itup); i++) { ptr = rumDataPageLeafRead(ptr, scanEntry->attnum, &item.item, - false, rumstate); + true, rumstate); SCAN_ITEM_PUT_KEY(scanEntry, item, idatum, icategory); rum_tuplesort_putrumitem(scanEntry->matchSortstate, &item); } From f83b610b307fb7bd5cf18848bfcaeaaa5a90ce3b Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 26 Nov 2021 18:28:38 +0300 Subject: [PATCH 120/182] Use vanilla tuplesort_free call. --- src/rumsort.c | 50 +------------------------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 215496d282..65dbb4ba69 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -347,55 +347,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) void rum_tuplesort_end(RumTuplesortstate * state) { - /* context swap probably not needed, but let's be safe */ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - -#ifdef TRACE_SORT - long spaceUsed; - - if (state->tapeset) - spaceUsed = LogicalTapeSetBlocks(state->tapeset); - else - spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; -#endif - - /* - * Delete temporary "tape" files, if any. - * - * Note: want to include this in reported total cost of sort, hence need - * for two #ifdef TRACE_SORT sections. 
- */ - if (state->tapeset) - LogicalTapeSetClose(state->tapeset); - -#ifdef TRACE_SORT - if (trace_sort) - { - if (state->tapeset) - elog(LOG, "external sort ended, %ld disk blocks used: %s", - spaceUsed, pg_rusage_show(&state->ru_start)); - else - elog(LOG, "internal sort ended, %ld KB used: %s", - spaceUsed, pg_rusage_show(&state->ru_start)); - } -#endif - - /* Free any execution state created for CLUSTER case */ - if (state->estate != NULL) - { - ExprContext *econtext = GetPerTupleExprContext(state->estate); - - ExecDropSingleTupleTableSlot(econtext->ecxt_scantuple); - FreeExecutorState(state->estate); - } - - MemoryContextSwitchTo(oldcontext); - - /* - * Free the per-sort memory context, thereby releasing all working memory, - * including the Tuplesortstate struct itself. - */ - MemoryContextDelete(state->sortcontext); + tuplesort_free(state); } /* From d9963d9886a4eff0a298eff40233ca673fbb06bf Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 26 Nov 2021 18:31:49 +0300 Subject: [PATCH 121/182] Fix varlena allocation to avoid garbage contents (part2). --- src/rum_ts_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 9729f13f1f..23e979a356 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -1170,7 +1170,7 @@ rum_extract_tsvector_internal(TSVector vector, * uncompressed positions. So allocate memory with a margin. 
*/ posDataSize = VARHDRSZ + 2 * posVec->npos * sizeof(WordEntryPos); - posData = (bytea *) palloc(posDataSize); + posData = (bytea *) palloc0(posDataSize); posDataSize = compress_pos(posData->vl_dat, posVec->pos, posVec->npos) + VARHDRSZ; SET_VARSIZE(posData, posDataSize); From 55efc6f56f41472b344425ef03966d5d2b0b64bc Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 29 Nov 2021 17:38:49 +0400 Subject: [PATCH 122/182] Make itemsize calculation same in writetup_rum and readtup_rum It was incorrectly set different RumSortSize vs RumScanSize --- src/rumsort.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 65dbb4ba69..64e687ccb8 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -203,11 +203,13 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->tapeset, LT_ARG, ##args) #endif +#define ITEMSIZE(is_scanitem) is_scanitem ? sizeof(RumScanItem) : RumSortItemSize(state->nKeys); + static void writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup, bool is_item) { - RumSortItem *item = (RumSortItem *) stup->tuple; - size_t size = is_item ? sizeof(*item) : RumSortItemSize(state->nKeys); + void *item = stup->tuple; + size_t size = ITEMSIZE(is_item); unsigned int writtenlen = size + sizeof(unsigned int); LogicalTapeWrite(TAPE(state, LT_ARG), @@ -239,7 +241,7 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, LT_TYPE LT_ARG, unsigned int len, bool is_item) { unsigned int tuplen = len - sizeof(unsigned int); - size_t size = is_item ? 
sizeof(RumScanItem) : RumSortItemSize(state->nKeys); + size_t size = ITEMSIZE(is_item); void *item = palloc(size); Assert(tuplen == RumSortItemSize(state->nKeys)); From 4b3b24e5593a83ececc45604956f77b44078c85b Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 29 Nov 2021 18:43:45 +0400 Subject: [PATCH 123/182] Use state for rumSortItem/rumScanItem size calculation --- src/rumsort.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 64e687ccb8..dd02a677e2 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -203,13 +203,21 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->tapeset, LT_ARG, ##args) #endif -#define ITEMSIZE(is_scanitem) is_scanitem ? sizeof(RumScanItem) : RumSortItemSize(state->nKeys); +static size_t rum_item_size(RumTuplesortstate * state) +{ + if (state->copytup == copytup_rum) + return RumSortItemSize(state->nKeys); + else if (state->copytup == copytup_rumitem) + return sizeof(RumScanItem); + else + elog (FATAL, "Unknown RUM state"); +} static void -writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup, bool is_item) +writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) { void *item = stup->tuple; - size_t size = ITEMSIZE(is_item); + size_t size = rum_item_size(state); unsigned int writtenlen = size + sizeof(unsigned int); LogicalTapeWrite(TAPE(state, LT_ARG), @@ -227,13 +235,13 @@ writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup static void writetup_rum(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) { - writetup_rum_internal(state, LT_ARG, stup, false); + writetup_rum_internal(state, LT_ARG, stup); } static void writetup_rumitem(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) { - writetup_rum_internal(state, LT_ARG, stup, true); + 
writetup_rum_internal(state, LT_ARG, stup); } static void @@ -241,7 +249,7 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, LT_TYPE LT_ARG, unsigned int len, bool is_item) { unsigned int tuplen = len - sizeof(unsigned int); - size_t size = ITEMSIZE(is_item); + size_t size = rum_item_size(state); void *item = palloc(size); Assert(tuplen == RumSortItemSize(state->nKeys)); From f608c366f98c4fd4a9fd583181fc757d9af927c1 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 29 Nov 2021 19:07:06 +0400 Subject: [PATCH 124/182] Use puttuple_common function from vanilla tuplesort --- src/rumsort.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index dd02a677e2..76096a0f06 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -65,7 +65,6 @@ static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); static void copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup); static void *rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free); -static void rum_tuplesort_putrum_internal(RumTuplesortstate * state, void *item); static int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2) @@ -370,33 +369,16 @@ rum_tuplesort_get_memorycontext(RumTuplesortstate * state) return state->sortcontext; } -static void -rum_tuplesort_putrum_internal(RumTuplesortstate * state, void *item) -{ - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - - /* - * Copy the given tuple into memory we control, and decrease availMem. - * Then call the common code. 
- */ - COPYTUP(state, &stup, item); - - puttuple_common(state, &stup); - - MemoryContextSwitchTo(oldcontext); -} - void -rum_tuplesort_putrum(RumTuplesortstate * state, RumSortItem * item) +rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem *item) { - rum_tuplesort_putrum_internal(state, item); + tuplesort_puttupleslot(state, (TupleTableSlot *) item); } void -rum_tuplesort_putrumitem(RumTuplesortstate * state, RumScanItem * item) +rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem *item) { - rum_tuplesort_putrum_internal(state, item); + tuplesort_puttupleslot(state, (TupleTableSlot *) item); } void From 1605f9eebe70278a60dac53476312dc6d66e39d0 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 29 Nov 2021 20:21:16 +0300 Subject: [PATCH 125/182] Fix valgrind errors when work_mem < 1 MB - fix write uninited values in logical tape - fix double free --- src/rumget.c | 1 + src/rumsort.c | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/rumget.c b/src/rumget.c index ae28084979..e81421f6b1 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -252,6 +252,7 @@ scanPostingTree(Relation index, RumScanEntry scanEntry, RumScanItem item; Pointer ptr; + MemSet(&item, 0, sizeof(item)); ItemPointerSetMin(&item.item.iptr); ptr = RumDataPageGetData(page); diff --git a/src/rumsort.c b/src/rumsort.c index 76096a0f06..0db498b060 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -202,7 +202,8 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->tapeset, LT_ARG, ##args) #endif -static size_t rum_item_size(RumTuplesortstate * state) +static Size +rum_item_size(RumTuplesortstate * state) { if (state->copytup == copytup_rum) return RumSortItemSize(state->nKeys); @@ -226,9 +227,6 @@ writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup if (state->randomAccess) /* need trailing length word? 
*/ LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); - - FREEMEM(state, GetMemoryChunkSpace(item)); - pfree(item); } static void @@ -251,7 +249,7 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, size_t size = rum_item_size(state); void *item = palloc(size); - Assert(tuplen == RumSortItemSize(state->nKeys)); + Assert(tuplen == size); USEMEM(state, GetMemoryChunkSpace(item)); LogicalTapeReadExact_compat(state, LT_ARG, item, size); From 809491207fd401c31c6a87e1f1581c5085c86e06 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 29 Nov 2021 20:25:07 +0300 Subject: [PATCH 126/182] Revert "Fix varlena allocation to avoid garbage contents (part2)." This reverts commit d9963d9886a4eff0a298eff40233ca673fbb06bf. --- src/rum_ts_utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 23e979a356..9729f13f1f 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -1170,7 +1170,7 @@ rum_extract_tsvector_internal(TSVector vector, * uncompressed positions. So allocate memory with a margin. 
*/ posDataSize = VARHDRSZ + 2 * posVec->npos * sizeof(WordEntryPos); - posData = (bytea *) palloc0(posDataSize); + posData = (bytea *) palloc(posDataSize); posDataSize = compress_pos(posData->vl_dat, posVec->pos, posVec->npos) + VARHDRSZ; SET_VARSIZE(posData, posDataSize); From 87a2880d98930737b62f9c74fdfc25f4a3f60683 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 29 Nov 2021 22:11:24 +0400 Subject: [PATCH 127/182] Change call of (static) tuplesort_gettuple_common to public tuplesort_getindextuple --- src/rumsort.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 0db498b060..ca8f4b6327 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -386,30 +386,22 @@ rum_tuplesort_performsort(RumTuplesortstate * state) } /* - * Internal routine to fetch the next tuple in either forward or back - * direction into *stup. Returns false if no more tuples. + * Internal routine to fetch the next index tuple in either forward or back direction. + * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory context. Caller may not rely on tuple remaining valid after any further manipulation of tuplesort. * If *should_free is set, the caller must pfree stup.tuple when done with it. * - * NOTE: in PG 10 and newer tuplesort_gettuple_common allocates tuple in tuplesort - * context and it should not be freed by caller. + * NOTE: in PG 10 and newer tuple is always allocated tuple in tuplesort context and + * should not be freed by caller. 
- */ static void * rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free) { - MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - SortTuple stup; - bool res; - #if PG_VERSION_NUM >= 100000 - res = tuplesort_gettuple_common(state, forward, &stup); *should_free = false; + return (RumSortItem *)tuplesort_getindextuple(state, forward); #else - res = tuplesort_gettuple_common(state, forward, &stup, should_free); + return (RumSortItem *)tuplesort_getindextuple(state, forward, should_free); #endif - - MemoryContextSwitchTo(oldcontext); - - return res ? stup.tuple : NULL; } RumSortItem * From 04909340cc8c3a2daf0ef5c9f256c4430673622f Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 29 Nov 2021 22:53:21 +0400 Subject: [PATCH 128/182] Call tuplesort_end instead of tuplesort_free before version 13 There were times when there was no tuplesort_free() at all --- src/rumsort.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/rumsort.c b/src/rumsort.c index ca8f4b6327..2057e69f86 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -354,7 +354,11 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) void rum_tuplesort_end(RumTuplesortstate * state) { +#if PG_VERSION_NUM >= 130000 tuplesort_free(state); +#else + tuplesort_end(state); +#endif } /* From 34437d333ecc2ff52b7389844c771b530dd1efb5 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 6 Dec 2021 16:15:48 +0400 Subject: [PATCH 129/182] Compatibility with changes in vanilla Random implementation in v15 --- src/rumget.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/rumget.c b/src/rumget.c index e81421f6b1..c48d191288 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -22,7 +22,9 @@ #if PG_VERSION_NUM >= 120000 #include "utils/float.h" #endif - +#if PG_VERSION_NUM >= 150000 +#include "common/pg_prng.h" +#endif #include "rum.h" /* GUC parameter */ @@ -1131,7 +1133,12 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot
snapshot) return true; } +#if PG_VERSION_NUM < 150000 #define rum_rand() (((double) random()) / ((double) MAX_RANDOM_VALUE)) +#else +#define rum_rand() pg_prng_double(&pg_global_prng_state) +#endif + #define dropItem(e) ( rum_rand() > ((double)RumFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) /* From ae67572e6f20600671fb866706ba05da35d222ad Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 6 Dec 2021 17:01:58 +0400 Subject: [PATCH 130/182] Compatibility with tuplesort changes in v15 --- src/rumsort.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 2057e69f86..473139598c 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -192,9 +192,9 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #if PG_VERSION_NUM >= 150000 #define LT_TYPE LogicalTape * -#define LT_ARG unused -#define TAPE(state, LT_ARG) state->result_tape -#define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(state->result_tape, ##args) +#define LT_ARG tape +#define TAPE(state, LT_ARG) LT_ARG +#define LogicalTapeReadExact_compat(state, LT_ARG, args...) 
LogicalTapeReadExact(LT_ARG, ##args) #else #define LT_TYPE int #define LT_ARG tapenum From 6f47dc73ad5281b5daeb53f0f6137aca292fa183 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 6 Dec 2021 22:05:58 +0400 Subject: [PATCH 131/182] Make compatible with amroutine changes in V15 --- src/rumutil.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/rumutil.c b/src/rumutil.c index a24c1614ce..93a52f534f 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -128,6 +128,9 @@ rumhandler(PG_FUNCTION_ARGS) amroutine->ampredlocks = true; #if PG_VERSION_NUM >= 100000 amroutine->amcanparallel = false; +#endif +#if PG_VERSION_NUM >= 150000 + amroutine->amhotblocking = true; #endif amroutine->amkeytype = InvalidOid; From 655f5b3d00c7245a8fe669437b8adefff3480378 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 6 Dec 2021 23:44:47 +0400 Subject: [PATCH 132/182] Make compatible with windows build system Don't use ##args style in macros --- src/rumsort.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 473139598c..9c08d74528 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -194,12 +194,10 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #define LT_TYPE LogicalTape * #define LT_ARG tape #define TAPE(state, LT_ARG) LT_ARG -#define LogicalTapeReadExact_compat(state, LT_ARG, args...) LogicalTapeReadExact(LT_ARG, ##args) #else #define LT_TYPE int #define LT_ARG tapenum #define TAPE(state, LT_ARG) state->tapeset, LT_ARG -#define LogicalTapeReadExact_compat(state, LT_ARG, args...) 
LogicalTapeReadExact(state->tapeset, LT_ARG, ##args) #endif static Size @@ -252,8 +250,11 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, Assert(tuplen == size); USEMEM(state, GetMemoryChunkSpace(item)); - LogicalTapeReadExact_compat(state, LT_ARG, item, size); - +#if PG_VERSION_NUM >= 150000 + LogicalTapeReadExact(LT_ARG, item, size); +#else + LogicalTapeReadExact(state->tapeset, LT_ARG, item, size); +#endif stup->tuple = item; stup->isnull1 = is_item; @@ -261,7 +262,11 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, stup->datum1 = Float8GetDatum(state->nKeys > 0 ? ((RumSortItem *) item)->data[0] : 0); if (state->randomAccess) /* need trailing length word? */ - LogicalTapeReadExact_compat(state, LT_ARG, &tuplen, sizeof(tuplen)); +#if PG_VERSION_NUM >= 150000 + LogicalTapeReadExact(LT_ARG, &tuplen, sizeof(tuplen)); +#else + LogicalTapeReadExact(state->tapeset, LT_ARG, &tuplen, sizeof(tuplen)); +#endif } static void From 922bfbd50fa547c867384472dca483433bf9b5b2 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Wed, 8 Dec 2021 17:56:02 +0400 Subject: [PATCH 133/182] TAP test compatibility with PG15 --- t/001_wal.pl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/t/001_wal.pl b/t/001_wal.pl index 1ee47b76ae..880060fb7b 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -1,8 +1,8 @@ # Test generic xlog record work for rum index replication. 
use strict; use warnings; -use PostgresNode; -use TestLib; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; use Test::More tests => 31; my $node_master; @@ -50,7 +50,7 @@ sub test_index_replay } # Initialize master node -$node_master = get_new_node('master'); +$node_master = PostgreSQL::Test::Cluster->new('master'); $node_master->init(allows_streaming => 1); $node_master->start; my $backup_name = 'my_backup'; @@ -59,7 +59,7 @@ sub test_index_replay $node_master->backup($backup_name); # Create streaming standby linking to master -$node_standby = get_new_node('standby'); +$node_standby = PostgreSQL::Test::Cluster->new('standby'); $node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1); $node_standby->start; From 016314d4e23c31f3e43bc406a4d9314294eeda81 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Mon, 13 Dec 2021 15:20:30 +0400 Subject: [PATCH 134/182] TAP test compatibility with PG15 (part 2). Automatically use old and new style node creation. --- t/001_wal.pl | 56 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/t/001_wal.pl b/t/001_wal.pl index 880060fb7b..a169683ee6 100644 --- a/t/001_wal.pl +++ b/t/001_wal.pl @@ -1,10 +1,31 @@ # Test generic xlog record work for rum index replication. use strict; use warnings; -use PostgreSQL::Test::Cluster; -use PostgreSQL::Test::Utils; use Test::More tests => 31; +my $pg_15_modules; + +BEGIN +{ + $pg_15_modules = eval + { + require PostgreSQL::Test::Cluster; + require PostgreSQL::Test::Utils; + return 1; + }; + + unless (defined $pg_15_modules) + { + $pg_15_modules = 0; + + require PostgresNode; + require TestLib; + } +} + +note('PostgreSQL 15 modules are used: ' . ($pg_15_modules ? 'yes' : 'no')); + + my $node_master; my $node_standby; @@ -50,7 +71,23 @@ sub test_index_replay } # Initialize master node -$node_master = PostgreSQL::Test::Cluster->new('master'); + +# Create node. 
+# Older versions of PostgreSQL modules use get_new_node function. +# Newer use standard perl object constructor syntax. +# Also applies for node_standby (below). +eval +{ + if ($pg_15_modules) + { + $node_master = PostgreSQL::Test::Cluster->new("master"); + } + else + { + $node_master = PostgresNode::get_new_node("master"); + } +}; + $node_master->init(allows_streaming => 1); $node_master->start; my $backup_name = 'my_backup'; @@ -59,7 +96,18 @@ sub test_index_replay $node_master->backup($backup_name); # Create streaming standby linking to master -$node_standby = PostgreSQL::Test::Cluster->new('standby'); +eval +{ + if ($pg_15_modules) + { + $node_standby = PostgreSQL::Test::Cluster->new("standby"); + } + else + { + $node_standby = PostgresNode::get_new_node("standby"); + } +}; + $node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1); $node_standby->start; From 420735671047e0fb19c1c75a40ae3498e8ff6bc1 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 14 Jan 2022 16:48:13 +0400 Subject: [PATCH 135/182] Fix travis CI config scripts --- .travis.yml | 10 ++++++++-- travis/Dockerfile.in | 1 + travis/run_tests.sh | 7 +++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9dd95cb320..8952b507cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,13 @@ notifications: on_failure: always env: - - PG_VERSION=11 LEVEL=hardcore + - PG_VERSION=14 + - PG_VERSION=14 LEVEL=hardcore + - PG_VERSION=13 + - PG_VERSION=13 LEVEL=hardcore + - PG_VERSION=12 + - PG_VERSION=12 LEVEL=hardcore - PG_VERSION=11 + - PG_VERSION=11 LEVEL=hardcore - PG_VERSION=10 - - PG_VERSION=9.6 + - PG_VERSION=10 LEVEL=hardcore diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index 6c839839f0..2bfa60483d 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -2,6 +2,7 @@ FROM postgres:${PG_VERSION}-alpine # Install dependencies RUN apk add --no-cache \ + linux-headers \ openssl curl \ perl perl-ipc-run \ make 
musl-dev gcc bison flex coreutils \ diff --git a/travis/run_tests.sh b/travis/run_tests.sh index c1f598a196..5dff578c0e 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -36,7 +36,7 @@ if [ "$LEVEL" = "hardcore" ]; then # enable additional options ./configure \ - CFLAGS='-O0 -ggdb3 -fno-omit-frame-pointer' \ + CFLAGS='-fno-omit-frame-pointer' \ --enable-cassert \ --prefix=$CUSTOM_PG_BIN \ --quiet @@ -62,7 +62,10 @@ pg_config if [ "$LEVEL" = "hardcore" ]; then # perform static analyzis - scan-build --status-bugs make USE_PGXS=1 || status=$? + scan-build --status-bugs \ + -disable-checker core.UndefinedBinaryOperatorResult \ + -disable-checker deadcode.DeadStores \ + make USE_PGXS=1 || status=$? # something's wrong, exit now! if [ $status -ne 0 ]; then exit 1; fi From 933e8b5574859020b75302a9a3fd88cee06ddaf5 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 14 Jan 2022 16:48:13 +0400 Subject: [PATCH 136/182] Fix travis CI config scripts --- .travis.yml | 10 ++++++++-- travis/Dockerfile.in | 1 + travis/run_tests.sh | 7 +++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9dd95cb320..8952b507cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,13 @@ notifications: on_failure: always env: - - PG_VERSION=11 LEVEL=hardcore + - PG_VERSION=14 + - PG_VERSION=14 LEVEL=hardcore + - PG_VERSION=13 + - PG_VERSION=13 LEVEL=hardcore + - PG_VERSION=12 + - PG_VERSION=12 LEVEL=hardcore - PG_VERSION=11 + - PG_VERSION=11 LEVEL=hardcore - PG_VERSION=10 - - PG_VERSION=9.6 + - PG_VERSION=10 LEVEL=hardcore diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index 6c839839f0..2bfa60483d 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -2,6 +2,7 @@ FROM postgres:${PG_VERSION}-alpine # Install dependencies RUN apk add --no-cache \ + linux-headers \ openssl curl \ perl perl-ipc-run \ make musl-dev gcc bison flex coreutils \ diff --git a/travis/run_tests.sh b/travis/run_tests.sh index 
c1f598a196..5dff578c0e 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -36,7 +36,7 @@ if [ "$LEVEL" = "hardcore" ]; then # enable additional options ./configure \ - CFLAGS='-O0 -ggdb3 -fno-omit-frame-pointer' \ + CFLAGS='-fno-omit-frame-pointer' \ --enable-cassert \ --prefix=$CUSTOM_PG_BIN \ --quiet @@ -62,7 +62,10 @@ pg_config if [ "$LEVEL" = "hardcore" ]; then # perform static analyzis - scan-build --status-bugs make USE_PGXS=1 || status=$? + scan-build --status-bugs \ + -disable-checker core.UndefinedBinaryOperatorResult \ + -disable-checker deadcode.DeadStores \ + make USE_PGXS=1 || status=$? # something's wrong, exit now! if [ $status -ne 0 ]; then exit 1; fi From 7c4c0f24a29af1eed097171841922116c3209176 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 1 Mar 2022 15:50:54 +0400 Subject: [PATCH 137/182] Update copyrights --- src/rum.h | 2 +- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 2 +- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 2 +- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumsort.c | 2 +- src/rumsort.h | 2 +- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- tests/pglist_tests.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rum.h b/src/rum.h index 8f54edd5d4..5103935ab1 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,7 +3,7 @@ * rum.h * Exported definitions for RUM index. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 2006-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 86fab36074..ed7e6dacfb 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 9729f13f1f..536fc5375b 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,7 +3,7 @@ * rum_ts_utils.c * various text-search functions * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumbtree.c b/src/rumbtree.c index 2e1e520df9..2c168b100d 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index b9e94df375..2b38bf46e3 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 999b90e726..1533adb89b 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index c07fc3219a..9b759a1ff2 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index c48d191288..3834394477 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,7 +4,7 @@ * fetch tuples from a RUM scan. 
* * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/ruminsert.c b/src/ruminsert.c index f42c8a9526..852ee0679b 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 8048215971..65d781c710 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.c b/src/rumsort.c index 9c08d74528..4ed425ccd2 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -8,7 +8,7 @@ * src/backend/utils/sort/tuplesort.c. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.h b/src/rumsort.h index 8b6c9a645f..dfa5117bf1 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -7,7 +7,7 @@ * It contains copy of static functions from * src/backend/utils/sort/tuplesort.c. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 0f10500c55..74189c37eb 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index 93a52f534f..a9ff1ef981 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 35cca00308..07a584121a 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index 1d73e1b73b..a8c9e91a8f 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/tests/pglist_tests.py b/tests/pglist_tests.py index a693a1b606..7b8e4558e0 100644 --- a/tests/pglist_tests.py +++ b/tests/pglist_tests.py @@ -2,7 +2,7 @@ """ Test RUM index with big base 'pglist' - Copyright (c) 2015-2016, Postgres Professional + Copyright (c) 2015-2021, Postgres Professional """ import unittest import os From d9cc86f2c52df37823a2c2e5d51aaa3c4a6c3afc Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Tue, 1 Mar 2022 15:50:54 +0400 Subject: [PATCH 138/182] Update copyrights --- src/rum.h | 2 +- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 2 +- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 2 +- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumsort.c | 2 +- src/rumsort.h | 2 +- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- tests/pglist_tests.py | 2 +- 17 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rum.h b/src/rum.h index 8f54edd5d4..5103935ab1 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,7 +3,7 @@ * rum.h * Exported definitions for RUM index. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 2006-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 86fab36074..ed7e6dacfb 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 9729f13f1f..536fc5375b 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,7 +3,7 @@ * rum_ts_utils.c * various text-search functions * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumbtree.c b/src/rumbtree.c index 2e1e520df9..2c168b100d 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index b9e94df375..2b38bf46e3 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 999b90e726..1533adb89b 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index c07fc3219a..9b759a1ff2 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index c48d191288..3834394477 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,7 +4,7 @@ * fetch tuples from a RUM scan. 
* * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/ruminsert.c b/src/ruminsert.c index f42c8a9526..852ee0679b 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 8048215971..65d781c710 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.c b/src/rumsort.c index 9c08d74528..4ed425ccd2 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -8,7 +8,7 @@ * src/backend/utils/sort/tuplesort.c. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.h b/src/rumsort.h index 8b6c9a645f..dfa5117bf1 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -7,7 +7,7 @@ * It contains copy of static functions from * src/backend/utils/sort/tuplesort.c. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 0f10500c55..74189c37eb 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index 93a52f534f..a9ff1ef981 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 35cca00308..07a584121a 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index 1d73e1b73b..a8c9e91a8f 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. 
* - * Portions Copyright (c) 2015-2019, Postgres Professional + * Portions Copyright (c) 2015-2021, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/tests/pglist_tests.py b/tests/pglist_tests.py index a693a1b606..7b8e4558e0 100644 --- a/tests/pglist_tests.py +++ b/tests/pglist_tests.py @@ -2,7 +2,7 @@ """ Test RUM index with big base 'pglist' - Copyright (c) 2015-2016, Postgres Professional + Copyright (c) 2015-2021, Postgres Professional """ import unittest import os From 2b46914f82f40fe60829a226ae04336846b8c12b Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 6 May 2022 14:12:07 +0400 Subject: [PATCH 139/182] Compatibility with random access tuplesort interface changes in v15 Also update tuplesort15.c according to vanilla changes. --- src/rumsort.c | 17 ++- src/tuplesort15.c | 379 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 324 insertions(+), 72 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 4ed425ccd2..6a0c920d9f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -222,7 +222,11 @@ writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup (void *) &writtenlen, sizeof(writtenlen)); LogicalTapeWrite(TAPE(state, LT_ARG), (void *) item, size); - if (state->randomAccess) /* need trailing length word? */ +#if PG_VERSION_NUM >= 150000 + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ +#else + if (state->randomAccess) /* need trailing length word? */ +#endif LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); } @@ -260,11 +264,11 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, if (!is_item) stup->datum1 = Float8GetDatum(state->nKeys > 0 ? ((RumSortItem *) item)->data[0] : 0); - - if (state->randomAccess) /* need trailing length word? 
*/ #if PG_VERSION_NUM >= 150000 + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ LogicalTapeReadExact(LT_ARG, &tuplen, sizeof(tuplen)); #else + if (state->randomAccess) LogicalTapeReadExact(state->tapeset, LT_ARG, &tuplen, sizeof(tuplen)); #endif } @@ -291,7 +295,14 @@ RumTuplesortstate * rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer) { +#if PG_VERSION_NUM >= 150000 + RumTuplesortstate *state = tuplesort_begin_common(workMem, + randomAccess ? + TUPLESORT_RANDOMACCESS : + TUPLESORT_NONE); +#else RumTuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); +#endif MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->sortcontext); diff --git a/src/tuplesort15.c b/src/tuplesort15.c index 90e26745df..e8da988a73 100644 --- a/src/tuplesort15.c +++ b/src/tuplesort15.c @@ -87,7 +87,7 @@ * produce exactly one output run from their partial input. * * - * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -246,7 +246,7 @@ struct Tuplesortstate { TupSortStatus status; /* enumerated value as shown above */ int nKeys; /* number of columns in sort key */ - bool randomAccess; /* did caller request random access? */ + int sortopt; /* Bitmask of flags used to setup sort */ bool bounded; /* did caller specify a maximum number of * tuples to return? */ bool boundUsed; /* true if we made use of a bounded heap */ @@ -306,6 +306,12 @@ struct Tuplesortstate void (*readtup) (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len); + /* + * Whether SortTuple's datum1 and isnull1 members are maintained by the + * above routines. If not, some sort specializations are disabled. + */ + bool haveDatum1; + /* * This array holds the tuples now in sort memory. 
If we are in state * INITIAL, the tuples are in no particular order; if we are in state @@ -430,7 +436,11 @@ struct Tuplesortstate /* * This variable is shared by the single-key MinimalTuple case and the - * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + * Datum case (which both use qsort_ssup()). Otherwise, it's NULL. The + * presence of a value in this field is also checked by various sort + * specialization functions as an optimization when comparing the leading + * key in a tiebreak situation to determine if there are any subsequent + * keys to sort on. */ SortSupport onlyKey; @@ -459,6 +469,7 @@ struct Tuplesortstate /* These are specific to the index_btree subcase: */ bool enforceUnique; /* complain if we find duplicate tuples */ + bool uniqueNullsNotDistinct; /* unique constraint null treatment */ /* These are specific to the index_hash subcase: */ uint32 high_mask; /* masks for sortable part of hash code */ @@ -557,12 +568,12 @@ struct Sharedsort * may or may not match the in-memory representation of the tuple --- * any conversion needed is the job of the writetup and readtup routines. * - * If state->randomAccess is true, then the stored representation of the - * tuple must be followed by another "unsigned int" that is a copy of the - * length --- so the total tape space used is actually sizeof(unsigned int) - * more than the stored length value. This allows read-backwards. When - * randomAccess is not true, the write/read routines may omit the extra - * length word. + * If state->sortopt contains TUPLESORT_RANDOMACCESS, then the stored + * representation of the tuple must be followed by another "unsigned int" that + * is a copy of the length --- so the total tape space used is actually + * sizeof(unsigned int) more than the stored length value. This allows + * read-backwards. When the random access flag was not specified, the + * write/read routines may omit the extra length word. 
* * writetup is expected to write both length words as well as the tuple * data. When readtup is called, the tape is positioned just after the @@ -607,7 +618,7 @@ struct Sharedsort static Tuplesortstate *tuplesort_begin_common(int workMem, SortCoordinate coordinate, - bool randomAccess); + int sortopt); static void tuplesort_begin_batch(Tuplesortstate *state); static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); static bool consider_abort_common(Tuplesortstate *state); @@ -668,14 +679,124 @@ static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); static void tuplesort_free(Tuplesortstate *state); static void tuplesort_updatemax(Tuplesortstate *state); +/* + * Specialized comparators that we can inline into specialized sorts. The goal + * is to try to sort two tuples without having to follow the pointers to the + * comparator or the tuple. + * + * XXX: For now, these fall back to comparator functions that will compare the + * leading datum a second time. + * + * XXX: For now, there is no specialization for cases where datum1 is + * authoritative and we don't even need to fall back to a callback at all (that + * would be true for types like int4/int8/timestamp/date, but not true for + * abbreviations of text or multi-key sorts. There could be! Is it worth it? + */ + +/* Used if first key's comparator is ssup_datum_unsigned_compare */ +static pg_attribute_always_inline int +qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplyUnsignedSortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. 
+ */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + +/* Used if first key's comparator is ssup_datum_signed_compare */ +static pg_attribute_always_inline int +qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySignedSortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. + */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + +/* Used if first key's comparator is ssup_datum_int32_compare */ +static pg_attribute_always_inline int +qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplyInt32SortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. + */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + /* * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts * any variant of SortTuples, using the appropriate comparetup function. * qsort_ssup() is specialized for the case where the comparetup function * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts - * and Datum sorts. + * and Datum sorts. qsort_tuple_{unsigned,signed,int32} are specialized for + * common comparison functions on pass-by-value leading datums. 
*/ +#define ST_SORT qsort_tuple_unsigned +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_unsigned_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + +#define ST_SORT qsort_tuple_signed +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + +#define ST_SORT qsort_tuple_int32 +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_int32_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + #define ST_SORT qsort_tuple #define ST_ELEMENT_TYPE SortTuple #define ST_COMPARE_RUNTIME_POINTER @@ -712,21 +833,20 @@ static void tuplesort_updatemax(Tuplesortstate *state); * Each variant of tuplesort_begin has a workMem parameter specifying the * maximum number of kilobytes of RAM to use before spilling data to disk. * (The normal value of this parameter is work_mem, but some callers use - * other values.) Each variant also has a randomAccess parameter specifying - * whether the caller needs non-sequential access to the sort result. + * other values.) Each variant also has a sortopt which is a bitmask of + * sort options. 
See TUPLESORT_* definitions in tuplesort.h */ static Tuplesortstate * -tuplesort_begin_common(int workMem, SortCoordinate coordinate, - bool randomAccess) +tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt) { Tuplesortstate *state; MemoryContext maincontext; MemoryContext sortcontext; MemoryContext oldcontext; - /* See leader_takeover_tapes() remarks on randomAccess support */ - if (coordinate && randomAccess) + /* See leader_takeover_tapes() remarks on random access support */ + if (coordinate && (sortopt & TUPLESORT_RANDOMACCESS)) elog(ERROR, "random access disallowed under parallel sort"); /* @@ -763,7 +883,7 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, pg_rusage_init(&state->ru_start); #endif - state->randomAccess = randomAccess; + state->sortopt = sortopt; state->tuples = true; /* @@ -842,11 +962,21 @@ tuplesort_begin_batch(Tuplesortstate *state) * eases memory management. Resetting at key points reduces * fragmentation. Note that the memtuples array of SortTuples is allocated * in the parent context, not this context, because there is no need to - * free memtuples early. + * free memtuples early. For bounded sorts, tuples may be pfreed in any + * order, so we use a regular aset.c context so that it can make use of + * free'd memory. When the sort is not bounded, we make use of a + * generation.c context as this keeps allocations more compact with less + * wastage. Allocations are also slightly more CPU efficient. 
*/ - state->tuplecontext = AllocSetContextCreate(state->sortcontext, - "Caller tuples", - ALLOCSET_DEFAULT_SIZES); + if (state->sortopt & TUPLESORT_ALLOWBOUNDED) + state->tuplecontext = AllocSetContextCreate(state->sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + else + state->tuplecontext = GenerationContextCreate(state->sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + state->status = TSS_INITIAL; state->bounded = false; @@ -897,10 +1027,10 @@ tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, - int workMem, SortCoordinate coordinate, bool randomAccess) + int workMem, SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int i; @@ -912,7 +1042,7 @@ tuplesort_begin_heap(TupleDesc tupDesc, if (trace_sort) elog(LOG, "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", - nkeys, workMem, randomAccess ? 't' : 'f'); + nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = nkeys; @@ -921,13 +1051,14 @@ tuplesort_begin_heap(TupleDesc tupDesc, false, /* no unique check */ nkeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_heap; state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; + state->haveDatum1 = true; state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ state->abbrevNext = 10; @@ -947,7 +1078,7 @@ tuplesort_begin_heap(TupleDesc tupDesc, sortKey->ssup_nulls_first = nullsFirstFlags[i]; sortKey->ssup_attno = attNums[i]; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } @@ -970,10 +1101,10 @@ Tuplesortstate * tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, - SortCoordinate coordinate, bool randomAccess) + SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); BTScanInsert indexScanKey; MemoryContext oldcontext; int i; @@ -987,7 +1118,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, elog(LOG, "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", RelationGetNumberOfAttributes(indexRel), - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -996,7 +1127,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, false, /* no unique check */ state->nKeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_cluster; @@ -1007,6 +1138,15 @@ tuplesort_begin_cluster(TupleDesc tupDesc, state->indexInfo = BuildIndexInfo(indexRel); + /* + * If we don't have a simple leading attribute, we don't currently + * initialize datum1, so disable optimizations that require it. + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + state->haveDatum1 = false; + else + state->haveDatum1 = true; + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ indexScanKey = _bt_mkscankey(indexRel, NULL); @@ -1044,7 +1184,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; sortKey->ssup_attno = scanKey->sk_attno; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1065,12 +1205,13 @@ Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, + bool uniqueNullsNotDistinct, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); BTScanInsert indexScanKey; MemoryContext oldcontext; int i; @@ -1082,7 +1223,7 @@ tuplesort_begin_index_btree(Relation heapRel, elog(LOG, "begin index sort: unique = %c, workMem = %d, randomAccess = %c", enforceUnique ? 't' : 'f', - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -1091,7 +1232,7 @@ tuplesort_begin_index_btree(Relation heapRel, enforceUnique, state->nKeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_index_btree; @@ -1099,10 +1240,12 @@ tuplesort_begin_index_btree(Relation heapRel, state->writetup = writetup_index; state->readtup = readtup_index; state->abbrevNext = 10; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; state->enforceUnique = enforceUnique; + state->uniqueNullsNotDistinct = uniqueNullsNotDistinct; indexScanKey = _bt_mkscankey(indexRel, NULL); @@ -1122,7 +1265,7 @@ tuplesort_begin_index_btree(Relation heapRel, (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; sortKey->ssup_attno = scanKey->sk_attno; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1147,10 +1290,10 @@ tuplesort_begin_index_hash(Relation heapRel, uint32 max_buckets, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->maincontext); @@ -1163,7 +1306,8 @@ tuplesort_begin_index_hash(Relation heapRel, high_mask, low_mask, max_buckets, - workMem, randomAccess ? 't' : 'f'); + workMem, + sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = 1; /* Only one sort column, the hash code */ @@ -1172,6 +1316,7 @@ tuplesort_begin_index_hash(Relation heapRel, state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; @@ -1190,10 +1335,10 @@ tuplesort_begin_index_gist(Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int i; @@ -1203,7 +1348,7 @@ tuplesort_begin_index_gist(Relation heapRel, if (trace_sort) elog(LOG, "begin index sort: workMem = %d, randomAccess = %c", - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -1212,6 +1357,7 @@ tuplesort_begin_index_gist(Relation heapRel, state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; @@ -1229,7 +1375,7 @@ tuplesort_begin_index_gist(Relation heapRel, sortKey->ssup_nulls_first = false; sortKey->ssup_attno = i + 1; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1245,10 +1391,10 @@ tuplesort_begin_index_gist(Relation heapRel, Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, - SortCoordinate coordinate, bool randomAccess) + SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int16 typlen; bool typbyval; @@ -1259,7 +1405,7 @@ tuplesort_begin_datum(Oid 
datumType, Oid sortOperator, Oid sortCollation, if (trace_sort) elog(LOG, "begin datum sort: workMem = %d, randomAccess = %c", - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); #endif state->nKeys = 1; /* always a one-column sort */ @@ -1268,7 +1414,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, false, /* no unique check */ 1, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_datum; @@ -1276,6 +1422,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, state->writetup = writetup_datum; state->readtup = readtup_datum; state->abbrevNext = 10; + state->haveDatum1 = true; state->datumType = datumType; @@ -1334,6 +1481,8 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound) { /* Assert we're called before loading any tuples */ Assert(state->status == TSS_INITIAL && state->memtupcount == 0); + /* Assert we allow bounded sorts */ + Assert(state->sortopt & TUPLESORT_ALLOWBOUNDED); /* Can't set the bound twice, either */ Assert(!state->bounded); /* Also, this shouldn't be called in a parallel worker */ @@ -2162,7 +2311,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, switch (state->status) { case TSS_SORTEDINMEM: - Assert(forward || state->randomAccess); + Assert(forward || state->sortopt & TUPLESORT_RANDOMACCESS); Assert(!state->slabAllocatorUsed); if (forward) { @@ -2206,7 +2355,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, break; case TSS_SORTEDONTAPE: - Assert(forward || state->randomAccess); + Assert(forward || state->sortopt & TUPLESORT_RANDOMACCESS); Assert(state->slabAllocatorUsed); /* @@ -2981,7 +3130,8 @@ mergeruns(Tuplesortstate *state) * sorted tape, we can stop at this point and do the final merge * on-the-fly. 
*/ - if (!state->randomAccess && state->nInputRuns <= state->nInputTapes + if ((state->sortopt & TUPLESORT_RANDOMACCESS) == 0 + && state->nInputRuns <= state->nInputTapes && !WORKER(state)) { /* Tell logtape.c we won't be writing anymore */ @@ -3063,7 +3213,6 @@ mergeonerun(Tuplesortstate *state) { stup.srctape = srcTapeIndex; tuplesort_heap_replace_top(state, &stup); - } else { @@ -3227,7 +3376,7 @@ tuplesort_rescan(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3260,7 +3409,7 @@ tuplesort_markpos(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3291,7 +3440,7 @@ tuplesort_restorepos(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3503,15 +3652,53 @@ tuplesort_sort_memtuples(Tuplesortstate *state) if (state->memtupcount > 1) { + /* + * Do we have the leading column's value or abbreviation in datum1, + * and is there a specialization for its comparator? 
+ */ + if (state->haveDatum1 && state->sortKeys) + { + if (state->sortKeys[0].comparator == ssup_datum_unsigned_cmp) + { + elog(DEBUG1, "qsort_tuple_unsigned"); + qsort_tuple_unsigned(state->memtuples, + state->memtupcount, + state); + return; + } + else if (state->sortKeys[0].comparator == ssup_datum_signed_cmp) + { + elog(DEBUG1, "qsort_tuple_signed"); + qsort_tuple_signed(state->memtuples, + state->memtupcount, + state); + return; + } + else if (state->sortKeys[0].comparator == ssup_datum_int32_cmp) + { + elog(DEBUG1, "qsort_tuple_int32"); + qsort_tuple_int32(state->memtuples, + state->memtupcount, + state); + return; + } + } + /* Can we use the single-key sort function? */ if (state->onlyKey != NULL) + { + elog(DEBUG1, "qsort_ssup"); qsort_ssup(state->memtuples, state->memtupcount, state->onlyKey); + } else + { + elog(DEBUG1, "qsort_tuple"); qsort_tuple(state->memtuples, state->memtupcount, state->comparetup, state); + } } } @@ -3850,7 +4037,8 @@ writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, (void *) tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -3873,7 +4061,8 @@ readtup_heap(Tuplesortstate *state, SortTuple *stup, /* read in the tuple proper */ tuple->t_len = tuplen; LogicalTapeReadExact(tape, tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? 
*/ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value */ @@ -3904,7 +4093,6 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, datum2; bool isnull1, isnull2; - AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; /* Be prepared to compare additional sort keys */ ltup = (HeapTuple) a->tuple; @@ -3912,7 +4100,7 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, tupDesc = state->tupDesc; /* Compare the leading sort key, if it's simple */ - if (leading != 0) + if (state->haveDatum1) { compare = ApplySortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, @@ -3922,6 +4110,8 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, if (sortKey->abbrev_converter) { + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); @@ -4019,7 +4209,7 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) * set up first-column key value, and potentially abbreviate, if it's a * simple column */ - if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + if (!state->haveDatum1) return; original = heap_getattr(tuple, @@ -4084,7 +4274,8 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -4110,11 +4301,12 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? 
*/ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value, if it's a simple column */ - if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + if (state->haveDatum1) stup->datum1 = heap_getattr(tuple, state->indexInfo->ii_IndexAttrNumbers[0], state->tupDesc, @@ -4200,14 +4392,15 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b, /* * If btree has asked us to enforce uniqueness, complain if two equal - * tuples are detected (unless there was at least one NULL field). + * tuples are detected (unless there was at least one NULL field and NULLS + * NOT DISTINCT was not set). * * It is sufficient to make the test here, because if two tuples are equal * they *must* get compared at some stage of the sort --- otherwise the * sort algorithm wouldn't have checked whether one must appear before the * other. */ - if (state->enforceUnique && !equal_hasnull) + if (state->enforceUnique && !(!state->uniqueNullsNotDistinct && equal_hasnull)) { Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; @@ -4333,7 +4526,8 @@ writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) tuplen = IndexTupleSize(tuple) + sizeof(tuplen); LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple)); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -4351,7 +4545,8 @@ readtup_index(Tuplesortstate *state, SortTuple *stup, IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); LogicalTapeReadExact(tape, tuple, tuplen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? 
*/ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value */ @@ -4421,7 +4616,8 @@ writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); LogicalTapeWrite(tape, waddr, tuplen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); if (!state->slabAllocatorUsed && stup->tuple) @@ -4461,7 +4657,8 @@ readtup_datum(Tuplesortstate *state, SortTuple *stup, stup->tuple = raddr; } - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); } @@ -4696,3 +4893,47 @@ free_sort_tuple(Tuplesortstate *state, SortTuple *stup) stup->tuple = NULL; } } + +int +ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup) +{ + if (x < y) + return -1; + else if (x > y) + return 1; + else + return 0; +} + +int +ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) +{ +#if SIZEOF_DATUM == 8 + int64 xx = (int64) x; + int64 yy = (int64) y; +#else + int32 xx = (int32) x; + int32 yy = (int32) y; +#endif + + if (xx < yy) + return -1; + else if (xx > yy) + return 1; + else + return 0; +} + +int +ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup) +{ + int32 xx = (int32) x; + int32 yy = (int32) y; + + if (xx < yy) + return -1; + else if (xx > yy) + return 1; + else + return 0; +} From 5e8be0276192745c0fc0646fced74637d8652868 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 6 May 2022 14:12:07 +0400 Subject: [PATCH 140/182] Compatibility with random access tuplesort interface changes in v15 Also update tuplesort15.c according to vanilla changes. 
--- src/rumsort.c | 17 ++- src/tuplesort15.c | 379 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 324 insertions(+), 72 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 4ed425ccd2..6a0c920d9f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -222,7 +222,11 @@ writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup (void *) &writtenlen, sizeof(writtenlen)); LogicalTapeWrite(TAPE(state, LT_ARG), (void *) item, size); - if (state->randomAccess) /* need trailing length word? */ +#if PG_VERSION_NUM >= 150000 + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ +#else + if (state->randomAccess) /* need trailing length word? */ +#endif LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); } @@ -260,11 +264,11 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, if (!is_item) stup->datum1 = Float8GetDatum(state->nKeys > 0 ? ((RumSortItem *) item)->data[0] : 0); - - if (state->randomAccess) /* need trailing length word? */ #if PG_VERSION_NUM >= 150000 + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ LogicalTapeReadExact(LT_ARG, &tuplen, sizeof(tuplen)); #else + if (state->randomAccess) LogicalTapeReadExact(state->tapeset, LT_ARG, &tuplen, sizeof(tuplen)); #endif } @@ -291,7 +295,14 @@ RumTuplesortstate * rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer) { +#if PG_VERSION_NUM >= 150000 + RumTuplesortstate *state = tuplesort_begin_common(workMem, + randomAccess ? 
+ TUPLESORT_RANDOMACCESS : + TUPLESORT_NONE); +#else RumTuplesortstate *state = tuplesort_begin_common(workMem, randomAccess); +#endif MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->sortcontext); diff --git a/src/tuplesort15.c b/src/tuplesort15.c index 90e26745df..e8da988a73 100644 --- a/src/tuplesort15.c +++ b/src/tuplesort15.c @@ -87,7 +87,7 @@ * produce exactly one output run from their partial input. * * - * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION @@ -246,7 +246,7 @@ struct Tuplesortstate { TupSortStatus status; /* enumerated value as shown above */ int nKeys; /* number of columns in sort key */ - bool randomAccess; /* did caller request random access? */ + int sortopt; /* Bitmask of flags used to setup sort */ bool bounded; /* did caller specify a maximum number of * tuples to return? */ bool boundUsed; /* true if we made use of a bounded heap */ @@ -306,6 +306,12 @@ struct Tuplesortstate void (*readtup) (Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len); + /* + * Whether SortTuple's datum1 and isnull1 members are maintained by the + * above routines. If not, some sort specializations are disabled. + */ + bool haveDatum1; + /* * This array holds the tuples now in sort memory. If we are in state * INITIAL, the tuples are in no particular order; if we are in state @@ -430,7 +436,11 @@ struct Tuplesortstate /* * This variable is shared by the single-key MinimalTuple case and the - * Datum case (which both use qsort_ssup()). Otherwise it's NULL. + * Datum case (which both use qsort_ssup()). Otherwise, it's NULL. 
The + * presence of a value in this field is also checked by various sort + * specialization functions as an optimization when comparing the leading + * key in a tiebreak situation to determine if there are any subsequent + * keys to sort on. */ SortSupport onlyKey; @@ -459,6 +469,7 @@ struct Tuplesortstate /* These are specific to the index_btree subcase: */ bool enforceUnique; /* complain if we find duplicate tuples */ + bool uniqueNullsNotDistinct; /* unique constraint null treatment */ /* These are specific to the index_hash subcase: */ uint32 high_mask; /* masks for sortable part of hash code */ @@ -557,12 +568,12 @@ struct Sharedsort * may or may not match the in-memory representation of the tuple --- * any conversion needed is the job of the writetup and readtup routines. * - * If state->randomAccess is true, then the stored representation of the - * tuple must be followed by another "unsigned int" that is a copy of the - * length --- so the total tape space used is actually sizeof(unsigned int) - * more than the stored length value. This allows read-backwards. When - * randomAccess is not true, the write/read routines may omit the extra - * length word. + * If state->sortopt contains TUPLESORT_RANDOMACCESS, then the stored + * representation of the tuple must be followed by another "unsigned int" that + * is a copy of the length --- so the total tape space used is actually + * sizeof(unsigned int) more than the stored length value. This allows + * read-backwards. When the random access flag was not specified, the + * write/read routines may omit the extra length word. * * writetup is expected to write both length words as well as the tuple * data. 
When readtup is called, the tape is positioned just after the @@ -607,7 +618,7 @@ struct Sharedsort static Tuplesortstate *tuplesort_begin_common(int workMem, SortCoordinate coordinate, - bool randomAccess); + int sortopt); static void tuplesort_begin_batch(Tuplesortstate *state); static void puttuple_common(Tuplesortstate *state, SortTuple *tuple); static bool consider_abort_common(Tuplesortstate *state); @@ -668,14 +679,124 @@ static void free_sort_tuple(Tuplesortstate *state, SortTuple *stup); static void tuplesort_free(Tuplesortstate *state); static void tuplesort_updatemax(Tuplesortstate *state); +/* + * Specialized comparators that we can inline into specialized sorts. The goal + * is to try to sort two tuples without having to follow the pointers to the + * comparator or the tuple. + * + * XXX: For now, these fall back to comparator functions that will compare the + * leading datum a second time. + * + * XXX: For now, there is no specialization for cases where datum1 is + * authoritative and we don't even need to fall back to a callback at all (that + * would be true for types like int4/int8/timestamp/date, but not true for + * abbreviations of text or multi-key sorts. There could be! Is it worth it? + */ + +/* Used if first key's comparator is ssup_datum_unsigned_compare */ +static pg_attribute_always_inline int +qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplyUnsignedSortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. 
+ */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + +/* Used if first key's comparator is ssup_datum_signed_compare */ +static pg_attribute_always_inline int +qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplySignedSortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. + */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + +/* Used if first key's comparator is ssup_datum_int32_compare */ +static pg_attribute_always_inline int +qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) +{ + int compare; + + compare = ApplyInt32SortComparator(a->datum1, a->isnull1, + b->datum1, b->isnull1, + &state->sortKeys[0]); + + if (compare != 0) + return compare; + + /* + * No need to waste effort calling the tiebreak function when there are + * no other keys to sort on. + */ + if (state->onlyKey != NULL) + return 0; + + return state->comparetup(a, b, state); +} + /* * Special versions of qsort just for SortTuple objects. qsort_tuple() sorts * any variant of SortTuples, using the appropriate comparetup function. * qsort_ssup() is specialized for the case where the comparetup function * reduces to ApplySortComparator(), that is single-key MinimalTuple sorts - * and Datum sorts. + * and Datum sorts. qsort_tuple_{unsigned,signed,int32} are specialized for + * common comparison functions on pass-by-value leading datums. 
*/ +#define ST_SORT qsort_tuple_unsigned +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_unsigned_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + +#define ST_SORT qsort_tuple_signed +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + +#define ST_SORT qsort_tuple_int32 +#define ST_ELEMENT_TYPE SortTuple +#define ST_COMPARE(a, b, state) qsort_tuple_int32_compare(a, b, state) +#define ST_COMPARE_ARG_TYPE Tuplesortstate +#define ST_CHECK_FOR_INTERRUPTS +#define ST_SCOPE static +#define ST_DEFINE +#include "lib/sort_template.h" + #define ST_SORT qsort_tuple #define ST_ELEMENT_TYPE SortTuple #define ST_COMPARE_RUNTIME_POINTER @@ -712,21 +833,20 @@ static void tuplesort_updatemax(Tuplesortstate *state); * Each variant of tuplesort_begin has a workMem parameter specifying the * maximum number of kilobytes of RAM to use before spilling data to disk. * (The normal value of this parameter is work_mem, but some callers use - * other values.) Each variant also has a randomAccess parameter specifying - * whether the caller needs non-sequential access to the sort result. + * other values.) Each variant also has a sortopt which is a bitmask of + * sort options. 
See TUPLESORT_* definitions in tuplesort.h */ static Tuplesortstate * -tuplesort_begin_common(int workMem, SortCoordinate coordinate, - bool randomAccess) +tuplesort_begin_common(int workMem, SortCoordinate coordinate, int sortopt) { Tuplesortstate *state; MemoryContext maincontext; MemoryContext sortcontext; MemoryContext oldcontext; - /* See leader_takeover_tapes() remarks on randomAccess support */ - if (coordinate && randomAccess) + /* See leader_takeover_tapes() remarks on random access support */ + if (coordinate && (sortopt & TUPLESORT_RANDOMACCESS)) elog(ERROR, "random access disallowed under parallel sort"); /* @@ -763,7 +883,7 @@ tuplesort_begin_common(int workMem, SortCoordinate coordinate, pg_rusage_init(&state->ru_start); #endif - state->randomAccess = randomAccess; + state->sortopt = sortopt; state->tuples = true; /* @@ -842,11 +962,21 @@ tuplesort_begin_batch(Tuplesortstate *state) * eases memory management. Resetting at key points reduces * fragmentation. Note that the memtuples array of SortTuples is allocated * in the parent context, not this context, because there is no need to - * free memtuples early. + * free memtuples early. For bounded sorts, tuples may be pfreed in any + * order, so we use a regular aset.c context so that it can make use of + * free'd memory. When the sort is not bounded, we make use of a + * generation.c context as this keeps allocations more compact with less + * wastage. Allocations are also slightly more CPU efficient. 
*/ - state->tuplecontext = AllocSetContextCreate(state->sortcontext, - "Caller tuples", - ALLOCSET_DEFAULT_SIZES); + if (state->sortopt & TUPLESORT_ALLOWBOUNDED) + state->tuplecontext = AllocSetContextCreate(state->sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + else + state->tuplecontext = GenerationContextCreate(state->sortcontext, + "Caller tuples", + ALLOCSET_DEFAULT_SIZES); + state->status = TSS_INITIAL; state->bounded = false; @@ -897,10 +1027,10 @@ tuplesort_begin_heap(TupleDesc tupDesc, int nkeys, AttrNumber *attNums, Oid *sortOperators, Oid *sortCollations, bool *nullsFirstFlags, - int workMem, SortCoordinate coordinate, bool randomAccess) + int workMem, SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int i; @@ -912,7 +1042,7 @@ tuplesort_begin_heap(TupleDesc tupDesc, if (trace_sort) elog(LOG, "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", - nkeys, workMem, randomAccess ? 't' : 'f'); + nkeys, workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = nkeys; @@ -921,13 +1051,14 @@ tuplesort_begin_heap(TupleDesc tupDesc, false, /* no unique check */ nkeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_heap; state->copytup = copytup_heap; state->writetup = writetup_heap; state->readtup = readtup_heap; + state->haveDatum1 = true; state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ state->abbrevNext = 10; @@ -947,7 +1078,7 @@ tuplesort_begin_heap(TupleDesc tupDesc, sortKey->ssup_nulls_first = nullsFirstFlags[i]; sortKey->ssup_attno = attNums[i]; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey); } @@ -970,10 +1101,10 @@ Tuplesortstate * tuplesort_begin_cluster(TupleDesc tupDesc, Relation indexRel, int workMem, - SortCoordinate coordinate, bool randomAccess) + SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); BTScanInsert indexScanKey; MemoryContext oldcontext; int i; @@ -987,7 +1118,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, elog(LOG, "begin tuple sort: nkeys = %d, workMem = %d, randomAccess = %c", RelationGetNumberOfAttributes(indexRel), - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -996,7 +1127,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, false, /* no unique check */ state->nKeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_cluster; @@ -1007,6 +1138,15 @@ tuplesort_begin_cluster(TupleDesc tupDesc, state->indexInfo = BuildIndexInfo(indexRel); + /* + * If we don't have a simple leading attribute, we don't currently + * initialize datum1, so disable optimizations that require it. + */ + if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + state->haveDatum1 = false; + else + state->haveDatum1 = true; + state->tupDesc = tupDesc; /* assume we need not copy tupDesc */ indexScanKey = _bt_mkscankey(indexRel, NULL); @@ -1044,7 +1184,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc, (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; sortKey->ssup_attno = scanKey->sk_attno; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1065,12 +1205,13 @@ Tuplesortstate * tuplesort_begin_index_btree(Relation heapRel, Relation indexRel, bool enforceUnique, + bool uniqueNullsNotDistinct, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); BTScanInsert indexScanKey; MemoryContext oldcontext; int i; @@ -1082,7 +1223,7 @@ tuplesort_begin_index_btree(Relation heapRel, elog(LOG, "begin index sort: unique = %c, workMem = %d, randomAccess = %c", enforceUnique ? 't' : 'f', - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -1091,7 +1232,7 @@ tuplesort_begin_index_btree(Relation heapRel, enforceUnique, state->nKeys, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_index_btree; @@ -1099,10 +1240,12 @@ tuplesort_begin_index_btree(Relation heapRel, state->writetup = writetup_index; state->readtup = readtup_index; state->abbrevNext = 10; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; state->enforceUnique = enforceUnique; + state->uniqueNullsNotDistinct = uniqueNullsNotDistinct; indexScanKey = _bt_mkscankey(indexRel, NULL); @@ -1122,7 +1265,7 @@ tuplesort_begin_index_btree(Relation heapRel, (scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0; sortKey->ssup_attno = scanKey->sk_attno; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1147,10 +1290,10 @@ tuplesort_begin_index_hash(Relation heapRel, uint32 max_buckets, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; oldcontext = MemoryContextSwitchTo(state->maincontext); @@ -1163,7 +1306,8 @@ tuplesort_begin_index_hash(Relation heapRel, high_mask, low_mask, max_buckets, - workMem, randomAccess ? 't' : 'f'); + workMem, + sortopt & TUPLESORT_RANDOMACCESS ? 
't' : 'f'); #endif state->nKeys = 1; /* Only one sort column, the hash code */ @@ -1172,6 +1316,7 @@ tuplesort_begin_index_hash(Relation heapRel, state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; @@ -1190,10 +1335,10 @@ tuplesort_begin_index_gist(Relation heapRel, Relation indexRel, int workMem, SortCoordinate coordinate, - bool randomAccess) + int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int i; @@ -1203,7 +1348,7 @@ tuplesort_begin_index_gist(Relation heapRel, if (trace_sort) elog(LOG, "begin index sort: workMem = %d, randomAccess = %c", - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); #endif state->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel); @@ -1212,6 +1357,7 @@ tuplesort_begin_index_gist(Relation heapRel, state->copytup = copytup_index; state->writetup = writetup_index; state->readtup = readtup_index; + state->haveDatum1 = true; state->heapRel = heapRel; state->indexRel = indexRel; @@ -1229,7 +1375,7 @@ tuplesort_begin_index_gist(Relation heapRel, sortKey->ssup_nulls_first = false; sortKey->ssup_attno = i + 1; /* Convey if abbreviation optimization is applicable in principle */ - sortKey->abbreviate = (i == 0); + sortKey->abbreviate = (i == 0 && state->haveDatum1); AssertState(sortKey->ssup_attno != 0); @@ -1245,10 +1391,10 @@ tuplesort_begin_index_gist(Relation heapRel, Tuplesortstate * tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, int workMem, - SortCoordinate coordinate, bool randomAccess) + SortCoordinate coordinate, int sortopt) { Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, - randomAccess); + sortopt); MemoryContext oldcontext; int16 typlen; bool typbyval; @@ -1259,7 +1405,7 @@ tuplesort_begin_datum(Oid 
datumType, Oid sortOperator, Oid sortCollation, if (trace_sort) elog(LOG, "begin datum sort: workMem = %d, randomAccess = %c", - workMem, randomAccess ? 't' : 'f'); + workMem, sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f'); #endif state->nKeys = 1; /* always a one-column sort */ @@ -1268,7 +1414,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, false, /* no unique check */ 1, workMem, - randomAccess, + sortopt & TUPLESORT_RANDOMACCESS, PARALLEL_SORT(state)); state->comparetup = comparetup_datum; @@ -1276,6 +1422,7 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, state->writetup = writetup_datum; state->readtup = readtup_datum; state->abbrevNext = 10; + state->haveDatum1 = true; state->datumType = datumType; @@ -1334,6 +1481,8 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound) { /* Assert we're called before loading any tuples */ Assert(state->status == TSS_INITIAL && state->memtupcount == 0); + /* Assert we allow bounded sorts */ + Assert(state->sortopt & TUPLESORT_ALLOWBOUNDED); /* Can't set the bound twice, either */ Assert(!state->bounded); /* Also, this shouldn't be called in a parallel worker */ @@ -2162,7 +2311,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, switch (state->status) { case TSS_SORTEDINMEM: - Assert(forward || state->randomAccess); + Assert(forward || state->sortopt & TUPLESORT_RANDOMACCESS); Assert(!state->slabAllocatorUsed); if (forward) { @@ -2206,7 +2355,7 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward, break; case TSS_SORTEDONTAPE: - Assert(forward || state->randomAccess); + Assert(forward || state->sortopt & TUPLESORT_RANDOMACCESS); Assert(state->slabAllocatorUsed); /* @@ -2981,7 +3130,8 @@ mergeruns(Tuplesortstate *state) * sorted tape, we can stop at this point and do the final merge * on-the-fly. 
*/ - if (!state->randomAccess && state->nInputRuns <= state->nInputTapes + if ((state->sortopt & TUPLESORT_RANDOMACCESS) == 0 + && state->nInputRuns <= state->nInputTapes && !WORKER(state)) { /* Tell logtape.c we won't be writing anymore */ @@ -3063,7 +3213,6 @@ mergeonerun(Tuplesortstate *state) { stup.srctape = srcTapeIndex; tuplesort_heap_replace_top(state, &stup); - } else { @@ -3227,7 +3376,7 @@ tuplesort_rescan(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3260,7 +3409,7 @@ tuplesort_markpos(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3291,7 +3440,7 @@ tuplesort_restorepos(Tuplesortstate *state) { MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext); - Assert(state->randomAccess); + Assert(state->sortopt & TUPLESORT_RANDOMACCESS); switch (state->status) { @@ -3503,15 +3652,53 @@ tuplesort_sort_memtuples(Tuplesortstate *state) if (state->memtupcount > 1) { + /* + * Do we have the leading column's value or abbreviation in datum1, + * and is there a specialization for its comparator? 
+ */ + if (state->haveDatum1 && state->sortKeys) + { + if (state->sortKeys[0].comparator == ssup_datum_unsigned_cmp) + { + elog(DEBUG1, "qsort_tuple_unsigned"); + qsort_tuple_unsigned(state->memtuples, + state->memtupcount, + state); + return; + } + else if (state->sortKeys[0].comparator == ssup_datum_signed_cmp) + { + elog(DEBUG1, "qsort_tuple_signed"); + qsort_tuple_signed(state->memtuples, + state->memtupcount, + state); + return; + } + else if (state->sortKeys[0].comparator == ssup_datum_int32_cmp) + { + elog(DEBUG1, "qsort_tuple_int32"); + qsort_tuple_int32(state->memtuples, + state->memtupcount, + state); + return; + } + } + /* Can we use the single-key sort function? */ if (state->onlyKey != NULL) + { + elog(DEBUG1, "qsort_ssup"); qsort_ssup(state->memtuples, state->memtupcount, state->onlyKey); + } else + { + elog(DEBUG1, "qsort_tuple"); qsort_tuple(state->memtuples, state->memtupcount, state->comparetup, state); + } } } @@ -3850,7 +4037,8 @@ writetup_heap(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, (void *) tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -3873,7 +4061,8 @@ readtup_heap(Tuplesortstate *state, SortTuple *stup, /* read in the tuple proper */ tuple->t_len = tuplen; LogicalTapeReadExact(tape, tupbody, tupbodylen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? 
*/ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value */ @@ -3904,7 +4093,6 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, datum2; bool isnull1, isnull2; - AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; /* Be prepared to compare additional sort keys */ ltup = (HeapTuple) a->tuple; @@ -3912,7 +4100,7 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, tupDesc = state->tupDesc; /* Compare the leading sort key, if it's simple */ - if (leading != 0) + if (state->haveDatum1) { compare = ApplySortComparator(a->datum1, a->isnull1, b->datum1, b->isnull1, @@ -3922,6 +4110,8 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b, if (sortKey->abbrev_converter) { + AttrNumber leading = state->indexInfo->ii_IndexAttrNumbers[0]; + datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1); datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2); @@ -4019,7 +4209,7 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup) * set up first-column key value, and potentially abbreviate, if it's a * simple column */ - if (state->indexInfo->ii_IndexAttrNumbers[0] == 0) + if (!state->haveDatum1) return; original = heap_getattr(tuple, @@ -4084,7 +4274,8 @@ writetup_cluster(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, &tuple->t_self, sizeof(ItemPointerData)); LogicalTapeWrite(tape, tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -4110,11 +4301,12 @@ readtup_cluster(Tuplesortstate *state, SortTuple *stup, tuple->t_tableOid = InvalidOid; /* Read in the tuple body */ LogicalTapeReadExact(tape, tuple->t_data, tuple->t_len); - if (state->randomAccess) /* need trailing length word? 
*/ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value, if it's a simple column */ - if (state->indexInfo->ii_IndexAttrNumbers[0] != 0) + if (state->haveDatum1) stup->datum1 = heap_getattr(tuple, state->indexInfo->ii_IndexAttrNumbers[0], state->tupDesc, @@ -4200,14 +4392,15 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b, /* * If btree has asked us to enforce uniqueness, complain if two equal - * tuples are detected (unless there was at least one NULL field). + * tuples are detected (unless there was at least one NULL field and NULLS + * NOT DISTINCT was not set). * * It is sufficient to make the test here, because if two tuples are equal * they *must* get compared at some stage of the sort --- otherwise the * sort algorithm wouldn't have checked whether one must appear before the * other. */ - if (state->enforceUnique && !equal_hasnull) + if (state->enforceUnique && !(!state->uniqueNullsNotDistinct && equal_hasnull)) { Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; @@ -4333,7 +4526,8 @@ writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) tuplen = IndexTupleSize(tuple) + sizeof(tuplen); LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); LogicalTapeWrite(tape, (void *) tuple, IndexTupleSize(tuple)); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &tuplen, sizeof(tuplen)); if (!state->slabAllocatorUsed) @@ -4351,7 +4545,8 @@ readtup_index(Tuplesortstate *state, SortTuple *stup, IndexTuple tuple = (IndexTuple) readtup_alloc(state, tuplen); LogicalTapeReadExact(tape, tuple, tuplen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? 
*/ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); stup->tuple = (void *) tuple; /* set up first-column key value */ @@ -4421,7 +4616,8 @@ writetup_datum(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup) LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); LogicalTapeWrite(tape, waddr, tuplen); - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeWrite(tape, (void *) &writtenlen, sizeof(writtenlen)); if (!state->slabAllocatorUsed && stup->tuple) @@ -4461,7 +4657,8 @@ readtup_datum(Tuplesortstate *state, SortTuple *stup, stup->tuple = raddr; } - if (state->randomAccess) /* need trailing length word? */ + if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length + * word? */ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen)); } @@ -4696,3 +4893,47 @@ free_sort_tuple(Tuplesortstate *state, SortTuple *stup) stup->tuple = NULL; } } + +int +ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup) +{ + if (x < y) + return -1; + else if (x > y) + return 1; + else + return 0; +} + +int +ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) +{ +#if SIZEOF_DATUM == 8 + int64 xx = (int64) x; + int64 yy = (int64) y; +#else + int32 xx = (int32) x; + int32 yy = (int32) y; +#endif + + if (xx < yy) + return -1; + else if (xx > yy) + return 1; + else + return 0; +} + +int +ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup) +{ + int32 xx = (int32) x; + int32 yy = (int32) y; + + if (xx < yy) + return -1; + else if (xx > yy) + return 1; + else + return 0; +} From 1c6d6165ff7dab91fad9af0f6172eee6077b5529 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 26 May 2022 00:28:08 +0400 Subject: [PATCH 141/182] Update copyrights. 
Stamp 1.3.10 --- README.md | 4 ++++ src/rum.h | 4 ++-- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 4 ++-- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 4 ++-- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumsort.c | 4 ++-- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- 16 files changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 74871571c0..667e1a3586 100644 --- a/README.md +++ b/README.md @@ -297,3 +297,7 @@ Oleg Bartunov Postgres Professional Ltd., Russia Teodor Sigaev Postgres Professional Ltd., Russia Arthur Zakirov Postgres Professional Ltd., Russia + +Pavel Borisov Postgres Professional Ltd., Russia + +Maxim Orlov Postgres Professional Ltd., Russia diff --git a/src/rum.h b/src/rum.h index 5103935ab1..1b5b428e27 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,8 +3,8 @@ * rum.h * Exported definitions for RUM index. * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 2006-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2006-2022, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index ed7e6dacfb..1ee57dbe29 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 536fc5375b..97e3c37fda 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,8 +3,8 @@ * rum_ts_utils.c * various 
text-search functions * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/rumbtree.c b/src/rumbtree.c index 2c168b100d..f513848da4 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index 2b38bf46e3..ee93df9fe9 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 1533adb89b..723a8d3aae 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 9b759a1ff2..5b0139d95e 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index 3834394477..032a8a1d94 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,8 +4,8 @@ * fetch tuples from a RUM scan. * * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/ruminsert.c b/src/ruminsert.c index 852ee0679b..5fe33c38f7 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 65d781c710..42bca53822 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.c b/src/rumsort.c index 6a0c920d9f..39883b910f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -8,8 +8,8 @@ * src/backend/utils/sort/tuplesort.c. * * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 74189c37eb..55cd49d19d 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. 
* - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index a9ff1ef981..1d3719d15c 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 07a584121a..cce9cb09ee 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index a8c9e91a8f..c4e7dc145f 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * From c538b9f9c29884c8356f6227470122667157e553 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Thu, 26 May 2022 00:28:08 +0400 Subject: [PATCH 142/182] Update copyrights. 
Stamp 1.3.10 --- README.md | 4 ++++ src/rum.h | 4 ++-- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 4 ++-- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 4 ++-- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumsort.c | 4 ++-- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- 16 files changed, 23 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 74871571c0..667e1a3586 100644 --- a/README.md +++ b/README.md @@ -297,3 +297,7 @@ Oleg Bartunov Postgres Professional Ltd., Russia Teodor Sigaev Postgres Professional Ltd., Russia Arthur Zakirov Postgres Professional Ltd., Russia + +Pavel Borisov Postgres Professional Ltd., Russia + +Maxim Orlov Postgres Professional Ltd., Russia diff --git a/src/rum.h b/src/rum.h index 5103935ab1..1b5b428e27 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,8 +3,8 @@ * rum.h * Exported definitions for RUM index. * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 2006-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2006-2022, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index ed7e6dacfb..1ee57dbe29 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 536fc5375b..97e3c37fda 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,8 +3,8 @@ * rum_ts_utils.c * various 
text-search functions * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group * *------------------------------------------------------------------------- */ diff --git a/src/rumbtree.c b/src/rumbtree.c index 2c168b100d..f513848da4 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index 2b38bf46e3..ee93df9fe9 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 1533adb89b..723a8d3aae 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 9b759a1ff2..5b0139d95e 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index 3834394477..032a8a1d94 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,8 +4,8 @@ * fetch tuples from a RUM scan. * * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/ruminsert.c b/src/ruminsert.c index 852ee0679b..5fe33c38f7 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 65d781c710..42bca53822 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.c b/src/rumsort.c index 6a0c920d9f..39883b910f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -8,8 +8,8 @@ * src/backend/utils/sort/tuplesort.c. * * - * Portions Copyright (c) 2015-2021, Postgres Professional - * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group + * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * *------------------------------------------------------------------------- diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 74189c37eb..55cd49d19d 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. 
* - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index a9ff1ef981..1d3719d15c 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index 07a584121a..cce9cb09ee 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index a8c9e91a8f..c4e7dc145f 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. 
* - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2022, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * From 7197b83c9e718ef7bd4b2e17a6ca6851ae51a060 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Fri, 27 May 2022 13:45:43 +0300 Subject: [PATCH 143/182] Fix build with enable-dtrace refer to issue #108 --- src/disable_core_macro.h | 24 ++++++++++++++++++++++++ src/tuplesort10.c | 2 ++ src/tuplesort11.c | 2 ++ src/tuplesort12.c | 2 ++ src/tuplesort13.c | 2 ++ src/tuplesort14.c | 2 ++ src/tuplesort15.c | 2 ++ src/tuplesort96.c | 2 ++ 8 files changed, 38 insertions(+) create mode 100644 src/disable_core_macro.h diff --git a/src/disable_core_macro.h b/src/disable_core_macro.h new file mode 100644 index 0000000000..ed27ed3c2e --- /dev/null +++ b/src/disable_core_macro.h @@ -0,0 +1,24 @@ +/*------------------------------------------------------------------------- + * + * disable_core_macro.h + * Support including tuplesort.c from postgresql core code. 
+ * + * Copyright (c) 2022, Postgres Professional + * + *------------------------------------------------------------------------- + */ + +#ifndef __DISABLE_CORE_MACRO_H__ +#define __DISABLE_CORE_MACRO_H__ + +#undef TRACE_SORT +#undef DEBUG_BOUNDED_SORT +#undef TRACE_POSTGRESQL_SORT_START +#undef TRACE_POSTGRESQL_SORT_DONE + +#define TRACE_POSTGRESQL_SORT_START(arg1, arg2, arg3, arg4, arg5, arg6) \ + do {} while(0) +#define TRACE_POSTGRESQL_SORT_DONE(arg1, arg2) \ + do {} while(0) + +#endif /* __DISABLE_CORE_MACRO_H__ */ diff --git a/src/tuplesort10.c b/src/tuplesort10.c index 80bc67bc9e..5a829a9240 100644 --- a/src/tuplesort10.c +++ b/src/tuplesort10.c @@ -143,6 +143,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort11.c b/src/tuplesort11.c index 60223929e5..23de559073 100644 --- a/src/tuplesort11.c +++ b/src/tuplesort11.c @@ -114,6 +114,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort12.c b/src/tuplesort12.c index f975d24a98..796c1b8392 100644 --- a/src/tuplesort12.c +++ b/src/tuplesort12.c @@ -114,6 +114,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort13.c b/src/tuplesort13.c index 98d68a143d..87354a38b4 100644 --- a/src/tuplesort13.c +++ b/src/tuplesort13.c @@ -114,6 +114,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort14.c b/src/tuplesort14.c index b17347b214..85c8b10415 
100644 --- a/src/tuplesort14.c +++ b/src/tuplesort14.c @@ -114,6 +114,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort15.c b/src/tuplesort15.c index e8da988a73..6f7a59f5d6 100644 --- a/src/tuplesort15.c +++ b/src/tuplesort15.c @@ -118,6 +118,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 diff --git a/src/tuplesort96.c b/src/tuplesort96.c index d1c29b2f04..743e025b86 100644 --- a/src/tuplesort96.c +++ b/src/tuplesort96.c @@ -141,6 +141,8 @@ #include "utils/sortsupport.h" #include "utils/tuplesort.h" +/* Should be the last include */ +#include "disable_core_macro.h" /* sort-type codes for sort__start probes */ #define HEAP_SORT 0 From fc3b03b219cc001aff071445cf663ecca76a3727 Mon Sep 17 00:00:00 2001 From: Pavel Borisov Date: Fri, 27 May 2022 16:16:48 +0400 Subject: [PATCH 144/182] Fix TRACE_START redefinition in PG10 - --- src/disable_core_macro.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/disable_core_macro.h b/src/disable_core_macro.h index ed27ed3c2e..922ad4cf75 100644 --- a/src/disable_core_macro.h +++ b/src/disable_core_macro.h @@ -16,9 +16,18 @@ #undef TRACE_POSTGRESQL_SORT_START #undef TRACE_POSTGRESQL_SORT_DONE +#if PG_VERSION_NUM >= 110000 #define TRACE_POSTGRESQL_SORT_START(arg1, arg2, arg3, arg4, arg5, arg6) \ do {} while(0) +#else +#define TRACE_POSTGRESQL_SORT_START(arg1, arg2, arg3, arg4, arg5) \ + do {} while(0) +#endif + + #define TRACE_POSTGRESQL_SORT_DONE(arg1, arg2) \ do {} while(0) + + #endif /* __DISABLE_CORE_MACRO_H__ */ From 92f42d5cd8bb78ee1cb2287ea48b9b1e2b672a33 Mon Sep 17 00:00:00 2001 From: "Mikhail A. 
Kulagin" Date: Fri, 27 May 2022 16:19:32 +0300 Subject: [PATCH 145/182] * Fix travis warnings root: deprecated key sudo (The key `sudo` has no effect anymore.) root: missing dist, using the default xenial root: missing os, using the default linux * Add pg-15 --- .travis.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8952b507cd..8b87257d44 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,6 @@ -sudo: required +os: linux + +dist: bionic language: c @@ -21,6 +23,8 @@ notifications: on_failure: always env: + - PG_VERSION=15beta1 + - PG_VERSION=15beta1 LEVEL=hardcore - PG_VERSION=14 - PG_VERSION=14 LEVEL=hardcore - PG_VERSION=13 From 210abdd6432dca8155b1d1f20c4cad69d198013b Mon Sep 17 00:00:00 2001 From: Yura Sokolov Date: Thu, 21 Jul 2022 23:22:09 +0300 Subject: [PATCH 146/182] Revert "Make compatible with amroutine changes in V15" Change in IndexAmRoutine amroutine were reverted in e3fcca0d Revert changes in HOT handling of BRIN indexes This reverts commit 6f47dc73ad5281b5daeb53f0f6137aca292fa183. 
--- src/rumutil.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/rumutil.c b/src/rumutil.c index 1d3719d15c..8838bd8aeb 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -128,9 +128,6 @@ rumhandler(PG_FUNCTION_ARGS) amroutine->ampredlocks = true; #if PG_VERSION_NUM >= 100000 amroutine->amcanparallel = false; -#endif -#if PG_VERSION_NUM >= 150000 - amroutine->amhotblocking = true; #endif amroutine->amkeytype = InvalidOid; From 1ffa817690b2226710d3edc68cb2e3b26c549a2b Mon Sep 17 00:00:00 2001 From: Yura Sokolov Date: Fri, 22 Jul 2022 16:45:01 +0300 Subject: [PATCH 147/182] Update src/tuplessort15.c for new changes upto 2022-06-01 --- src/tuplesort15.c | 44 +++++++++++++++++++++----------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/tuplesort15.c b/src/tuplesort15.c index 6f7a59f5d6..37184cdcac 100644 --- a/src/tuplesort15.c +++ b/src/tuplesort15.c @@ -471,7 +471,7 @@ struct Tuplesortstate /* These are specific to the index_btree subcase: */ bool enforceUnique; /* complain if we find duplicate tuples */ - bool uniqueNullsNotDistinct; /* unique constraint null treatment */ + bool uniqueNullsNotDistinct; /* unique constraint null treatment */ /* These are specific to the index_hash subcase: */ uint32 high_mask; /* masks for sortable part of hash code */ @@ -708,8 +708,8 @@ qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) return compare; /* - * No need to waste effort calling the tiebreak function when there are - * no other keys to sort on. + * No need to waste effort calling the tiebreak function when there are no + * other keys to sort on. 
*/ if (state->onlyKey != NULL) return 0; @@ -717,6 +717,7 @@ qsort_tuple_unsigned_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) return state->comparetup(a, b, state); } +#if SIZEOF_DATUM >= 8 /* Used if first key's comparator is ssup_datum_signed_compare */ static pg_attribute_always_inline int qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) @@ -731,14 +732,15 @@ qsort_tuple_signed_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) return compare; /* - * No need to waste effort calling the tiebreak function when there are - * no other keys to sort on. + * No need to waste effort calling the tiebreak function when there are no + * other keys to sort on. */ if (state->onlyKey != NULL) return 0; return state->comparetup(a, b, state); } +#endif /* Used if first key's comparator is ssup_datum_int32_compare */ static pg_attribute_always_inline int @@ -747,15 +749,15 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) int compare; compare = ApplyInt32SortComparator(a->datum1, a->isnull1, - b->datum1, b->isnull1, - &state->sortKeys[0]); + b->datum1, b->isnull1, + &state->sortKeys[0]); if (compare != 0) return compare; /* - * No need to waste effort calling the tiebreak function when there are - * no other keys to sort on. + * No need to waste effort calling the tiebreak function when there are no + * other keys to sort on. 
*/ if (state->onlyKey != NULL) return 0; @@ -781,6 +783,7 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) #define ST_DEFINE #include "lib/sort_template.h" +#if SIZEOF_DATUM >= 8 #define ST_SORT qsort_tuple_signed #define ST_ELEMENT_TYPE SortTuple #define ST_COMPARE(a, b, state) qsort_tuple_signed_compare(a, b, state) @@ -789,6 +792,7 @@ qsort_tuple_int32_compare(SortTuple *a, SortTuple *b, Tuplesortstate *state) #define ST_SCOPE static #define ST_DEFINE #include "lib/sort_template.h" +#endif #define ST_SORT qsort_tuple_int32 #define ST_ELEMENT_TYPE SortTuple @@ -3662,23 +3666,22 @@ tuplesort_sort_memtuples(Tuplesortstate *state) { if (state->sortKeys[0].comparator == ssup_datum_unsigned_cmp) { - elog(DEBUG1, "qsort_tuple_unsigned"); qsort_tuple_unsigned(state->memtuples, state->memtupcount, state); return; } +#if SIZEOF_DATUM >= 8 else if (state->sortKeys[0].comparator == ssup_datum_signed_cmp) { - elog(DEBUG1, "qsort_tuple_signed"); qsort_tuple_signed(state->memtuples, state->memtupcount, state); return; } +#endif else if (state->sortKeys[0].comparator == ssup_datum_int32_cmp) { - elog(DEBUG1, "qsort_tuple_int32"); qsort_tuple_int32(state->memtuples, state->memtupcount, state); @@ -3689,13 +3692,11 @@ tuplesort_sort_memtuples(Tuplesortstate *state) /* Can we use the single-key sort function? 
*/ if (state->onlyKey != NULL) { - elog(DEBUG1, "qsort_ssup"); qsort_ssup(state->memtuples, state->memtupcount, state->onlyKey); } else { - elog(DEBUG1, "qsort_tuple"); qsort_tuple(state->memtuples, state->memtupcount, state->comparetup, @@ -4907,16 +4908,12 @@ ssup_datum_unsigned_cmp(Datum x, Datum y, SortSupport ssup) return 0; } +#if SIZEOF_DATUM >= 8 int ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) { -#if SIZEOF_DATUM == 8 - int64 xx = (int64) x; - int64 yy = (int64) y; -#else - int32 xx = (int32) x; - int32 yy = (int32) y; -#endif + int64 xx = DatumGetInt64(x); + int64 yy = DatumGetInt64(y); if (xx < yy) return -1; @@ -4925,12 +4922,13 @@ ssup_datum_signed_cmp(Datum x, Datum y, SortSupport ssup) else return 0; } +#endif int ssup_datum_int32_cmp(Datum x, Datum y, SortSupport ssup) { - int32 xx = (int32) x; - int32 yy = (int32) y; + int32 xx = DatumGetInt32(x); + int32 yy = DatumGetInt32(y); if (xx < yy) return -1; From 6530adcd0251024b4b97adcadfee69454811dd7d Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Mon, 4 Jul 2022 16:21:00 +0300 Subject: [PATCH 148/182] [PGPRO-6780] Fix Windows warnings --- src/rum.h | 6 +++--- src/rum_ts_utils.c | 2 +- src/rumsort.c | 3 +++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/rum.h b/src/rum.h index 1b5b428e27..c242e699e4 100644 --- a/src/rum.h +++ b/src/rum.h @@ -868,9 +868,9 @@ extern Datum rum_anyarray_distance(PG_FUNCTION_ARGS); /* GUC parameters */ -extern PGDLLIMPORT int RumFuzzySearchLimit; -extern PGDLLIMPORT float8 RumArraySimilarityThreshold; -extern PGDLLIMPORT int RumArraySimilarityFunction; +extern int RumFuzzySearchLimit; +extern float8 RumArraySimilarityThreshold; +extern int RumArraySimilarityFunction; /* diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 97e3c37fda..62ccacf7c7 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -172,7 +172,7 @@ typedef struct DocRepresentation *end; } Extention; -static float weights[] = {1.0/0.1f, 1.0/0.2f, 1.0/0.4f, 
1.0/1.0f}; +static float weights[] = {1.0f/0.1f, 1.0f/0.2f, 1.0f/0.4f, 1.0f/1.0f}; /* A dummy WordEntryPos array to use when haspos is false */ static WordEntryPosVector POSNULL = { diff --git a/src/rumsort.c b/src/rumsort.c index 39883b910f..045d166494 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -208,7 +208,10 @@ rum_item_size(RumTuplesortstate * state) else if (state->copytup == copytup_rumitem) return sizeof(RumScanItem); else + { elog (FATAL, "Unknown RUM state"); + return 0; + } } static void From 5719f28f1b52428ada7f7ba9ccc8aa1d9aef59b5 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Tue, 6 Sep 2022 16:26:23 +0300 Subject: [PATCH 149/182] Refactoring to be pg16 ready. --- src/rumsort.c | 184 +++++++++++++++++++++++++++++++------------------- 1 file changed, 115 insertions(+), 69 deletions(-) diff --git a/src/rumsort.c b/src/rumsort.c index 39883b910f..962ac1c12f 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -44,8 +44,8 @@ #endif /* - * We need extra field in a state structure but we should not modify struct RumTuplesortstate - * which is inherited from Tuplesortstate core function. + * We need extra field in a state structure but we should not modify struct + * RumTuplesortstate which is inherited from Tuplesortstate core function. 
*/ typedef struct RumTuplesortstateExt { @@ -55,18 +55,28 @@ typedef struct RumTuplesortstateExt static int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2); static int comparetup_rum(const SortTuple *a, const SortTuple *b, - RumTuplesortstate * state, bool compareItemPointer); + RumTuplesortstate *state, bool compareItemPointer); static int comparetup_rum_true(const SortTuple *a, const SortTuple *b, - RumTuplesortstate * state); + RumTuplesortstate *state); static int comparetup_rum_false(const SortTuple *a, const SortTuple *b, - RumTuplesortstate * state); + RumTuplesortstate *state); static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, - RumTuplesortstate * state); -static void copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup); -static void copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup); -static void *rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free); + RumTuplesortstate *state); +static void copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup); +static void copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, + void *tup); +static void *rum_tuplesort_getrum_internal(RumTuplesortstate *state, + bool forward, bool *should_free); + +#if PG_VERSION_NUM >= 160000 +# define TSS_GET(state) \ + TuplesortstateGetPublic((state)) +#else +# define TSS_GET(state) \ + (state) +#endif -static int +static inline int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2) { if (p1.ip_blkid.bi_hi < p2.ip_blkid.bi_hi) @@ -88,7 +98,8 @@ compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2) } static int -comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state, bool compareItemPointer) +comparetup_rum(const SortTuple *a, const SortTuple *b, + RumTuplesortstate *state, bool compareItemPointer) { RumSortItem *i1, *i2; @@ -104,7 +115,7 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * 
state i1 = (RumSortItem *) a->tuple; i2 = (RumSortItem *) b->tuple; - for (i = 1; i < state->nKeys; i++) + for (i = 1; i < TSS_GET(state)->nKeys; i++) { if (i1->data[i] < i2->data[i]) return -1; @@ -122,19 +133,22 @@ comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state } static int -comparetup_rum_true(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +comparetup_rum_true(const SortTuple *a, const SortTuple *b, + RumTuplesortstate *state) { return comparetup_rum(a, b, state, true); } static int -comparetup_rum_false(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +comparetup_rum_false(const SortTuple *a, const SortTuple *b, + RumTuplesortstate *state) { return comparetup_rum(a, b, state, false); } static int -comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * state) +comparetup_rumitem(const SortTuple *a, const SortTuple *b, + RumTuplesortstate *state) { RumItem *i1, *i2; @@ -171,23 +185,24 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate * s } static void -copytup_rum(RumTuplesortstate * state, SortTuple *stup, void *tup) +copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup) { RumSortItem *item = (RumSortItem *) tup; + int nKeys = TSS_GET(state)->nKeys; - stup->datum1 = Float8GetDatum(state->nKeys > 0 ? item->data[0] : 0); + stup->datum1 = Float8GetDatum(nKeys > 0 ? 
item->data[0] : 0); stup->isnull1 = false; stup->tuple = tup; - USEMEM(state, GetMemoryChunkSpace(tup)); + //USEMEM(state, GetMemoryChunkSpace(tup)); } static void -copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) +copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, void *tup) { stup->isnull1 = true; stup->tuple = palloc(sizeof(RumScanItem)); memcpy(stup->tuple, tup, sizeof(RumScanItem)); - USEMEM(state, GetMemoryChunkSpace(stup->tuple)); + //USEMEM(state, GetMemoryChunkSpace(stup->tuple)); } #if PG_VERSION_NUM >= 150000 @@ -200,19 +215,27 @@ copytup_rumitem(RumTuplesortstate * state, SortTuple *stup, void *tup) #define TAPE(state, LT_ARG) state->tapeset, LT_ARG #endif +static void readtup_rum(RumTuplesortstate *state, SortTuple *stup, + LT_TYPE LT_ARG, unsigned int len); + +static void readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, + LT_TYPE LT_ARG, unsigned int len); + static Size -rum_item_size(RumTuplesortstate * state) +rum_item_size(RumTuplesortstate *state) { - if (state->copytup == copytup_rum) - return RumSortItemSize(state->nKeys); - else if (state->copytup == copytup_rumitem) + if (TSS_GET(state)->readtup == readtup_rum) + return RumSortItemSize(TSS_GET(state)->nKeys); + else if (TSS_GET(state)->readtup == readtup_rumitem) return sizeof(RumScanItem); else elog (FATAL, "Unknown RUM state"); + return 0; /* Silence compiler */ } static void -writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) +writetup_rum_internal(RumTuplesortstate *state, LT_TYPE LT_ARG, + SortTuple *stup) { void *item = stup->tuple; size_t size = rum_item_size(state); @@ -223,28 +246,29 @@ writetup_rum_internal(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup LogicalTapeWrite(TAPE(state, LT_ARG), (void *) item, size); #if PG_VERSION_NUM >= 150000 - if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? 
*/ + if (TSS_GET(state)->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing + * length word? */ #else - if (state->randomAccess) /* need trailing length word? */ + if (TSS_GET(state)->randomAccess) /* need trailing length word? */ #endif - LogicalTapeWrite(TAPE(state, LT_ARG), - (void *) &writtenlen, sizeof(writtenlen)); + LogicalTapeWrite(TAPE(TSS_GET(state), LT_ARG), (void *) &writtenlen, + sizeof(writtenlen)); } static void -writetup_rum(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) +writetup_rum(RumTuplesortstate *state, LT_TYPE LT_ARG, SortTuple *stup) { writetup_rum_internal(state, LT_ARG, stup); } static void -writetup_rumitem(RumTuplesortstate * state, LT_TYPE LT_ARG, SortTuple *stup) +writetup_rumitem(RumTuplesortstate *state, LT_TYPE LT_ARG, SortTuple *stup) { writetup_rum_internal(state, LT_ARG, stup); } static void -readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, +readtup_rum_internal(RumTuplesortstate *state, SortTuple *stup, LT_TYPE LT_ARG, unsigned int len, bool is_item) { unsigned int tuplen = len - sizeof(unsigned int); @@ -253,36 +277,39 @@ readtup_rum_internal(RumTuplesortstate * state, SortTuple *stup, Assert(tuplen == size); - USEMEM(state, GetMemoryChunkSpace(item)); + //USEMEM(state, GetMemoryChunkSpace(item)); #if PG_VERSION_NUM >= 150000 LogicalTapeReadExact(LT_ARG, item, size); #else - LogicalTapeReadExact(state->tapeset, LT_ARG, item, size); + LogicalTapeReadExact(TSS_GET(state)->tapeset, LT_ARG, item, size); #endif stup->tuple = item; stup->isnull1 = is_item; if (!is_item) - stup->datum1 = Float8GetDatum(state->nKeys > 0 ? ((RumSortItem *) item)->data[0] : 0); + stup->datum1 = Float8GetDatum(TSS_GET(state)->nKeys > 0 ? + ((RumSortItem *) item)->data[0] : 0); #if PG_VERSION_NUM >= 150000 - if (state->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */ + if (TSS_GET(state)->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing + * length word? 
*/ LogicalTapeReadExact(LT_ARG, &tuplen, sizeof(tuplen)); #else - if (state->randomAccess) - LogicalTapeReadExact(state->tapeset, LT_ARG, &tuplen, sizeof(tuplen)); + if (TSS_GET(state)->randomAccess) + LogicalTapeReadExact(TSS_GET(state)->tapeset, LT_ARG, &tuplen, + sizeof(tuplen)); #endif } static void -readtup_rum(RumTuplesortstate * state, SortTuple *stup, - LT_TYPE LT_ARG, unsigned int len) +readtup_rum(RumTuplesortstate *state, SortTuple *stup, LT_TYPE LT_ARG, + unsigned int len) { readtup_rum_internal(state, stup, LT_ARG, len, false); } static void -readtup_rumitem(RumTuplesortstate * state, SortTuple *stup, - LT_TYPE LT_ARG, unsigned int len) +readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, LT_TYPE LT_ARG, + unsigned int len) { readtup_rum_internal(state, stup, LT_ARG, len, true); } @@ -305,7 +332,7 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, #endif MemoryContext oldcontext; - oldcontext = MemoryContextSwitchTo(state->sortcontext); + oldcontext = MemoryContextSwitchTo(TSS_GET(state)->sortcontext); #ifdef TRACE_SORT if (trace_sort) @@ -314,12 +341,11 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, nKeys, workMem, randomAccess ? 't' : 'f'); #endif - state->nKeys = nKeys; - - state->comparetup = compareItemPointer ? comparetup_rum_true : comparetup_rum_false; - state->copytup = copytup_rum; - state->writetup = writetup_rum; - state->readtup = readtup_rum; + TSS_GET(state)->nKeys = nKeys; + TSS_GET(state)->comparetup = compareItemPointer ? 
comparetup_rum_true : + comparetup_rum_false; + TSS_GET(state)->writetup = writetup_rum; + TSS_GET(state)->readtup = readtup_rum; MemoryContextSwitchTo(oldcontext); @@ -333,7 +359,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) RumTuplesortstateExt *rs; MemoryContext oldcontext; - oldcontext = MemoryContextSwitchTo(state->sortcontext); + oldcontext = MemoryContextSwitchTo(TSS_GET(state)->sortcontext); /* Allocate extended state in the same context as state */ rs = palloc(sizeof(*rs)); @@ -345,10 +371,9 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) #endif rs->cmp = cmp; - state->comparetup = comparetup_rumitem; - state->copytup = copytup_rumitem; - state->writetup = writetup_rumitem; - state->readtup = readtup_rumitem; + TSS_GET(state)->comparetup = comparetup_rumitem; + TSS_GET(state)->writetup = writetup_rumitem; + TSS_GET(state)->readtup = readtup_rumitem; memcpy(&rs->ts, state, sizeof(RumTuplesortstate)); pfree(state); /* just to be sure *state isn't used anywhere * else */ @@ -368,7 +393,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) * pointers afterwards! */ void -rum_tuplesort_end(RumTuplesortstate * state) +rum_tuplesort_end(RumTuplesortstate *state) { #if PG_VERSION_NUM >= 130000 tuplesort_free(state); @@ -382,39 +407,57 @@ rum_tuplesort_end(RumTuplesortstate * state) * RumSortItem. 
*/ MemoryContext -rum_tuplesort_get_memorycontext(RumTuplesortstate * state) +rum_tuplesort_get_memorycontext(RumTuplesortstate *state) { - return state->sortcontext; + return TSS_GET(state)->sortcontext; } void rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem *item) { - tuplesort_puttupleslot(state, (TupleTableSlot *) item); + MemoryContext oldcontext; + SortTuple stup; + + oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); + copytup_rum(state, &stup, item); + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); } void rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem *item) { - tuplesort_puttupleslot(state, (TupleTableSlot *) item); + MemoryContext oldcontext; + SortTuple stup; + + oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); + copytup_rumitem(state, &stup, item); + puttuple_common(state, &stup); + + MemoryContextSwitchTo(oldcontext); } void -rum_tuplesort_performsort(RumTuplesortstate * state) +rum_tuplesort_performsort(RumTuplesortstate *state) { tuplesort_performsort(state); } /* - * Internal routine to fetch the next index tuple in either forward or back direction. - * Returns NULL if no more tuples. Returned tuple belongs to tuplesort memory context. Caller may not rely on tuple remaining valid after any further manipulation of tuplesort. + * Internal routine to fetch the next index tuple in either forward or back + * direction. Returns NULL if no more tuples. Returned tuple belongs to + * tuplesort memory context. Caller may not rely on tuple remaining valid after + * any further manipulation of tuplesort. + * * If *should_free is set, the caller must pfree stup.tuple when done with it. * - * NOTE: in PG 10 and newer tuple is always allocated tuple in tuplesort context and - * should not be freed by caller. + * NOTE: in PG 10 and newer tuple is always allocated tuple in tuplesort context + * and should not be freed by caller. 
*/ static void * -rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *should_free) +rum_tuplesort_getrum_internal(RumTuplesortstate *state, bool forward, + bool *should_free) { #if PG_VERSION_NUM >= 100000 *should_free = false; @@ -425,13 +468,16 @@ rum_tuplesort_getrum_internal(RumTuplesortstate * state, bool forward, bool *sho } RumSortItem * -rum_tuplesort_getrum(RumTuplesortstate * state, bool forward, bool *should_free) +rum_tuplesort_getrum(RumTuplesortstate *state, bool forward, bool *should_free) { - return (RumSortItem *) rum_tuplesort_getrum_internal(state, forward, should_free); + return (RumSortItem *) rum_tuplesort_getrum_internal(state, forward, + should_free); } RumScanItem * -rum_tuplesort_getrumitem(RumTuplesortstate * state, bool forward, bool *should_free) +rum_tuplesort_getrumitem(RumTuplesortstate *state, bool forward, + bool *should_free) { - return (RumScanItem *) rum_tuplesort_getrum_internal(state, forward, should_free); + return (RumScanItem *) rum_tuplesort_getrum_internal(state, forward, + should_free); } From db8b9d6efa0a823e46109b819d4586c53ca11ed3 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Tue, 6 Sep 2022 17:08:18 +0300 Subject: [PATCH 150/182] Add support for pg16. 
--- src/rumsort.c | 60 +++++---- src/tuplesort16.c | 307 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 343 insertions(+), 24 deletions(-) create mode 100644 src/tuplesort16.c diff --git a/src/rumsort.c b/src/rumsort.c index 962ac1c12f..66d9fd322a 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -27,7 +27,10 @@ #include "rum.h" /* RumItem */ -#if PG_VERSION_NUM >= 150000 +#if PG_VERSION_NUM >= 160000 +#include "tuplesort16.c" +#undef TRACE_SORT +#elif PG_VERSION_NUM >= 150000 #include "tuplesort15.c" #elif PG_VERSION_NUM >= 140000 #include "tuplesort14.c" @@ -53,7 +56,6 @@ typedef struct RumTuplesortstateExt FmgrInfo *cmp; } RumTuplesortstateExt; -static int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2); static int comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state, bool compareItemPointer); static int comparetup_rum_true(const SortTuple *a, const SortTuple *b, @@ -69,11 +71,19 @@ static void *rum_tuplesort_getrum_internal(RumTuplesortstate *state, bool forward, bool *should_free); #if PG_VERSION_NUM >= 160000 -# define TSS_GET(state) \ - TuplesortstateGetPublic((state)) +# define TSS_GET(state) TuplesortstateGetPublic((state)) +#else +# define TSS_GET(state) (state) +#endif + +#if PG_VERSION_NUM >= 150000 +#define LT_TYPE LogicalTape * +#define LT_ARG tape +#define TAPE(state, LT_ARG) LT_ARG #else -# define TSS_GET(state) \ - (state) +#define LT_TYPE int +#define LT_ARG tapenum +#define TAPE(state, LT_ARG) state->tapeset, LT_ARG #endif static inline int @@ -152,12 +162,14 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, { RumItem *i1, *i2; + FmgrInfo *cmp; /* Extract RumItem from RumScanItem */ i1 = (RumItem *) a->tuple; i2 = (RumItem *) b->tuple; - if (((RumTuplesortstateExt *) state)->cmp) + cmp = ((RumTuplesortstateExt *) state)->cmp; + if (cmp != NULL) { if (i1->addInfoIsNull || i2->addInfoIsNull) { @@ -169,7 +181,7 @@ comparetup_rumitem(const SortTuple *a, const SortTuple *b, { int 
r; - r = DatumGetInt32(FunctionCall2(((RumTuplesortstateExt *) state)->cmp, + r = DatumGetInt32(FunctionCall2(cmp, i1->addInfo, i2->addInfo)); @@ -193,7 +205,7 @@ copytup_rum(RumTuplesortstate *state, SortTuple *stup, void *tup) stup->datum1 = Float8GetDatum(nKeys > 0 ? item->data[0] : 0); stup->isnull1 = false; stup->tuple = tup; - //USEMEM(state, GetMemoryChunkSpace(tup)); + USEMEM(state, GetMemoryChunkSpace(tup)); } static void @@ -202,19 +214,9 @@ copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, void *tup) stup->isnull1 = true; stup->tuple = palloc(sizeof(RumScanItem)); memcpy(stup->tuple, tup, sizeof(RumScanItem)); - //USEMEM(state, GetMemoryChunkSpace(stup->tuple)); + USEMEM(state, GetMemoryChunkSpace(stup->tuple)); } -#if PG_VERSION_NUM >= 150000 -#define LT_TYPE LogicalTape * -#define LT_ARG tape -#define TAPE(state, LT_ARG) LT_ARG -#else -#define LT_TYPE int -#define LT_ARG tapenum -#define TAPE(state, LT_ARG) state->tapeset, LT_ARG -#endif - static void readtup_rum(RumTuplesortstate *state, SortTuple *stup, LT_TYPE LT_ARG, unsigned int len); @@ -228,9 +230,9 @@ rum_item_size(RumTuplesortstate *state) return RumSortItemSize(TSS_GET(state)->nKeys); else if (TSS_GET(state)->readtup == readtup_rumitem) return sizeof(RumScanItem); - else - elog (FATAL, "Unknown RUM state"); - return 0; /* Silence compiler */ + + elog (FATAL, "Unknown RUM state"); + return 0; /* keep compiler quiet */ } static void @@ -277,7 +279,7 @@ readtup_rum_internal(RumTuplesortstate *state, SortTuple *stup, Assert(tuplen == size); - //USEMEM(state, GetMemoryChunkSpace(item)); + USEMEM(state, GetMemoryChunkSpace(item)); #if PG_VERSION_NUM >= 150000 LogicalTapeReadExact(LT_ARG, item, size); #else @@ -420,7 +422,12 @@ rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem *item) oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); copytup_rum(state, &stup, item); + +#if PG_VERSION_NUM >= 160000 + tuplesort_puttuple_common(state, &stup, false); +#else 
puttuple_common(state, &stup); +#endif MemoryContextSwitchTo(oldcontext); } @@ -433,7 +440,12 @@ rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem *item) oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); copytup_rumitem(state, &stup, item); + +#if PG_VERSION_NUM >= 160000 + tuplesort_puttuple_common(state, &stup, false); +#else puttuple_common(state, &stup); +#endif MemoryContextSwitchTo(oldcontext); } diff --git a/src/tuplesort16.c b/src/tuplesort16.c new file mode 100644 index 0000000000..694d8f1a83 --- /dev/null +++ b/src/tuplesort16.c @@ -0,0 +1,307 @@ +/*------------------------------------------------------------------------- + * + * tuplesort16.c + * This file is a copy-paste from src/backend/utils/sort/tuplesort.c + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * rum/tuplesort16.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include + +#include "catalog/pg_am.h" +#include "commands/tablespace.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "storage/shmem.h" +#include "utils/memutils.h" +#include "utils/pg_rusage.h" +#include "utils/rel.h" +#include "utils/tuplesort.h" + +/* GUC variables */ +#ifdef TRACE_SORT +bool trace_sort = false; +#endif + +/* + * During merge, we use a pre-allocated set of fixed-size slots to hold + * tuples. To avoid palloc/pfree overhead. + * + * Merge doesn't require a lot of memory, so we can afford to waste some, + * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the + * palloc() overhead is not significant anymore. + * + * 'nextfree' is valid when this chunk is in the free list. When in use, the + * slot holds a tuple. 
+ */ +#define SLAB_SLOT_SIZE 1024 + +typedef union SlabSlot +{ + union SlabSlot *nextfree; + char buffer[SLAB_SLOT_SIZE]; +} SlabSlot; + +/* + * Possible states of a Tuplesort object. These denote the states that + * persist between calls of Tuplesort routines. + */ +typedef enum +{ + TSS_INITIAL, /* Loading tuples; still within memory limit */ + TSS_BOUNDED, /* Loading tuples into bounded-size heap */ + TSS_BUILDRUNS, /* Loading tuples; writing to tape */ + TSS_SORTEDINMEM, /* Sort completed entirely in memory */ + TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ + TSS_FINALMERGE /* Performing final merge on-the-fly */ +} TupSortStatus; + +/* + * Parameters for calculation of number of tapes to use --- see inittapes() + * and tuplesort_merge_order(). + * + * In this calculation we assume that each tape will cost us about 1 blocks + * worth of buffer space. This ignores the overhead of all the other data + * structures needed for each tape, but it's probably close enough. + * + * MERGE_BUFFER_SIZE is how much buffer space we'd like to allocate for each + * input tape, for pre-reading (see discussion at top of file). This is *in + * addition to* the 1 block already included in TAPE_BUFFER_OVERHEAD. + */ +#define MINORDER 6 /* minimum merge order */ +#define MAXORDER 500 /* maximum merge order */ +#define TAPE_BUFFER_OVERHEAD BLCKSZ +#define MERGE_BUFFER_SIZE (BLCKSZ * 32) + + +/* + * Private state of a Tuplesort operation. + */ +struct Tuplesortstate +{ + TuplesortPublic base; + TupSortStatus status; /* enumerated value as shown above */ + bool bounded; /* did caller specify a maximum number of + * tuples to return? 
*/ + bool boundUsed; /* true if we made use of a bounded heap */ + int bound; /* if bounded, the maximum number of tuples */ + int64 availMem; /* remaining memory available, in bytes */ + int64 allowedMem; /* total memory allowed, in bytes */ + int maxTapes; /* max number of input tapes to merge in each + * pass */ + int64 maxSpace; /* maximum amount of space occupied among sort + * of groups, either in-memory or on-disk */ + bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk + * space, false when it's value for in-memory + * space */ + TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ + LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ + + /* + * This array holds the tuples now in sort memory. If we are in state + * INITIAL, the tuples are in no particular order; if we are in state + * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS + * and FINALMERGE, the tuples are organized in "heap" order per Algorithm + * H. In state SORTEDONTAPE, the array is not used. + */ + SortTuple *memtuples; /* array of SortTuple structs */ + int memtupcount; /* number of tuples currently present */ + int memtupsize; /* allocated length of memtuples array */ + bool growmemtuples; /* memtuples' growth still underway? */ + + /* + * Memory for tuples is sometimes allocated using a simple slab allocator, + * rather than with palloc(). Currently, we switch to slab allocation + * when we start merging. Merging only needs to keep a small, fixed + * number of tuples in memory at any time, so we can avoid the + * palloc/pfree overhead by recycling a fixed number of fixed-size slots + * to hold the tuples. + * + * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE + * slots. The allocation is sized to have one slot per tape, plus one + * additional slot. 
We need that many slots to hold all the tuples kept + * in the heap during merge, plus the one we have last returned from the + * sort, with tuplesort_gettuple. + * + * Initially, all the slots are kept in a linked list of free slots. When + * a tuple is read from a tape, it is put to the next available slot, if + * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd + * instead. + * + * When we're done processing a tuple, we return the slot back to the free + * list, or pfree() if it was palloc'd. We know that a tuple was + * allocated from the slab, if its pointer value is between + * slabMemoryBegin and -End. + * + * When the slab allocator is used, the USEMEM/LACKMEM mechanism of + * tracking memory usage is not used. + */ + bool slabAllocatorUsed; + + char *slabMemoryBegin; /* beginning of slab memory arena */ + char *slabMemoryEnd; /* end of slab memory arena */ + SlabSlot *slabFreeHead; /* head of free list */ + + /* Memory used for input and output tape buffers. */ + size_t tape_buffer_mem; + + /* + * When we return a tuple to the caller in tuplesort_gettuple_XXX, that + * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE + * modes), we remember the tuple in 'lastReturnedTuple', so that we can + * recycle the memory on next gettuple call. + */ + void *lastReturnedTuple; + + /* + * While building initial runs, this is the current output run number. + * Afterwards, it is the number of initial runs we made. + */ + int currentRun; + + /* + * Logical tapes, for merging. + * + * The initial runs are written in the output tapes. In each merge pass, + * the output tapes of the previous pass become the input tapes, and new + * output tapes are created as needed. When nInputTapes equals + * nInputRuns, there is only one merge pass left. 
+ */ + LogicalTape **inputTapes; + int nInputTapes; + int nInputRuns; + + LogicalTape **outputTapes; + int nOutputTapes; + int nOutputRuns; + + LogicalTape *destTape; /* current output tape */ + + /* + * These variables are used after completion of sorting to keep track of + * the next tuple to return. (In the tape case, the tape's current read + * position is also critical state.) + */ + LogicalTape *result_tape; /* actual tape of finished output */ + int current; /* array index (only used if SORTEDINMEM) */ + bool eof_reached; /* reached EOF (needed for cursors) */ + + /* markpos_xxx holds marked position for mark and restore */ + long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ + int markpos_offset; /* saved "current", or offset in tape block */ + bool markpos_eof; /* saved "eof_reached" */ + + /* + * These variables are used during parallel sorting. + * + * worker is our worker identifier. Follows the general convention that + * -1 value relates to a leader tuplesort, and values >= 0 worker + * tuplesorts. (-1 can also be a serial tuplesort.) + * + * shared is mutable shared memory state, which is used to coordinate + * parallel sorts. + * + * nParticipants is the number of worker Tuplesortstates known by the + * leader to have actually been launched, which implies that they must + * finish a run that the leader needs to merge. Typically includes a + * worker state held by the leader process itself. Set in the leader + * Tuplesortstate only. + */ + int worker; + Sharedsort *shared; + int nParticipants; + + /* + * Additional state for managing "abbreviated key" sortsupport routines + * (which currently may be used by all cases except the hash index case). + * Tracks the intervals at which the optimization's effectiveness is + * tested. + */ + int64 abbrevNext; /* Tuple # at which to next check + * applicability */ + + /* + * Resource snapshot for time of sort start. 
+ */ +#ifdef TRACE_SORT + PGRUsage ru_start; +#endif +}; + +#define FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0) +#define USEMEM(state,amt) ((state)->availMem -= (amt)) +#define SERIAL(state) ((state)->shared == NULL) + +/* + * tuplesort_free + * + * Internal routine for freeing resources of tuplesort. + */ +static void +tuplesort_free(Tuplesortstate *state) +{ + /* context swap probably not needed, but let's be safe */ + MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext); + +#ifdef TRACE_SORT + long spaceUsed; + + if (state->tapeset) + spaceUsed = LogicalTapeSetBlocks(state->tapeset); + else + spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; +#endif + + /* + * Delete temporary "tape" files, if any. + * + * Note: want to include this in reported total cost of sort, hence need + * for two #ifdef TRACE_SORT sections. + * + * We don't bother to destroy the individual tapes here. They will go away + * with the sortcontext. (In TSS_FINALMERGE state, we have closed + * finished tapes already.) + */ + if (state->tapeset) + LogicalTapeSetClose(state->tapeset); + +#ifdef TRACE_SORT + if (trace_sort) + { + if (state->tapeset) + elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", + SERIAL(state) ? "external sort" : "parallel external sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + else + elog(LOG, "%s of worker %d ended, %ld KB used: %s", + SERIAL(state) ? "internal sort" : "unperformed parallel sort", + state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); + } + + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); +#else + + /* + * If you disabled TRACE_SORT, you can still probe sort__done, but you + * ain't getting space-used stats. 
+ */ + TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); +#endif + + FREESTATE(state); + MemoryContextSwitchTo(oldcontext); + + /* + * Free the per-sort memory context, thereby releasing all working memory. + */ + MemoryContextReset(state->base.sortcontext); +} From 919c74fc4b51080b45222e0d4c75902e5f2725aa Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Tue, 6 Sep 2022 18:15:57 +0300 Subject: [PATCH 151/182] Get rid of copying code from pg core. --- src/rumsort.c | 123 +++++++++++++++---- src/tuplesort16.c | 307 ---------------------------------------------- 2 files changed, 96 insertions(+), 334 deletions(-) delete mode 100644 src/tuplesort16.c diff --git a/src/rumsort.c b/src/rumsort.c index 66d9fd322a..b5691301c9 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -28,8 +28,10 @@ #include "rum.h" /* RumItem */ #if PG_VERSION_NUM >= 160000 -#include "tuplesort16.c" -#undef TRACE_SORT +/* + * After allocating a public interface for Tuplesortstate, no need to include + * source code from pg-core. + */ #elif PG_VERSION_NUM >= 150000 #include "tuplesort15.c" #elif PG_VERSION_NUM >= 140000 @@ -46,6 +48,26 @@ #include "tuplesort96.c" #endif +/* + * In case of using custom compare function we should store function pointer in + * sort state in order to use it later. + */ + +#if PG_VERSION_NUM >= 160000 +/* + * After allocating a public interface for Tuplesortstate we may use + * TuplesortPublic->arg field to store pointer to the compare function. + */ + +/* GUC variables */ +#ifdef TRACE_SORT +extern PGDLLIMPORT bool trace_sort; +#endif + +/* All memory management should be inside Tuplesortstate module. */ +#define USEMEM(state,amt) do {} while(0) + +#else /* PG_VERSION_NUM >= 160000 */ /* * We need extra field in a state structure but we should not modify struct * RumTuplesortstate which is inherited from Tuplesortstate core function.
@@ -55,6 +77,7 @@ typedef struct RumTuplesortstateExt RumTuplesortstate ts; FmgrInfo *cmp; } RumTuplesortstateExt; +#endif /* PG_VERSION_NUM < 160000 */ static int comparetup_rum(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state, bool compareItemPointer); @@ -70,12 +93,18 @@ static void copytup_rumitem(RumTuplesortstate *state, SortTuple *stup, static void *rum_tuplesort_getrum_internal(RumTuplesortstate *state, bool forward, bool *should_free); +/* + * Tuplesortstate handling should be done through this macro. + */ #if PG_VERSION_NUM >= 160000 # define TSS_GET(state) TuplesortstateGetPublic((state)) #else # define TSS_GET(state) (state) #endif +/* + * Logical tape handling should be done through this macro. + */ #if PG_VERSION_NUM >= 150000 #define LT_TYPE LogicalTape * #define LT_ARG tape @@ -86,6 +115,25 @@ static void *rum_tuplesort_getrum_internal(RumTuplesortstate *state, #define TAPE(state, LT_ARG) state->tapeset, LT_ARG #endif +/* + * Just for convenience and uniformity. + */ +#if PG_VERSION_NUM >= 110000 +#define tuplesort_begin_common(x,y) tuplesort_begin_common((x), NULL, (y)) +#endif + +/* + * Trace log wrapper. + */ +#ifdef TRACE_SORT +# define LOG_SORT(...) \ + if (trace_sort) \ + ereport(LOG, errmsg_internal(__VA_ARGS__)) +#else +# define LOG_SORT(...) 
\ + {} +#endif + static inline int compare_rum_itempointer(ItemPointerData p1, ItemPointerData p2) { @@ -156,19 +204,29 @@ comparetup_rum_false(const SortTuple *a, const SortTuple *b, return comparetup_rum(a, b, state, false); } +static inline FmgrInfo * +comparetup_rumitem_custom_fun(RumTuplesortstate *state) +{ +#if PG_VERSION_NUM >= 160000 + return (FmgrInfo *) TSS_GET(state)->arg; +#else + return ((RumTuplesortstateExt *) state)->cmp; +#endif +} + static int comparetup_rumitem(const SortTuple *a, const SortTuple *b, RumTuplesortstate *state) { - RumItem *i1, - *i2; - FmgrInfo *cmp; + RumItem *i1, + *i2; + FmgrInfo *cmp; /* Extract RumItem from RumScanItem */ i1 = (RumItem *) a->tuple; i2 = (RumItem *) b->tuple; - cmp = ((RumTuplesortstateExt *) state)->cmp; + cmp = comparetup_rumitem_custom_fun(state); if (cmp != NULL) { if (i1->addInfoIsNull || i2->addInfoIsNull) @@ -242,17 +300,21 @@ writetup_rum_internal(RumTuplesortstate *state, LT_TYPE LT_ARG, void *item = stup->tuple; size_t size = rum_item_size(state); unsigned int writtenlen = size + sizeof(unsigned int); + bool randomAccess; LogicalTapeWrite(TAPE(state, LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); LogicalTapeWrite(TAPE(state, LT_ARG), (void *) item, size); -#if PG_VERSION_NUM >= 150000 - if (TSS_GET(state)->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing - * length word? */ -#else - if (TSS_GET(state)->randomAccess) /* need trailing length word? 
*/ -#endif + + randomAccess = +# if PG_VERSION_NUM >= 150000 + (TSS_GET(state)->sortopt & TUPLESORT_RANDOMACCESS) != 0; +# else + TSS_GET(state)->randomAccess; +# endif + + if (randomAccess) LogicalTapeWrite(TAPE(TSS_GET(state), LT_ARG), (void *) &writtenlen, sizeof(writtenlen)); } @@ -280,6 +342,7 @@ readtup_rum_internal(RumTuplesortstate *state, SortTuple *stup, Assert(tuplen == size); USEMEM(state, GetMemoryChunkSpace(item)); + #if PG_VERSION_NUM >= 150000 LogicalTapeReadExact(LT_ARG, item, size); #else @@ -316,10 +379,6 @@ readtup_rumitem(RumTuplesortstate *state, SortTuple *stup, LT_TYPE LT_ARG, readtup_rum_internal(state, stup, LT_ARG, len, true); } -#if PG_VERSION_NUM >= 110000 -#define tuplesort_begin_common(x,y) tuplesort_begin_common((x), NULL, (y)) -#endif - RumTuplesortstate * rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, bool compareItemPointer) @@ -336,12 +395,8 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, oldcontext = MemoryContextSwitchTo(TSS_GET(state)->sortcontext); -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin rum sort: nKeys = %d, workMem = %d, randomAccess = %c", + LOG_SORT("begin rum sort: nKeys = %d, workMem = %d, randomAccess = %c", nKeys, workMem, randomAccess ? 't' : 'f'); -#endif TSS_GET(state)->nKeys = nKeys; TSS_GET(state)->comparetup = compareItemPointer ? 
comparetup_rum_true : @@ -357,6 +412,23 @@ rum_tuplesort_begin_rum(int workMem, int nKeys, bool randomAccess, RumTuplesortstate * rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) { +#if PG_VERSION_NUM >= 160000 + RumTuplesortstate *state = tuplesort_begin_common(workMem, false); + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(TSS_GET(state)->sortcontext); + + LOG_SORT("begin rumitem sort: workMem = %d", workMem); + + TSS_GET(state)->comparetup = comparetup_rumitem; + TSS_GET(state)->writetup = writetup_rumitem; + TSS_GET(state)->readtup = readtup_rumitem; + TSS_GET(state)->arg = cmp; + + MemoryContextSwitchTo(oldcontext); + + return state; +#else RumTuplesortstate *state = tuplesort_begin_common(workMem, false); RumTuplesortstateExt *rs; MemoryContext oldcontext; @@ -366,11 +438,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) /* Allocate extended state in the same context as state */ rs = palloc(sizeof(*rs)); -#ifdef TRACE_SORT - if (trace_sort) - elog(LOG, - "begin rumitem sort: workMem = %d", workMem); -#endif + LOG_SORT("begin rumitem sort: workMem = %d", workMem); rs->cmp = cmp; TSS_GET(state)->comparetup = comparetup_rumitem; @@ -383,6 +451,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) MemoryContextSwitchTo(oldcontext); return (RumTuplesortstate *) rs; +#endif } /* @@ -397,7 +466,7 @@ rum_tuplesort_begin_rumitem(int workMem, FmgrInfo *cmp) void rum_tuplesort_end(RumTuplesortstate *state) { -#if PG_VERSION_NUM >= 130000 +#if PG_VERSION_NUM < 160000 && PG_VERSION_NUM >= 130000 tuplesort_free(state); #else tuplesort_end(state); diff --git a/src/tuplesort16.c b/src/tuplesort16.c deleted file mode 100644 index 694d8f1a83..0000000000 --- a/src/tuplesort16.c +++ /dev/null @@ -1,307 +0,0 @@ -/*------------------------------------------------------------------------- - * - * tuplesort16.c - * This file is a copy-paste from src/backend/utils/sort/tuplesort.c - * - * Portions Copyright (c) 1996-2022, PostgreSQL Global 
Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * IDENTIFICATION - * rum/tuplesort16.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres.h" - -#include - -#include "catalog/pg_am.h" -#include "commands/tablespace.h" -#include "executor/executor.h" -#include "miscadmin.h" -#include "pg_trace.h" -#include "storage/shmem.h" -#include "utils/memutils.h" -#include "utils/pg_rusage.h" -#include "utils/rel.h" -#include "utils/tuplesort.h" - -/* GUC variables */ -#ifdef TRACE_SORT -bool trace_sort = false; -#endif - -/* - * During merge, we use a pre-allocated set of fixed-size slots to hold - * tuples. To avoid palloc/pfree overhead. - * - * Merge doesn't require a lot of memory, so we can afford to waste some, - * by using gratuitously-sized slots. If a tuple is larger than 1 kB, the - * palloc() overhead is not significant anymore. - * - * 'nextfree' is valid when this chunk is in the free list. When in use, the - * slot holds a tuple. - */ -#define SLAB_SLOT_SIZE 1024 - -typedef union SlabSlot -{ - union SlabSlot *nextfree; - char buffer[SLAB_SLOT_SIZE]; -} SlabSlot; - -/* - * Possible states of a Tuplesort object. These denote the states that - * persist between calls of Tuplesort routines. - */ -typedef enum -{ - TSS_INITIAL, /* Loading tuples; still within memory limit */ - TSS_BOUNDED, /* Loading tuples into bounded-size heap */ - TSS_BUILDRUNS, /* Loading tuples; writing to tape */ - TSS_SORTEDINMEM, /* Sort completed entirely in memory */ - TSS_SORTEDONTAPE, /* Sort completed, final run is on tape */ - TSS_FINALMERGE /* Performing final merge on-the-fly */ -} TupSortStatus; - -/* - * Parameters for calculation of number of tapes to use --- see inittapes() - * and tuplesort_merge_order(). - * - * In this calculation we assume that each tape will cost us about 1 blocks - * worth of buffer space. 
This ignores the overhead of all the other data - * structures needed for each tape, but it's probably close enough. - * - * MERGE_BUFFER_SIZE is how much buffer space we'd like to allocate for each - * input tape, for pre-reading (see discussion at top of file). This is *in - * addition to* the 1 block already included in TAPE_BUFFER_OVERHEAD. - */ -#define MINORDER 6 /* minimum merge order */ -#define MAXORDER 500 /* maximum merge order */ -#define TAPE_BUFFER_OVERHEAD BLCKSZ -#define MERGE_BUFFER_SIZE (BLCKSZ * 32) - - -/* - * Private state of a Tuplesort operation. - */ -struct Tuplesortstate -{ - TuplesortPublic base; - TupSortStatus status; /* enumerated value as shown above */ - bool bounded; /* did caller specify a maximum number of - * tuples to return? */ - bool boundUsed; /* true if we made use of a bounded heap */ - int bound; /* if bounded, the maximum number of tuples */ - int64 availMem; /* remaining memory available, in bytes */ - int64 allowedMem; /* total memory allowed, in bytes */ - int maxTapes; /* max number of input tapes to merge in each - * pass */ - int64 maxSpace; /* maximum amount of space occupied among sort - * of groups, either in-memory or on-disk */ - bool isMaxSpaceDisk; /* true when maxSpace is value for on-disk - * space, false when it's value for in-memory - * space */ - TupSortStatus maxSpaceStatus; /* sort status when maxSpace was reached */ - LogicalTapeSet *tapeset; /* logtape.c object for tapes in a temp file */ - - /* - * This array holds the tuples now in sort memory. If we are in state - * INITIAL, the tuples are in no particular order; if we are in state - * SORTEDINMEM, the tuples are in final sorted order; in states BUILDRUNS - * and FINALMERGE, the tuples are organized in "heap" order per Algorithm - * H. In state SORTEDONTAPE, the array is not used. 
- */ - SortTuple *memtuples; /* array of SortTuple structs */ - int memtupcount; /* number of tuples currently present */ - int memtupsize; /* allocated length of memtuples array */ - bool growmemtuples; /* memtuples' growth still underway? */ - - /* - * Memory for tuples is sometimes allocated using a simple slab allocator, - * rather than with palloc(). Currently, we switch to slab allocation - * when we start merging. Merging only needs to keep a small, fixed - * number of tuples in memory at any time, so we can avoid the - * palloc/pfree overhead by recycling a fixed number of fixed-size slots - * to hold the tuples. - * - * For the slab, we use one large allocation, divided into SLAB_SLOT_SIZE - * slots. The allocation is sized to have one slot per tape, plus one - * additional slot. We need that many slots to hold all the tuples kept - * in the heap during merge, plus the one we have last returned from the - * sort, with tuplesort_gettuple. - * - * Initially, all the slots are kept in a linked list of free slots. When - * a tuple is read from a tape, it is put to the next available slot, if - * it fits. If the tuple is larger than SLAB_SLOT_SIZE, it is palloc'd - * instead. - * - * When we're done processing a tuple, we return the slot back to the free - * list, or pfree() if it was palloc'd. We know that a tuple was - * allocated from the slab, if its pointer value is between - * slabMemoryBegin and -End. - * - * When the slab allocator is used, the USEMEM/LACKMEM mechanism of - * tracking memory usage is not used. - */ - bool slabAllocatorUsed; - - char *slabMemoryBegin; /* beginning of slab memory arena */ - char *slabMemoryEnd; /* end of slab memory arena */ - SlabSlot *slabFreeHead; /* head of free list */ - - /* Memory used for input and output tape buffers. 
*/ - size_t tape_buffer_mem; - - /* - * When we return a tuple to the caller in tuplesort_gettuple_XXX, that - * came from a tape (that is, in TSS_SORTEDONTAPE or TSS_FINALMERGE - * modes), we remember the tuple in 'lastReturnedTuple', so that we can - * recycle the memory on next gettuple call. - */ - void *lastReturnedTuple; - - /* - * While building initial runs, this is the current output run number. - * Afterwards, it is the number of initial runs we made. - */ - int currentRun; - - /* - * Logical tapes, for merging. - * - * The initial runs are written in the output tapes. In each merge pass, - * the output tapes of the previous pass become the input tapes, and new - * output tapes are created as needed. When nInputTapes equals - * nInputRuns, there is only one merge pass left. - */ - LogicalTape **inputTapes; - int nInputTapes; - int nInputRuns; - - LogicalTape **outputTapes; - int nOutputTapes; - int nOutputRuns; - - LogicalTape *destTape; /* current output tape */ - - /* - * These variables are used after completion of sorting to keep track of - * the next tuple to return. (In the tape case, the tape's current read - * position is also critical state.) - */ - LogicalTape *result_tape; /* actual tape of finished output */ - int current; /* array index (only used if SORTEDINMEM) */ - bool eof_reached; /* reached EOF (needed for cursors) */ - - /* markpos_xxx holds marked position for mark and restore */ - long markpos_block; /* tape block# (only used if SORTEDONTAPE) */ - int markpos_offset; /* saved "current", or offset in tape block */ - bool markpos_eof; /* saved "eof_reached" */ - - /* - * These variables are used during parallel sorting. - * - * worker is our worker identifier. Follows the general convention that - * -1 value relates to a leader tuplesort, and values >= 0 worker - * tuplesorts. (-1 can also be a serial tuplesort.) - * - * shared is mutable shared memory state, which is used to coordinate - * parallel sorts. 
- * - * nParticipants is the number of worker Tuplesortstates known by the - * leader to have actually been launched, which implies that they must - * finish a run that the leader needs to merge. Typically includes a - * worker state held by the leader process itself. Set in the leader - * Tuplesortstate only. - */ - int worker; - Sharedsort *shared; - int nParticipants; - - /* - * Additional state for managing "abbreviated key" sortsupport routines - * (which currently may be used by all cases except the hash index case). - * Tracks the intervals at which the optimization's effectiveness is - * tested. - */ - int64 abbrevNext; /* Tuple # at which to next check - * applicability */ - - /* - * Resource snapshot for time of sort start. - */ -#ifdef TRACE_SORT - PGRUsage ru_start; -#endif -}; - -#define FREESTATE(state) ((state)->base.freestate ? (*(state)->base.freestate) (state) : (void) 0) -#define USEMEM(state,amt) ((state)->availMem -= (amt)) -#define SERIAL(state) ((state)->shared == NULL) - -/* - * tuplesort_free - * - * Internal routine for freeing resources of tuplesort. - */ -static void -tuplesort_free(Tuplesortstate *state) -{ - /* context swap probably not needed, but let's be safe */ - MemoryContext oldcontext = MemoryContextSwitchTo(state->base.sortcontext); - -#ifdef TRACE_SORT - long spaceUsed; - - if (state->tapeset) - spaceUsed = LogicalTapeSetBlocks(state->tapeset); - else - spaceUsed = (state->allowedMem - state->availMem + 1023) / 1024; -#endif - - /* - * Delete temporary "tape" files, if any. - * - * Note: want to include this in reported total cost of sort, hence need - * for two #ifdef TRACE_SORT sections. - * - * We don't bother to destroy the individual tapes here. They will go away - * with the sortcontext. (In TSS_FINALMERGE state, we have closed - * finished tapes already.) 
- */ - if (state->tapeset) - LogicalTapeSetClose(state->tapeset); - -#ifdef TRACE_SORT - if (trace_sort) - { - if (state->tapeset) - elog(LOG, "%s of worker %d ended, %ld disk blocks used: %s", - SERIAL(state) ? "external sort" : "parallel external sort", - state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); - else - elog(LOG, "%s of worker %d ended, %ld KB used: %s", - SERIAL(state) ? "internal sort" : "unperformed parallel sort", - state->worker, spaceUsed, pg_rusage_show(&state->ru_start)); - } - - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, spaceUsed); -#else - - /* - * If you disabled TRACE_SORT, you can still probe sort__done, but you - * ain't getting space-used stats. - */ - TRACE_POSTGRESQL_SORT_DONE(state->tapeset != NULL, 0L); -#endif - - FREESTATE(state); - MemoryContextSwitchTo(oldcontext); - - /* - * Free the per-sort memory context, thereby releasing all working memory. - */ - MemoryContextReset(state->base.sortcontext); -} From 775de3bc1af14d53f9f6d4dd7882f96c7187cb84 Mon Sep 17 00:00:00 2001 From: Jacqui Shadforth Date: Sun, 18 Sep 2022 20:22:07 +0100 Subject: [PATCH 152/182] Update README with small language tweaks --- README.md | 67 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 667e1a3586..5f3e6887cf 100644 --- a/README.md +++ b/README.md @@ -8,38 +8,38 @@ ## Introduction -The **rum** module provides access method to work with `RUM` index. It is based -on the `GIN` access methods code. +The **rum** module provides an access method to work with a `RUM` index. It is based +on the `GIN` access method's code. -`GIN` index allows to perform fast full text search using `tsvector` and -`tsquery` types. But full text search with GIN index has several problems: +A `GIN` index allows performing fast full-text search using `tsvector` and +`tsquery` types. But full-text search with a GIN index has several problems: -- Slow ranking. 
It is need position information about lexems to ranking. `GIN` -index doesn't store positions of lexems. So after index scan we need additional -heap scan to retrieve lexems positions. -- Slow phrase search with `GIN` index. This problem relates with previous -problem. It is need position information to perform phrase search. -- Slow ordering by timestamp. `GIN` index can't store some related information -in index with lexemes. So it is necessary to perform additional heap scan. +- Slow ranking. It needs positional information about lexemes to do ranking. A `GIN` +index doesn't store positions of lexemes. So after index scanning, we need an +additional heap scan to retrieve lexeme positions. +- Slow phrase search with a `GIN` index. This problem relates to the previous +problem. It needs positional information to perform phrase search. +- Slow ordering by timestamp. A `GIN` index can't store some related information +in the index with lexemes. So it is necessary to perform an additional heap scan. -`RUM` solves this problems by storing additional information in posting tree. +`RUM` solves these problems by storing additional information in a posting tree. For example, positional information of lexemes or timestamps. You can get an -idea of `RUM` by the following picture: +idea of `RUM` with the following diagram: ![How RUM stores additional information](img/gin_rum.png) -Drawback of `RUM` is that it has slower build and insert time than `GIN`. -It is because we need to store additional information besides keys and because -`RUM` uses generic WAL records. +A drawback of `RUM` is that it has slower build and insert times than `GIN`. +This is because we need to store additional information besides keys and because +`RUM` uses generic Write-Ahead Log (WAL) records. 
## License -This module available under the [license](LICENSE) similar to +This module is available under the [license](LICENSE) similar to [PostgreSQL](http://www.postgresql.org/about/licence/). ## Installation -Before build and install **rum** you should ensure following: +Before building and installing **rum**, you should ensure the following: * PostgreSQL version is 9.6+. @@ -62,7 +62,7 @@ Typical installation procedure may look like this: ## Common operators and functions -**rum** module provides next operators. +The **rum** module provides the following operators. | Operator | Returns | Description | -------------------- | ------- | ---------------------------------------------- @@ -71,19 +71,19 @@ Typical installation procedure may look like this: | timestamp <=| timestamp | float8 | Returns distance only for left timestamps. | timestamp |=> timestamp | float8 | Returns distance only for right timestamps. -Last three operations also works for types timestamptz, int2, int4, int8, float4, float8, +The last three operations also work for types timestamptz, int2, int4, int8, float4, float8, money and oid. ## Operator classes -**rum** provides next operator classes. +**rum** provides the following operator classes. ### rum_tsvector_ops For type: `tsvector` -This operator class stores `tsvector` lexemes with positional information. Supports -ordering by `<=>` operator and prefix search. There is the example. +This operator class stores `tsvector` lexemes with positional information. It supports +ordering by the `<=>` operator and prefix search. See the example below. Let us assume we have the table: @@ -140,8 +140,8 @@ SELECT t, a <=> to_tsquery('english', 'place | situation') AS rank For type: `tsvector` -This operator class stores hash of `tsvector` lexemes with positional information. -Supports ordering by `<=>` operator. But **doesn't** support prefix search.
+This operator class stores a hash of `tsvector` lexemes with positional information. +It supports ordering by the `<=>` operator. It **doesn't** support prefix search. ### rum_TYPE_ops @@ -153,17 +153,18 @@ Supported operations: `<`, `<=`, `=`, `>=`, `>` for all types and `<=>`, `<=|` and `|=>` for int2, int4, int8, float4, float8, money, oid, timestamp and timestamptz types. -Supports ordering by `<=>`, `<=|` and `|=>` operators. Can be used with +This operator class supports ordering by the `<=>`, `<=|` and `|=>` operators. It can be used with `rum_tsvector_addon_ops`, `rum_tsvector_hash_addon_ops' and `rum_anyarray_addon_ops` operator classes. ### rum_tsvector_addon_ops For type: `tsvector` -This operator class stores `tsvector` lexems with any supported by module -field. There is the example. +This operator class stores `tsvector` lexemes with any field supported by the +module. See the example below. Let us assume we have the table: + ```sql CREATE TABLE tsts (id int, t tsvector, d timestamp); @@ -202,16 +203,16 @@ SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY For type: `tsvector` -This operator class stores hash of `tsvector` lexems with any supported by module +This operator class stores a hash of `tsvector` lexemes with any supported by module field. -**Doesn't** support prefix search. +It **doesn't** support prefix search. ### rum_tsquery_ops For type: `tsquery` -Stores branches of query tree in additional information. For example we have the table: +It stores branches of the query tree in additional information. For example, we have the table: ```sql CREATE TABLE query (q tsquery, tag text); @@ -240,8 +241,8 @@ SELECT * FROM query For type: `anyarray` This operator class stores `anyarray` elements with length of the array. -Supports operators `&&`, `@>`, `<@`, `=`, `%` operators. Supports ordering by `<=>` operator. -For example we have the table: +It supports the `&&`, `@>`, `<@`, `=`, `%` operators.
It also supports ordering by `<=>` operator. +For example, we have the table: ```sql CREATE TABLE test_array (i int2[]); From 43465cba642788d069058d3adf65343e1aedbb1d Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Mon, 21 Nov 2022 18:17:48 +0300 Subject: [PATCH 153/182] Fix compiler warnings due to new checks in PostgreSQL 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See the commit 0fe954c28584169938e5c0738cfaa9930ce77577 (Add -Wshadow=compatible-local to the standard compilation flags) in PostgreSQL 16. src/ruminsert.c: In function ‘rumHeapTupleBulkInsert’: src/ruminsert.c:533:51: warning: declaration of ‘attr’ shadows a previous local [-Wshadow=compatible-local] 533 | Form_pg_attribute attr = RumTupleDescAttr( | ^~~~ src/ruminsert.c:505:27: note: shadowed declaration is here 505 | Form_pg_attribute attr = buildstate->rumstate.addAttrs[attnum - 1]; | ^~~~ src/rumget.c: In function ‘startScanEntry’: src/rumget.c:635:41: warning: declaration of ‘page’ shadows a previous local [-Wshadow=compatible-local] 635 | Page page; | ^~~~ src/rumget.c:548:25: note: shadowed declaration is here 548 | Page page; | ^~~~ src/rumget.c: In function ‘entryGetNextItemList’: src/rumget.c:1054:33: warning: declaration of ‘page’ shadows a previous local [-Wshadow=compatible-local] 1054 | Page page; | ^~~~ src/rumget.c:986:25: note: shadowed declaration is here 986 | Page page; | ^~~~ --- src/rumget.c | 2 -- src/ruminsert.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rumget.c b/src/rumget.c index 032a8a1d94..70773ed712 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -632,7 +632,6 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) { BlockNumber rootPostingTree = RumGetPostingTree(itup); RumPostingTreeScan *gdi; - Page page; OffsetNumber maxoff, i; Pointer ptr; @@ -1051,7 +1050,6 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) { BlockNumber 
rootPostingTree = RumGetPostingTree(itup); RumPostingTreeScan *gdi; - Page page; OffsetNumber maxoff, i; Pointer ptr; diff --git a/src/ruminsert.c b/src/ruminsert.c index 5fe33c38f7..e8b209dadf 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -530,11 +530,11 @@ rumHeapTupleBulkInsert(RumBuildState * buildstate, OffsetNumber attnum, /* Check existance of additional information attribute in index */ if (!attr) { - Form_pg_attribute attr = RumTupleDescAttr( + Form_pg_attribute current_attr = RumTupleDescAttr( buildstate->rumstate.origTupdesc, attnum - 1); elog(ERROR, "additional information attribute \"%s\" is not found in index", - NameStr(attr->attname)); + NameStr(current_attr->attname)); } addInfo[i] = datumCopy(addInfo[i], attr->attbyval, attr->attlen); From bdfafedf7facbd077f0811f73ca0b961f4792903 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 19 Dec 2022 14:33:52 +0300 Subject: [PATCH 154/182] Add PG15 into travis --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8b87257d44..d23094f134 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,8 +23,8 @@ notifications: on_failure: always env: - - PG_VERSION=15beta1 - - PG_VERSION=15beta1 LEVEL=hardcore + - PG_VERSION=15 + - PG_VERSION=15 LEVEL=hardcore - PG_VERSION=14 - PG_VERSION=14 LEVEL=hardcore - PG_VERSION=13 From 41d60e823e23fa7c90e8cb5544592986b2ab78c3 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 19 Dec 2022 14:33:52 +0300 Subject: [PATCH 155/182] Add PG15 into travis --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 8b87257d44..d23094f134 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,8 +23,8 @@ notifications: on_failure: always env: - - PG_VERSION=15beta1 - - PG_VERSION=15beta1 LEVEL=hardcore + - PG_VERSION=15 + - PG_VERSION=15 LEVEL=hardcore - PG_VERSION=14 - PG_VERSION=14 LEVEL=hardcore - PG_VERSION=13 From 51c6af43f15c9368afe2bfaad365f483a4cb6d68 Mon 
Sep 17 00:00:00 2001 From: Maxim Orlov Date: Mon, 19 Dec 2022 16:32:11 +0300 Subject: [PATCH 156/182] Disable core.DivideZero Since all the errors are from pg-core code (i.e. tuplesortXX.c) --- travis/run_tests.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/travis/run_tests.sh b/travis/run_tests.sh index 5dff578c0e..a1ea1e3e45 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -64,6 +64,7 @@ if [ "$LEVEL" = "hardcore" ]; then # perform static analyzis scan-build --status-bugs \ -disable-checker core.UndefinedBinaryOperatorResult \ + -disable-checker core.DivideZero \ -disable-checker deadcode.DeadStores \ make USE_PGXS=1 || status=$? From 61443b9309988858781bdee259d4722c30ccc1c3 Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Fri, 25 Nov 2022 16:40:37 +0300 Subject: [PATCH 157/182] [PGPRO-7470] Added PGDLLEXPORT due to commit 8cf64d35 Tags: rum --- src/rum.h | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/rum.h b/src/rum.h index c242e699e4..7bcfa18221 100644 --- a/src/rum.h +++ b/src/rum.h @@ -415,7 +415,7 @@ extern bytea *rumoptions(Datum reloptions, bool validate); extern bool rumproperty(Oid index_oid, int attno, IndexAMProperty prop, const char *propname, bool *res, bool *isnull); -extern Datum rumhandler(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rumhandler(PG_FUNCTION_ARGS); extern void initRumState(RumState * state, Relation index); extern Buffer RumNewBuffer(Relation index); extern void RumInitBuffer(GenericXLogState *state, Buffer buffer, uint32 flags, @@ -836,16 +836,16 @@ extern RumItem *rumGetBAEntry(BuildAccumulator *accum, #define RUM_ADDINFO_JOIN 10 #define RUMNProcs 10 -extern Datum rum_extract_tsvector(PG_FUNCTION_ARGS); -extern Datum rum_extract_tsquery(PG_FUNCTION_ARGS); -extern Datum rum_tsvector_config(PG_FUNCTION_ARGS); -extern Datum rum_tsquery_pre_consistent(PG_FUNCTION_ARGS); -extern Datum rum_tsquery_distance(PG_FUNCTION_ARGS); -extern Datum 
rum_ts_distance_tt(PG_FUNCTION_ARGS); -extern Datum rum_ts_distance_ttf(PG_FUNCTION_ARGS); -extern Datum rum_ts_distance_td(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_extract_tsvector(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_extract_tsquery(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_tsvector_config(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_tsquery_pre_consistent(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_tsquery_distance(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_ts_distance_tt(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_ts_distance_ttf(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_ts_distance_td(PG_FUNCTION_ARGS); -extern Datum tsquery_to_distance_query(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum tsquery_to_distance_query(PG_FUNCTION_ARGS); /* rum_arr_utils.c */ typedef enum SimilarityType @@ -858,13 +858,13 @@ typedef enum SimilarityType #define RUM_SIMILARITY_FUNCTION_DEFAULT SMT_COSINE #define RUM_SIMILARITY_THRESHOLD_DEFAULT 0.5 -extern Datum rum_anyarray_config(PG_FUNCTION_ARGS); -extern Datum rum_extract_anyarray(PG_FUNCTION_ARGS); -extern Datum rum_extract_anyarray_query(PG_FUNCTION_ARGS); -extern Datum rum_anyarray_consistent(PG_FUNCTION_ARGS); -extern Datum rum_anyarray_ordering(PG_FUNCTION_ARGS); -extern Datum rum_anyarray_similar(PG_FUNCTION_ARGS); -extern Datum rum_anyarray_distance(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_anyarray_config(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_extract_anyarray(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_extract_anyarray_query(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_anyarray_consistent(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_anyarray_ordering(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_anyarray_similar(PG_FUNCTION_ARGS); +extern PGDLLEXPORT Datum rum_anyarray_distance(PG_FUNCTION_ARGS); /* GUC parameters */ From bfd9eba74067bedad0b4793138f056c1c16271ea Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Mon, 4 Jul 2022 16:21:00 
+0300 Subject: [PATCH 158/182] [PGPRO-6780] Fix Windows warnings --- src/rum.h | 6 +++--- src/rum_ts_utils.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/rum.h b/src/rum.h index 1b5b428e27..c242e699e4 100644 --- a/src/rum.h +++ b/src/rum.h @@ -868,9 +868,9 @@ extern Datum rum_anyarray_distance(PG_FUNCTION_ARGS); /* GUC parameters */ -extern PGDLLIMPORT int RumFuzzySearchLimit; -extern PGDLLIMPORT float8 RumArraySimilarityThreshold; -extern PGDLLIMPORT int RumArraySimilarityFunction; +extern int RumFuzzySearchLimit; +extern float8 RumArraySimilarityThreshold; +extern int RumArraySimilarityFunction; /* diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 97e3c37fda..62ccacf7c7 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -172,7 +172,7 @@ typedef struct DocRepresentation *end; } Extention; -static float weights[] = {1.0/0.1f, 1.0/0.2f, 1.0/0.4f, 1.0/1.0f}; +static float weights[] = {1.0f/0.1f, 1.0f/0.2f, 1.0f/0.4f, 1.0f/1.0f}; /* A dummy WordEntryPos array to use when haspos is false */ static WordEntryPosVector POSNULL = { From d387682c0eb222c0c23994dd4f13a7536482780a Mon Sep 17 00:00:00 2001 From: Marina Polyakova Date: Mon, 21 Nov 2022 18:17:48 +0300 Subject: [PATCH 159/182] Fix compiler warnings due to new checks in PostgreSQL 16 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See the commit 0fe954c28584169938e5c0738cfaa9930ce77577 (Add -Wshadow=compatible-local to the standard compilation flags) in PostgreSQL 16. 
src/ruminsert.c: In function ‘rumHeapTupleBulkInsert’: src/ruminsert.c:533:51: warning: declaration of ‘attr’ shadows a previous local [-Wshadow=compatible-local] 533 | Form_pg_attribute attr = RumTupleDescAttr( | ^~~~ src/ruminsert.c:505:27: note: shadowed declaration is here 505 | Form_pg_attribute attr = buildstate->rumstate.addAttrs[attnum - 1]; | ^~~~ src/rumget.c: In function ‘startScanEntry’: src/rumget.c:635:41: warning: declaration of ‘page’ shadows a previous local [-Wshadow=compatible-local] 635 | Page page; | ^~~~ src/rumget.c:548:25: note: shadowed declaration is here 548 | Page page; | ^~~~ src/rumget.c: In function ‘entryGetNextItemList’: src/rumget.c:1054:33: warning: declaration of ‘page’ shadows a previous local [-Wshadow=compatible-local] 1054 | Page page; | ^~~~ src/rumget.c:986:25: note: shadowed declaration is here 986 | Page page; | ^~~~ --- src/rumget.c | 2 -- src/ruminsert.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/rumget.c b/src/rumget.c index 032a8a1d94..70773ed712 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -632,7 +632,6 @@ startScanEntry(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) { BlockNumber rootPostingTree = RumGetPostingTree(itup); RumPostingTreeScan *gdi; - Page page; OffsetNumber maxoff, i; Pointer ptr; @@ -1051,7 +1050,6 @@ entryGetNextItemList(RumState * rumstate, RumScanEntry entry, Snapshot snapshot) { BlockNumber rootPostingTree = RumGetPostingTree(itup); RumPostingTreeScan *gdi; - Page page; OffsetNumber maxoff, i; Pointer ptr; diff --git a/src/ruminsert.c b/src/ruminsert.c index 5fe33c38f7..e8b209dadf 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -530,11 +530,11 @@ rumHeapTupleBulkInsert(RumBuildState * buildstate, OffsetNumber attnum, /* Check existance of additional information attribute in index */ if (!attr) { - Form_pg_attribute attr = RumTupleDescAttr( + Form_pg_attribute current_attr = RumTupleDescAttr( buildstate->rumstate.origTupdesc, attnum - 1); 
elog(ERROR, "additional information attribute \"%s\" is not found in index", - NameStr(attr->attname)); + NameStr(current_attr->attname)); } addInfo[i] = datumCopy(addInfo[i], attr->attbyval, attr->attlen); From eceff466e46be8aefed80da2b3df70dcbf9867d9 Mon Sep 17 00:00:00 2001 From: Alexey Savchkov Date: Tue, 31 Jan 2023 14:56:46 +0700 Subject: [PATCH 160/182] Switch to Ubuntu 22.04 in Travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index d23094f134..7468e754b1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ os: linux -dist: bionic +dist: jammy language: c From 3eecc1492454fdc9bf887d5cb1fd441a254985e9 Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Tue, 14 Feb 2023 18:11:47 +0300 Subject: [PATCH 161/182] Add tap test run in installcheck --- Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile b/Makefile index 570c44fdba..ee219590ac 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,8 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_ha macaddr inet cidr text varchar char bytea bit varbit \ numeric rum_weight +TAP_TESTS = 1 + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) From 5a405c07c6d77958e12270908aa2d528289a3f8e Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Wed, 15 Feb 2023 11:26:38 +0300 Subject: [PATCH 162/182] Fix double tap test run on pg12+ --- Makefile | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Makefile b/Makefile index ee219590ac..89c81c96a9 100644 --- a/Makefile +++ b/Makefile @@ -51,10 +51,27 @@ else REGRESS += array endif +# For 9.6-11 we have to make specific target with tap tests +SPECIFIC_TAP = + +ifeq ($(MAJORVERSION), 9.6) +SPECIFIC_TAP = yes +endif + +ifeq ($(MAJORVERSION), 10) +SPECIFIC_TAP = yes +endif + +ifeq ($(MAJORVERSION), 11) +SPECIFIC_TAP = yes +endif + +ifdef SPECIFIC_TAP wal-check: temp-install $(prove_check) check: wal-check +endif all: $(SQL_built) From 
c40b86eb70b3a2cba5d041312d495220a0099add Mon Sep 17 00:00:00 2001 From: Maxim Orlov Date: Wed, 15 Feb 2023 14:36:40 +0300 Subject: [PATCH 163/182] Compacting SPECIFIC_TAP conditions --- Makefile | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 89c81c96a9..1341f79b75 100644 --- a/Makefile +++ b/Makefile @@ -52,21 +52,7 @@ REGRESS += array endif # For 9.6-11 we have to make specific target with tap tests -SPECIFIC_TAP = - -ifeq ($(MAJORVERSION), 9.6) -SPECIFIC_TAP = yes -endif - -ifeq ($(MAJORVERSION), 10) -SPECIFIC_TAP = yes -endif - -ifeq ($(MAJORVERSION), 11) -SPECIFIC_TAP = yes -endif - -ifdef SPECIFIC_TAP +ifeq ($(MAJORVERSION), $(filter 9.6% 10% 11%, $(MAJORVERSION))) wal-check: temp-install $(prove_check) From 8c490862ca0a7fbfa8bf438e7f40efddcea99958 Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Tue, 3 Oct 2023 23:15:25 +0300 Subject: [PATCH 164/182] [PGPRO-8894] Correction of expression calculation for case recheckPhrase=true --- Makefile | 2 +- expected/expr.out | 26 ++++++++++++++++++++++++++ sql/expr.sql | 21 +++++++++++++++++++++ src/rum_ts_utils.c | 9 ++++++++- 4 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 expected/expr.out create mode 100644 sql/expr.sql diff --git a/Makefile b/Makefile index 1341f79b75..f24246528f 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_ha int2 int4 int8 float4 float8 money oid \ time timetz date interval \ macaddr inet cidr text varchar char bytea bit varbit \ - numeric rum_weight + numeric rum_weight expr TAP_TESTS = 1 diff --git a/expected/expr.out b/expected/expr.out new file mode 100644 index 0000000000..b57de73ff4 --- /dev/null +++ b/expected/expr.out @@ -0,0 +1,26 @@ +CREATE TABLE documents ( + en text not null, + score float not null, + textsearch_index_en_col tsvector +); +INSERT INTO documents VALUES ('the pet cat is in the shed', 56, 
to_tsvector('english', 'the pet cat is in the shed')); +CREATE INDEX textsearch_index_en ON documents + USING rum (textsearch_index_en_col rum_tsvector_addon_ops, score) + WITH (attach = 'score', to = 'textsearch_index_en_col'); +SET enable_seqscan=off; +-- should be 1 row +SELECT * FROM documents WHERE textsearch_index_en_col @@ ('pet'::tsquery <-> ('dog'::tsquery || 'cat'::tsquery)); + en | score | textsearch_index_en_col +----------------------------+-------+-------------------------- + the pet cat is in the shed | 56 | 'cat':3 'pet':2 'shed':7 +(1 row) + +SET enable_seqscan=on; +-- 1 row +SELECT * FROM documents WHERE textsearch_index_en_col @@ ('pet'::tsquery <-> ('dog'::tsquery || 'cat'::tsquery)); + en | score | textsearch_index_en_col +----------------------------+-------+-------------------------- + the pet cat is in the shed | 56 | 'cat':3 'pet':2 'shed':7 +(1 row) + +DROP TABLE documents; diff --git a/sql/expr.sql b/sql/expr.sql new file mode 100644 index 0000000000..d7b7ee3d24 --- /dev/null +++ b/sql/expr.sql @@ -0,0 +1,21 @@ +CREATE TABLE documents ( + en text not null, + score float not null, + textsearch_index_en_col tsvector +); + +INSERT INTO documents VALUES ('the pet cat is in the shed', 56, to_tsvector('english', 'the pet cat is in the shed')); + +CREATE INDEX textsearch_index_en ON documents + USING rum (textsearch_index_en_col rum_tsvector_addon_ops, score) + WITH (attach = 'score', to = 'textsearch_index_en_col'); + +SET enable_seqscan=off; +-- should be 1 row +SELECT * FROM documents WHERE textsearch_index_en_col @@ ('pet'::tsquery <-> ('dog'::tsquery || 'cat'::tsquery)); + +SET enable_seqscan=on; +-- 1 row +SELECT * FROM documents WHERE textsearch_index_en_col @@ ('pet'::tsquery <-> ('dog'::tsquery || 'cat'::tsquery)); + +DROP TABLE documents; diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 62ccacf7c7..3133a92c5a 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -284,7 +284,14 @@ checkcondition_rum(void *checkval, 
QueryOperand *val, ExecPhraseData *data) * addInfo */ if (gcv->recheckPhrase) - return ((val->weight) ? TS_MAYBE : TS_YES); + { + /* + * We cannot return TS_YES here (if "val->weight > 0"), because + * data->npos = 0 and we have incorrect processing of this result + * at the upper levels. So return TS_MAYBE. + */ + return TS_MAYBE; + } positions = DatumGetByteaP(gcv->addInfo[j]); ptrt = (char *) VARDATA_ANY(positions); From 61258f4cfd38e177912aa635e554fabc98dbdc5d Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Wed, 4 Oct 2023 00:38:42 +0300 Subject: [PATCH 165/182] Changes for travis-ci --- .travis.yml | 4 ++-- travis/Dockerfile.in | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index d23094f134..fd57d799d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,8 @@ notifications: on_failure: always env: + - PG_VERSION=16 + - PG_VERSION=16 LEVEL=hardcore - PG_VERSION=15 - PG_VERSION=15 LEVEL=hardcore - PG_VERSION=14 @@ -33,5 +35,3 @@ env: - PG_VERSION=12 LEVEL=hardcore - PG_VERSION=11 - PG_VERSION=11 LEVEL=hardcore - - PG_VERSION=10 - - PG_VERSION=10 LEVEL=hardcore diff --git a/travis/Dockerfile.in b/travis/Dockerfile.in index 2bfa60483d..66625248cc 100644 --- a/travis/Dockerfile.in +++ b/travis/Dockerfile.in @@ -4,10 +4,10 @@ FROM postgres:${PG_VERSION}-alpine RUN apk add --no-cache \ linux-headers \ openssl curl \ - perl perl-ipc-run \ + perl perl-ipc-run perl-dev perl-app-cpanminus perl-dbi \ make musl-dev gcc bison flex coreutils \ zlib-dev libedit-dev \ - clang clang-analyzer; + pkgconf icu-dev clang clang15 clang-analyzer; # Environment ENV LANG=C.UTF-8 PGDATA=/pg/data From e6ba1addec6eb598feda94da9e3a1a4de56b1e3a Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 11 Oct 2023 20:33:31 +0300 Subject: [PATCH 166/182] [PGPRO-8962] Remove unused variable. Variable 'totalCount'. It appeared in c1df4cc1ec1d86b5a06f28dfb0a8360a6f2f652c and was already unnecessary. Tags: rum.
--- src/rumdatapage.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 723a8d3aae..43fc1feb65 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -1060,7 +1060,6 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, RumItem maxLeftItem, curItem; RumItem item; - int totalCount = 0; int maxItemIndex = btree->curitem; /* * Must have lpageCopy MAXALIGNed to use PG macros to access data in @@ -1100,7 +1099,6 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, &item, &prevIptr, btree->rumstate, totalsize); maxItemIndex++; - totalCount++; maxItemSize = Max(maxItemSize, totalsize - prevTotalsize); } @@ -1112,7 +1110,6 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, totalsize = rumCheckPlaceToDataPageLeaf(btree->entryAttnum, &item, &prevIptr, btree->rumstate, totalsize); - totalCount++; maxItemSize = Max(maxItemSize, totalsize - prevTotalsize); } @@ -1134,7 +1131,6 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, 2 * RumDataPageSize - 2 * maxItemSize - 2 * MAXIMUM_ALIGNOF) { maxItemIndex++; - totalCount++; maxItemSize = Max(maxItemSize, newTotalsize - totalsize); totalsize = newTotalsize; @@ -1148,8 +1144,6 @@ dataSplitPageLeaf(RumBtree btree, Buffer lbuf, Buffer rbuf, totalsize = rumCheckPlaceToDataPageLeaf(btree->entryAttnum, &item, &prevIptr, btree->rumstate, totalsize); maxItemIndex++; - - totalCount++; } } From 6ccd0525648aac5bd4f39246861ce454e05af22d Mon Sep 17 00:00:00 2001 From: Koval Dmitry Date: Wed, 18 Oct 2023 02:53:18 +0300 Subject: [PATCH 167/182] [PGPRO-9026] Added check for attached column --- expected/rum_validate.out | 12 +++++++++++- sql/rum_validate.sql | 8 +++++++- src/rumutil.c | 3 +++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/expected/rum_validate.out b/expected/rum_validate.out index e662615e87..22000a1ee5 100644 --- a/expected/rum_validate.out +++ b/expected/rum_validate.out @@ -114,9 +114,19 @@ SET enable_indexscan=on; 
SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') - ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) + ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0); a ------------------------------ 'bar':2,8 'foo':1,3,6 'qq':7 (1 row) +-- PGPRO-9026: column and attached column cannot be the same +CREATE TABLE test_array (i int2[]); +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_addon_ops) WITH (attach = 'i', to = 'i'); +ERROR: column "i" and attached column cannot be the same +SELECT * FROM test_array WHERE i && '{1}'; + i +--- +(0 rows) + +DROP TABLE test_array; diff --git a/sql/rum_validate.sql b/sql/rum_validate.sql index feb8e2765a..455db5db56 100644 --- a/sql/rum_validate.sql +++ b/sql/rum_validate.sql @@ -58,4 +58,10 @@ SET enable_indexscan=on; SELECT a FROM test_rum WHERE a @@ to_tsquery('pg_catalog.english', 'bar') - ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0) + ORDER BY a <=> (to_tsquery('pg_catalog.english', 'bar'),0); + +-- PGPRO-9026: column and attached column cannot be the same +CREATE TABLE test_array (i int2[]); +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_addon_ops) WITH (attach = 'i', to = 'i'); +SELECT * FROM test_array WHERE i && '{1}'; +DROP TABLE test_array; diff --git a/src/rumutil.c b/src/rumutil.c index 8838bd8aeb..9048918d12 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -211,6 +211,9 @@ initRumState(RumState * state, Relation index) if (!AttributeNumberIsValid(state->attrnAddToColumn)) elog(ERROR, "attribute \"%s\" is not found in index", colname); + + if (state->attrnAddToColumn == state->attrnAttachColumn) + elog(ERROR, "column \"%s\" and attached column cannot be the same", colname); } if (!(AttributeNumberIsValid(state->attrnAttachColumn) && From cb1edffc57736cd2a4455f8d0feab0d69928da25 Mon Sep 17 00:00:00 2001 From: alexandervpotapov <144983308+alexandervpotapov@users.noreply.github.com> Date: Mon, 7 Oct 2024 21:03:41 +0300 Subject: [PATCH 168/182] 
Add compatibility with PostgreSQL 17. (#129) Fix compiler error that was introduced by the commit 6ed83d5f (Use bump memory context for tuplesorts). Fix 'array' test. Fix Travis CI build. --- expected/array.out | 16 + expected/array_1.out | 16 + expected/array_2.out | 899 ++++++++++++++++++++++++++++++++++++++ expected/array_3.out | 892 +++++++++++++++++++++++++++++++++++++ sql/array.sql | 19 +- src/rumsort.c | 28 +- travis/docker-compose.yml | 3 +- 7 files changed, 1869 insertions(+), 4 deletions(-) create mode 100644 expected/array_2.out create mode 100644 expected/array_3.out diff --git a/expected/array.out b/expected/array.out index 4094d98492..fe64c54f24 100644 --- a/expected/array.out +++ b/expected/array.out @@ -1,3 +1,19 @@ +/* + * --------------------------------------------- + * NOTE: This test behaves differenly on PgPro + * --------------------------------------------- + * + * -------------------- + * array.sql and array_1.sql + * -------------------- + * Test output for 64-bit and 32-bit systems respectively. + * + * -------------------- + * array_2.sql and array_3.sql + * -------------------- + * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows + * in the output has been changed. + */ set enable_seqscan=off; set enable_sort=off; /* diff --git a/expected/array_1.out b/expected/array_1.out index ec3abfb693..3f3bf80bf0 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -1,3 +1,19 @@ +/* + * --------------------------------------------- + * NOTE: This test behaves differenly on PgPro + * --------------------------------------------- + * + * -------------------- + * array.sql and array_1.sql + * -------------------- + * Test output for 64-bit and 32-bit systems respectively. + * + * -------------------- + * array_2.sql and array_3.sql + * -------------------- + * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows + * in the output has been changed. 
+ */ set enable_seqscan=off; set enable_sort=off; /* diff --git a/expected/array_2.out b/expected/array_2.out new file mode 100644 index 0000000000..0097a5b5f4 --- /dev/null +++ b/expected/array_2.out @@ -0,0 +1,899 @@ +/* + * --------------------------------------------- + * NOTE: This test behaves differenly on PgPro + * --------------------------------------------- + * + * -------------------- + * array.sql and array_1.sql + * -------------------- + * Test output for 64-bit and 32-bit systems respectively. + * + * -------------------- + * array_2.sql and array_3.sql + * -------------------- + * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows + * in the output has been changed. + */ +set enable_seqscan=off; +set enable_sort=off; +/* + * Complete checks for int2[]. + */ +CREATE TABLE test_array ( + i int2[] +); +INSERT INTO test_array VALUES ('{}'), ('{0}'), ('{1,2,3,4}'), ('{1,2,3}'), ('{1,2}'), ('{1}'); +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +SELECT NULL::int[] = '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] && '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] @> '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] <@ '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] % '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] <=> '{1}'; + ?column? 
+---------- + +(1 row) + +INSERT INTO test_array VALUES (NULL); +SELECT * FROM test_array WHERE i = '{1}'; + i +----- + {1} +(1 row) + +DELETE FROM test_array WHERE i IS NULL; +SELECT * FROM test_array WHERE i = '{NULL}'; +ERROR: array must not contain nulls +SELECT * FROM test_array WHERE i = '{1,2,3,NULL}'; +ERROR: array must not contain nulls +SELECT * FROM test_array WHERE i = '{{1,2},{3,4}}'; +ERROR: array must have 1 dimension +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i = '{}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i = '{0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i = '{1}'; + i +----- + {1} +(1 row) + +SELECT * FROM test_array WHERE i = '{1,2}'; + i +------- + {1,2} +(1 row) + +SELECT * FROM test_array WHERE i = '{2,1}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{1,2,3,3}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{0,0}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i && '{}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i && '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i && '{3}'; + i +----------- + {1,2,3,4} + {1,2,3} +(2 rows) + +SELECT * FROM test_array WHERE i && '{4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i && '{1,2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM 
test_array WHERE i && '{1,2,3}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{1,2,3,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{4,3,2,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{0,0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i && '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i @> '{}'; + i +----------- + {} + {0} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(6 rows) + +SELECT * FROM test_array WHERE i @> '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i @> '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i @> '{3}'; + i +----------- + {1,2,3,4} + {1,2,3} +(2 rows) + +SELECT * FROM test_array WHERE i @> '{4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{1,2,4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{1,2,3,4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{4,3,2,1}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{0,0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i @> '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i <@ '{}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i <@ '{1}'; + i +----- + {} + {1} +(2 rows) + +SELECT * FROM test_array WHERE i <@ 
'{2}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i <@ '{1,2,4}'; + i +------- + {} + {1,2} + {1} +(3 rows) + +SELECT * FROM test_array WHERE i <@ '{1,2,3,4}'; + i +----------- + {} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(5 rows) + +SELECT * FROM test_array WHERE i <@ '{4,3,2,1}'; + i +----------- + {} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(5 rows) + +SELECT * FROM test_array WHERE i <@ '{0,0}'; + i +----- + {} + {0} +(2 rows) + +SELECT * FROM test_array WHERE i <@ '{100}'; + i +---- + {} +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i % '{}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i % '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i % '{1,2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{4,3,2,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4,5}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i % '{1,10,20,30,40,50}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i % '{1,10,20,30}'; + i +----- + {1} +(1 row) + +SELECT * FROM test_array WHERE i % '{1,1,1,1,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{0,0}'; + 
i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i % '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{1}'::smallint[]) + Order By: (i <=> '{1}'::smallint[]) +(3 rows) + +SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; + i +----------- + {1} + {1,2} + {1,2,3} + {1,2,3,4} +(4 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ADD COLUMN add_info timestamp; +CREATE INDEX idx_array ON test_array +USING rum (i rum_anyarray_addon_ops, add_info) +WITH (attach = 'add_info', to = 'i'); +WITH q as ( + SELECT row_number() OVER (ORDER BY i) idx, ctid FROM test_array +) +UPDATE test_array SET add_info = '2016-05-16 14:21:25'::timestamp + + format('%s days', q.idx)::interval +FROM q WHERE test_array.ctid = q.ctid; +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +---------------------------------- + Seq Scan on test_array + Filter: (i % '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------ + Limit + -> Index Scan using idx_array on test_array + Index Cond: (i && '{1}'::smallint[]) + Order By: (add_info <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(4 rows) + +SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; + i | add_info +-----------+-------------------------- + {1} | Thu May 19 14:21:25 2016 + {1,2} | Fri May 20 14:21:25 2016 + {1,2,3} | Sat May 21 14:21:25 2016 + {1,2,3,4} | Sun May 22 14:21:25 2016 +(4 rows) + +DROP INDEX idx_array; +/* + * Sanity checks for popular array types. + */ +ALTER TABLE test_array ALTER COLUMN i TYPE int4[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::integer[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER 
COLUMN i TYPE int8[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::bigint[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE text[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN 
+------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::text[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE varchar[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::character varying[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE char[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) 
SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::bpchar[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE numeric[] USING i::numeric[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + 
Index Cond: (i % '{}'::numeric[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE float4[] USING i::float4[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::real[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE float8[] USING i::float8[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +---------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +----------------------------------------------- + Index 
Scan using idx_array on test_array + Index Cond: (i @> '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +---------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::double precision[]) +(2 rows) + +DROP INDEX idx_array; +/* + * Check ordering using distance operator + */ +CREATE TABLE test_array_order ( + i int2[] +); +\copy test_array_order(i) from 'data/rum_array.data'; +CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; + QUERY PLAN +------------------------------------------------------ + Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(3 rows) + +SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + i | distance +---------------------+------------------ + {20,23,51} | 1.73205080756888 + {33,51,20,77,23,65} | 2.44948974278318 + {6,97,20,89,23} | -1 + {20,98,30,23,1,66} | -1 + {57,23,39,46,50,20} | -1 + {81,20,26,22,23} | -1 + {73,23,20} | -1 + {18,23,10,90,15,20} | -1 + {23,76,34,23,2,20} | -1 + {20,60,45,23,29} | -1 + {23,89,38,20,40,95} | -1 + {23,20,72} | -1 +(12 rows) + diff --git a/expected/array_3.out b/expected/array_3.out new file mode 100644 index 0000000000..d5012c3a38 --- /dev/null +++ b/expected/array_3.out @@ -0,0 +1,892 @@ +/* + * --------------------------------------------- + * NOTE: This test behaves differenly on PgPro 
+ * --------------------------------------------- + * + * -------------------- + * array.sql and array_1.sql + * -------------------- + * Test output for 64-bit and 32-bit systems respectively. + * + * -------------------- + * array_2.sql and array_3.sql + * -------------------- + * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows + * in the output has been changed. + */ +set enable_seqscan=off; +set enable_sort=off; +/* + * Complete checks for int2[]. + */ +CREATE TABLE test_array ( + i int2[] +); +INSERT INTO test_array VALUES ('{}'), ('{0}'), ('{1,2,3,4}'), ('{1,2,3}'), ('{1,2}'), ('{1}'); +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +SELECT NULL::int[] = '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] && '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] @> '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] <@ '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] % '{1}'; + ?column? +---------- + +(1 row) + +SELECT NULL::int[] <=> '{1}'; + ?column? 
+---------- + +(1 row) + +INSERT INTO test_array VALUES (NULL); +SELECT * FROM test_array WHERE i = '{1}'; + i +----- + {1} +(1 row) + +DELETE FROM test_array WHERE i IS NULL; +SELECT * FROM test_array WHERE i = '{NULL}'; +ERROR: array must not contain nulls +SELECT * FROM test_array WHERE i = '{1,2,3,NULL}'; +ERROR: array must not contain nulls +SELECT * FROM test_array WHERE i = '{{1,2},{3,4}}'; +ERROR: array must have 1 dimension +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i = '{}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i = '{0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i = '{1}'; + i +----- + {1} +(1 row) + +SELECT * FROM test_array WHERE i = '{1,2}'; + i +------- + {1,2} +(1 row) + +SELECT * FROM test_array WHERE i = '{2,1}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{1,2,3,3}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{0,0}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i = '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i && '{}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i && '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i && '{3}'; + i +----------- + {1,2,3,4} + {1,2,3} +(2 rows) + +SELECT * FROM test_array WHERE i && '{4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i && '{1,2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM 
test_array WHERE i && '{1,2,3}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{1,2,3,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{4,3,2,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i && '{0,0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i && '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i @> '{}'; + i +----------- + {} + {0} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(6 rows) + +SELECT * FROM test_array WHERE i @> '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i @> '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i @> '{3}'; + i +----------- + {1,2,3,4} + {1,2,3} +(2 rows) + +SELECT * FROM test_array WHERE i @> '{4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{1,2,4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{1,2,3,4}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{4,3,2,1}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i @> '{0,0}'; + i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i @> '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i <@ '{}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i <@ '{1}'; + i +----- + {} + {1} +(2 rows) + +SELECT * FROM test_array WHERE i <@ 
'{2}'; + i +---- + {} +(1 row) + +SELECT * FROM test_array WHERE i <@ '{1,2,4}'; + i +------- + {} + {1,2} + {1} +(3 rows) + +SELECT * FROM test_array WHERE i <@ '{1,2,3,4}'; + i +----------- + {} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(5 rows) + +SELECT * FROM test_array WHERE i <@ '{4,3,2,1}'; + i +----------- + {} + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(5 rows) + +SELECT * FROM test_array WHERE i <@ '{0,0}'; + i +----- + {} + {0} +(2 rows) + +SELECT * FROM test_array WHERE i <@ '{100}'; + i +---- + {} +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::smallint[]) +(2 rows) + +SELECT * FROM test_array WHERE i % '{}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i % '{1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i % '{1,2}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{4,3,2,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4,5}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} +(3 rows) + +SELECT * FROM test_array WHERE i % '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}'; + i +----------- + {1,2,3,4} +(1 row) + +SELECT * FROM test_array WHERE i % '{1,10,20,30,40,50}'; + i +--- +(0 rows) + +SELECT * FROM test_array WHERE i % '{1,10,20,30}'; + i +----- + {1} +(1 row) + +SELECT * FROM test_array WHERE i % '{1,1,1,1,1}'; + i +----------- + {1,2,3,4} + {1,2,3} + {1,2} + {1} +(4 rows) + +SELECT * FROM test_array WHERE i % '{0,0}'; + 
i +----- + {0} +(1 row) + +SELECT * FROM test_array WHERE i % '{100}'; + i +--- +(0 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{1}'::smallint[]) + Order By: (i <=> '{1}'::smallint[]) +(3 rows) + +SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; + i +----------- + {1} + {1,2} + {1,2,3} + {1,2,3,4} +(4 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ADD COLUMN add_info timestamp; +CREATE INDEX idx_array ON test_array +USING rum (i rum_anyarray_addon_ops, add_info) +WITH (attach = 'add_info', to = 'i'); +WITH q as ( + SELECT row_number() OVER (ORDER BY i) idx, ctid FROM test_array +) +UPDATE test_array SET add_info = '2016-05-16 14:21:25'::timestamp + + format('%s days', q.idx)::interval +FROM q WHERE test_array.ctid = q.ctid; +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +---------------------------------- + Seq Scan on test_array + Filter: (i % '{}'::smallint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; + QUERY PLAN +------------------------------------------------------------------------------------------ + Limit + -> Index Scan using idx_array on test_array + Index Cond: (i && '{1}'::smallint[]) + Order By: (add_info <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) +(4 rows) + +SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; +ERROR: doesn't support order by over pass-by-reference column +DROP INDEX idx_array; +/* + * Sanity checks for popular array types. + */ +ALTER TABLE test_array ALTER COLUMN i TYPE int4[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::integer[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::integer[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE int8[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; 
+ QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::bigint[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::bigint[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE text[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::text[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::text[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE varchar[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::character varying[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::character varying[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE char[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && 
'{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::bpchar[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::bpchar[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE numeric[] USING i::numeric[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::numeric[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::numeric[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE float4[] USING i::float4[]; +CREATE INDEX 
idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::real[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +------------------------------------------ + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::real[]) +(2 rows) + +DROP INDEX idx_array; +ALTER TABLE test_array ALTER COLUMN i TYPE float8[] USING i::float8[]; +CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; + QUERY PLAN +---------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i = '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i && '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; + QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i @> '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; 
+ QUERY PLAN +----------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i <@ '{}'::double precision[]) +(2 rows) + +EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; + QUERY PLAN +---------------------------------------------- + Index Scan using idx_array on test_array + Index Cond: (i % '{}'::double precision[]) +(2 rows) + +DROP INDEX idx_array; +/* + * Check ordering using distance operator + */ +CREATE TABLE test_array_order ( + i int2[] +); +\copy test_array_order(i) from 'data/rum_array.data'; +CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +EXPLAIN (COSTS OFF) +SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; + QUERY PLAN +------------------------------------------------------ + Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(3 rows) + +SELECT i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + i | distance +---------------------+------------------ + {20,23,51} | 1.73205080756888 + {33,51,20,77,23,65} | 2.44948974278318 + {18,23,10,90,15,20} | -1 + {23,76,34,23,2,20} | -1 + {73,23,20} | -1 + {23,89,38,20,40,95} | -1 + {23,20,72} | -1 + {20,60,45,23,29} | -1 + {81,20,26,22,23} | -1 + {6,97,20,89,23} | -1 + {20,98,30,23,1,66} | -1 + {57,23,39,46,50,20} | -1 +(12 rows) + diff --git a/sql/array.sql b/sql/array.sql index 11defc9e07..c17c1252c8 100644 --- a/sql/array.sql +++ b/sql/array.sql @@ -1,7 +1,24 @@ +/* + * --------------------------------------------- + * NOTE: This test behaves differenly on PgPro + * --------------------------------------------- + * + * -------------------- + * array.sql and array_1.sql + * -------------------- + * Test output for 64-bit and 32-bit systems 
respectively. + * + * -------------------- + * array_2.sql and array_3.sql + * -------------------- + * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows + * in the output has been changed. + */ + + set enable_seqscan=off; set enable_sort=off; - /* * Complete checks for int2[]. */ diff --git a/src/rumsort.c b/src/rumsort.c index b5691301c9..96784cd192 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -488,11 +488,23 @@ rum_tuplesort_putrum(RumTuplesortstate *state, RumSortItem *item) { MemoryContext oldcontext; SortTuple stup; +#if PG_VERSION_NUM >= 170000 + MinimalTuple tuple = (MinimalTuple)item; + Size tuplen; + TuplesortPublic *base = TuplesortstateGetPublic((TuplesortPublic *)state); +#endif oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); copytup_rum(state, &stup, item); -#if PG_VERSION_NUM >= 160000 +#if PG_VERSION_NUM >= 170000 + /* GetMemoryChunkSpace is not supported for bump contexts */ + if (TupleSortUseBumpTupleCxt(base->sortopt)) + tuplen = MAXALIGN(tuple->t_len); + else + tuplen = GetMemoryChunkSpace(tuple); + tuplesort_puttuple_common(state, &stup, false, tuplen); +#elif PG_VERSION_NUM >= 160000 tuplesort_puttuple_common(state, &stup, false); #else puttuple_common(state, &stup); @@ -506,11 +518,23 @@ rum_tuplesort_putrumitem(RumTuplesortstate *state, RumScanItem *item) { MemoryContext oldcontext; SortTuple stup; +#if PG_VERSION_NUM >= 170000 + MinimalTuple tuple = (MinimalTuple)item; + Size tuplen; + TuplesortPublic *base = TuplesortstateGetPublic((TuplesortPublic *)state); +#endif oldcontext = MemoryContextSwitchTo(rum_tuplesort_get_memorycontext(state)); copytup_rumitem(state, &stup, item); -#if PG_VERSION_NUM >= 160000 +#if PG_VERSION_NUM >= 170000 + /* GetMemoryChunkSpace is not supported for bump contexts */ + if (TupleSortUseBumpTupleCxt(base->sortopt)) + tuplen = MAXALIGN(tuple->t_len); + else + tuplen = GetMemoryChunkSpace(tuple); + tuplesort_puttuple_common(state, &stup, false, tuplen); +#elif PG_VERSION_NUM 
>= 160000 tuplesort_puttuple_common(state, &stup, false); #else puttuple_common(state, &stup); diff --git a/travis/docker-compose.yml b/travis/docker-compose.yml index 471ab779f2..0544d8597d 100644 --- a/travis/docker-compose.yml +++ b/travis/docker-compose.yml @@ -1,2 +1,3 @@ -tests: +services: + tests: build: . From 181b8027349c6f1677734a63141e839451c1015f Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 23 Oct 2024 15:06:38 +0300 Subject: [PATCH 169/182] Update configuration for Travis CI. --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index 428c4dc168..7ee9c5d4cd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,8 @@ notifications: on_failure: always env: + - PG_VERSION=17 + - PG_VERSION=17 LEVEL=hardcore - PG_VERSION=16 - PG_VERSION=16 LEVEL=hardcore - PG_VERSION=15 @@ -35,3 +37,8 @@ env: - PG_VERSION=12 LEVEL=hardcore - PG_VERSION=11 - PG_VERSION=11 LEVEL=hardcore + +matrix: + allow_failures: + - env: PG_VERSION=11 + - env: PG_VERSION=11 LEVEL=hardcore \ No newline at end of file From 331863cd4ab4c9ff0c31f8407d0bf1e0292d90e4 Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 23 Oct 2024 15:58:29 +0300 Subject: [PATCH 170/182] Update copyrights. --- LICENSE | 2 +- src/disable_core_macro.h | 2 +- src/rum.h | 2 +- src/rum_arr_utils.c | 2 +- src/rum_ts_utils.c | 2 +- src/rumbtree.c | 2 +- src/rumbulk.c | 2 +- src/rumdatapage.c | 2 +- src/rumentrypage.c | 2 +- src/rumget.c | 2 +- src/ruminsert.c | 2 +- src/rumscan.c | 2 +- src/rumsort.c | 2 +- src/rumsort.h | 2 +- src/rumtsquery.c | 2 +- src/rumutil.c | 2 +- src/rumvacuum.c | 2 +- src/rumvalidate.c | 2 +- travis/run_tests.sh | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/LICENSE b/LICENSE index 5b55f87617..a51596793f 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ RUM is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. 
-Portions Copyright (c) 2015-2019, Postgres Professional +Portions Copyright (c) 2015-2024, Postgres Professional Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group Portions Copyright (c) 1994, The Regents of the University of California diff --git a/src/disable_core_macro.h b/src/disable_core_macro.h index 922ad4cf75..0d6c4a8a3b 100644 --- a/src/disable_core_macro.h +++ b/src/disable_core_macro.h @@ -3,7 +3,7 @@ * disable_core_macro.h * Support including tuplesort.c from postgresql core code. * - * Copyright (c) 2022, Postgres Professional + * Copyright (c) 2022-2024, Postgres Professional * *------------------------------------------------------------------------- */ diff --git a/src/rum.h b/src/rum.h index 7bcfa18221..2139774d08 100644 --- a/src/rum.h +++ b/src/rum.h @@ -3,7 +3,7 @@ * rum.h * Exported definitions for RUM index. * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 2006-2022, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_arr_utils.c b/src/rum_arr_utils.c index 1ee57dbe29..d8dc00699a 100644 --- a/src/rum_arr_utils.c +++ b/src/rum_arr_utils.c @@ -3,7 +3,7 @@ * rum_arr_utils.c * various anyarray-search functions * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rum_ts_utils.c b/src/rum_ts_utils.c index 3133a92c5a..d3b9c5478a 100644 --- a/src/rum_ts_utils.c +++ b/src/rum_ts_utils.c @@ -3,7 +3,7 @@ * rum_ts_utils.c * various text-search functions * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2020, PostgreSQL Global 
Development Group * *------------------------------------------------------------------------- diff --git a/src/rumbtree.c b/src/rumbtree.c index f513848da4..dfe2f10c30 100644 --- a/src/rumbtree.c +++ b/src/rumbtree.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumbulk.c b/src/rumbulk.c index ee93df9fe9..7a03bf64b4 100644 --- a/src/rumbulk.c +++ b/src/rumbulk.c @@ -4,7 +4,7 @@ * routines for fast build of inverted index * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumdatapage.c b/src/rumdatapage.c index 43fc1feb65..922bb7d19a 100644 --- a/src/rumdatapage.c +++ b/src/rumdatapage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumentrypage.c b/src/rumentrypage.c index 5b0139d95e..29e1dd25bb 100644 --- a/src/rumentrypage.c +++ b/src/rumentrypage.c @@ -4,7 +4,7 @@ * page utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumget.c b/src/rumget.c index 70773ed712..ca5d83ee00 100644 --- a/src/rumget.c +++ b/src/rumget.c @@ -4,7 +4,7 @@ * fetch tuples from a RUM scan. * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/ruminsert.c b/src/ruminsert.c index e8b209dadf..255e616c99 100644 --- a/src/ruminsert.c +++ b/src/ruminsert.c @@ -4,7 +4,7 @@ * insert routines for the postgres inverted index access method. * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumscan.c b/src/rumscan.c index 42bca53822..dd77f3de41 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -4,7 +4,7 @@ * routines to manage scans of inverted index relations * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.c b/src/rumsort.c index 96784cd192..0c395f03e7 100644 --- a/src/rumsort.c +++ b/src/rumsort.c @@ -8,7 +8,7 @@ * src/backend/utils/sort/tuplesort.c. 
* * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumsort.h b/src/rumsort.h index dfa5117bf1..160aa5c8da 100644 --- a/src/rumsort.h +++ b/src/rumsort.h @@ -7,7 +7,7 @@ * It contains copy of static functions from * src/backend/utils/sort/tuplesort.c. * - * Portions Copyright (c) 2015-2021, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumtsquery.c b/src/rumtsquery.c index 55cd49d19d..6c6b3c86d0 100644 --- a/src/rumtsquery.c +++ b/src/rumtsquery.c @@ -3,7 +3,7 @@ * rumtsquery.c * Inverted fulltext search: indexing tsqueries. * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * *------------------------------------------------------------------------- diff --git a/src/rumutil.c b/src/rumutil.c index 9048918d12..4a239c85c7 100644 --- a/src/rumutil.c +++ b/src/rumutil.c @@ -4,7 +4,7 @@ * utilities routines for the postgres inverted index access method. 
* * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvacuum.c b/src/rumvacuum.c index cce9cb09ee..fd5e4206b4 100644 --- a/src/rumvacuum.c +++ b/src/rumvacuum.c @@ -4,7 +4,7 @@ * delete & vacuum routines for the postgres RUM * * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/src/rumvalidate.c b/src/rumvalidate.c index c4e7dc145f..0adbb10ac7 100644 --- a/src/rumvalidate.c +++ b/src/rumvalidate.c @@ -3,7 +3,7 @@ * rumvalidate.c * Opclass validator for RUM. * - * Portions Copyright (c) 2015-2022, Postgres Professional + * Portions Copyright (c) 2015-2024, Postgres Professional * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * diff --git a/travis/run_tests.sh b/travis/run_tests.sh index a1ea1e3e45..aaebf3f200 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019, Postgres Professional +# Copyright (c) 2019-2024, Postgres Professional # # supported levels: # * standard From 34619f96302f496e10e0cb6c9b4c28a846cf9a42 Mon Sep 17 00:00:00 2001 From: Arseny Kositsin <145198443+arseny114@users.noreply.github.com> Date: Fri, 22 Nov 2024 12:44:12 +0300 Subject: [PATCH 171/182] Fixed the content of the error message (#133) Fixed the content of the error message If the "rum" index is created without the "WITH" operator, two columns must be specified for "ORDER BY" to work. But now error message in this case looks like: "ERROR: cannot order without attribute ... 
in WHERE clause" Fixed the content of error message: "WHERE" replaced by "ORDER BY". Added a test that checks the case when only one column is specified in the "ORDER BY" and error message for this. Tags: rum. --- expected/orderby.out | 5 +++++ sql/orderby.sql | 7 +++++++ src/rumscan.c | 2 +- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/expected/orderby.out b/expected/orderby.out index 9bc421970c..0f8a5454ef 100644 --- a/expected/orderby.out +++ b/expected/orderby.out @@ -460,6 +460,11 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 458 | Fri May 20 21:21:22.326724 2016 (3 rows) +-- Test "ORDER BY" error message +DROP INDEX tsts_idx; +CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d); +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +ERROR: cannot order without attribute 2 in ORDER BY clause -- Test multicolumn index RESET enable_indexscan; RESET enable_indexonlyscan; diff --git a/sql/orderby.sql b/sql/orderby.sql index 28e5b6038b..4c2689d193 100644 --- a/sql/orderby.sql +++ b/sql/orderby.sql @@ -95,6 +95,13 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d <= '2016-05-16 14:21:25' ORDER SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d ASC LIMIT 3; SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER BY d DESC LIMIT 3; +-- Test "ORDER BY" error message +DROP INDEX tsts_idx; + +CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d); + +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; + -- Test multicolumn index RESET enable_indexscan; diff --git a/src/rumscan.c b/src/rumscan.c index dd77f3de41..089730fac4 100644 --- a/src/rumscan.c +++ b/src/rumscan.c @@ -214,7 +214,7 @@ rumFillScanKey(RumScanOpaque so, OffsetNumber attnum, } if (scanKey == NULL) - elog(ERROR, "cannot order without 
attribute %d in WHERE clause", + elog(ERROR, "cannot order without attribute %d in ORDER BY clause", key->attnum); else if (scanKey->nentries > 1) elog(ERROR, "scan key should contain only one value"); From f82ab184d53d347d8ef5b044b78ebbab922f3d0f Mon Sep 17 00:00:00 2001 From: Karina <55838532+Green-Chan@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:31:14 +0300 Subject: [PATCH 172/182] Stabilize array test. (#135) Make this test output more independent of PostgreSQL server version and system. Authored-by: Karina Litskevich --- expected/array.out | 74 ++-- expected/array_1.out | 74 ++-- expected/array_2.out | 899 ------------------------------------------- expected/array_3.out | 892 ------------------------------------------ sql/array.sql | 43 ++- 5 files changed, 132 insertions(+), 1850 deletions(-) delete mode 100644 expected/array_2.out delete mode 100644 expected/array_3.out diff --git a/expected/array.out b/expected/array.out index fe64c54f24..1e45885ad1 100644 --- a/expected/array.out +++ b/expected/array.out @@ -7,12 +7,6 @@ * array.sql and array_1.sql * -------------------- * Test output for 64-bit and 32-bit systems respectively. - * - * -------------------- - * array_2.sql and array_3.sql - * -------------------- - * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows - * in the output has been changed. */ set enable_seqscan=off; set enable_sort=off; @@ -859,41 +853,71 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; DROP INDEX idx_array; /* * Check ordering using distance operator + * + * We want to check that index scan provides us correct ordering by distance + * operator. File 'data/rum_array.data' contains two arrays that statisfy + * i @> '{23,20}' and have finite distance i <=> '{51}', and a bunch of arrays + * that statisfy i @> '{23,20}' and have infinite distance i <=> '{51}'. 
+ * + * When ordering by distance the order of this bunch of arrays with infinite + * distance is not determined and may depend of PostgreSQL version and system. + * We don't add another sort expression to ORDER BY because that might cause + * the planner to avoid using the index. Instead, we replace arrays that have + * infinite distance with {-1} to unambiguously determine the test output. + * + * 'Infinity' is printed differently in the output in different PostgreSQL + * versions, so we replace it with -1. */ CREATE TABLE test_array_order ( i int2[] ); \copy test_array_order(i) from 'data/rum_array.data'; CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +/* + * Check that plan of the query uses ordering provided by index scan + */ EXPLAIN (COSTS OFF) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - QUERY PLAN ------------------------------------------------------- - Index Scan using idx_array_order on test_array_order - Index Cond: (i @> '{23,20}'::smallint[]) - Order By: (i <=> '{51}'::smallint[]) -(3 rows) +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on t + -> Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(4 rows) -SELECT i, +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, CASE WHEN distance = 'Infinity' THEN -1 ELSE distance::numeric(18,14) END distance FROM (SELECT *, (i <=> '{51}') AS distance - FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; i | distance 
---------------------+------------------ {20,23,51} | 1.73205080756888 {33,51,20,77,23,65} | 2.44948974278318 - {23,76,34,23,2,20} | -1 - {20,60,45,23,29} | -1 - {23,89,38,20,40,95} | -1 - {23,20,72} | -1 - {73,23,20} | -1 - {6,97,20,89,23} | -1 - {20,98,30,23,1,66} | -1 - {57,23,39,46,50,20} | -1 - {81,20,26,22,23} | -1 - {18,23,10,90,15,20} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 (12 rows) diff --git a/expected/array_1.out b/expected/array_1.out index 3f3bf80bf0..c0876801bc 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -7,12 +7,6 @@ * array.sql and array_1.sql * -------------------- * Test output for 64-bit and 32-bit systems respectively. - * - * -------------------- - * array_2.sql and array_3.sql - * -------------------- - * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows - * in the output has been changed. */ set enable_seqscan=off; set enable_sort=off; @@ -852,41 +846,71 @@ EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; DROP INDEX idx_array; /* * Check ordering using distance operator + * + * We want to check that index scan provides us correct ordering by distance + * operator. File 'data/rum_array.data' contains two arrays that statisfy + * i @> '{23,20}' and have finite distance i <=> '{51}', and a bunch of arrays + * that statisfy i @> '{23,20}' and have infinite distance i <=> '{51}'. + * + * When ordering by distance the order of this bunch of arrays with infinite + * distance is not determined and may depend of PostgreSQL version and system. + * We don't add another sort expression to ORDER BY because that might cause + * the planner to avoid using the index. Instead, we replace arrays that have + * infinite distance with {-1} to unambiguously determine the test output. + * + * 'Infinity' is printed differently in the output in different PostgreSQL + * versions, so we replace it with -1. 
*/ CREATE TABLE test_array_order ( i int2[] ); \copy test_array_order(i) from 'data/rum_array.data'; CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +/* + * Check that plan of the query uses ordering provided by index scan + */ EXPLAIN (COSTS OFF) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - QUERY PLAN ------------------------------------------------------- - Index Scan using idx_array_order on test_array_order - Index Cond: (i @> '{23,20}'::smallint[]) - Order By: (i <=> '{51}'::smallint[]) -(3 rows) +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; + QUERY PLAN +------------------------------------------------------------ + Subquery Scan on t + -> Index Scan using idx_array_order on test_array_order + Index Cond: (i @> '{23,20}'::smallint[]) + Order By: (i <=> '{51}'::smallint[]) +(4 rows) -SELECT i, +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, CASE WHEN distance = 'Infinity' THEN -1 ELSE distance::numeric(18,14) END distance FROM (SELECT *, (i <=> '{51}') AS distance - FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; i | distance ---------------------+------------------ {20,23,51} | 1.73205080756888 {33,51,20,77,23,65} | 2.44948974278318 - {23,76,34,23,2,20} | -1 - {20,60,45,23,29} | -1 - {23,89,38,20,40,95} | -1 - {23,20,72} | -1 - {73,23,20} | -1 - {6,97,20,89,23} | -1 - {20,98,30,23,1,66} | -1 - {57,23,39,46,50,20} | -1 - {81,20,26,22,23} | -1 - {18,23,10,90,15,20} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 + {-1} | -1 (12 rows) diff --git a/expected/array_2.out 
b/expected/array_2.out deleted file mode 100644 index 0097a5b5f4..0000000000 --- a/expected/array_2.out +++ /dev/null @@ -1,899 +0,0 @@ -/* - * --------------------------------------------- - * NOTE: This test behaves differenly on PgPro - * --------------------------------------------- - * - * -------------------- - * array.sql and array_1.sql - * -------------------- - * Test output for 64-bit and 32-bit systems respectively. - * - * -------------------- - * array_2.sql and array_3.sql - * -------------------- - * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows - * in the output has been changed. - */ -set enable_seqscan=off; -set enable_sort=off; -/* - * Complete checks for int2[]. - */ -CREATE TABLE test_array ( - i int2[] -); -INSERT INTO test_array VALUES ('{}'), ('{0}'), ('{1,2,3,4}'), ('{1,2,3}'), ('{1,2}'), ('{1}'); -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -SELECT NULL::int[] = '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] && '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] @> '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] <@ '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] % '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] <=> '{1}'; - ?column? 
----------- - -(1 row) - -INSERT INTO test_array VALUES (NULL); -SELECT * FROM test_array WHERE i = '{1}'; - i ------ - {1} -(1 row) - -DELETE FROM test_array WHERE i IS NULL; -SELECT * FROM test_array WHERE i = '{NULL}'; -ERROR: array must not contain nulls -SELECT * FROM test_array WHERE i = '{1,2,3,NULL}'; -ERROR: array must not contain nulls -SELECT * FROM test_array WHERE i = '{{1,2},{3,4}}'; -ERROR: array must have 1 dimension -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i = '{}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i = '{0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i = '{1}'; - i ------ - {1} -(1 row) - -SELECT * FROM test_array WHERE i = '{1,2}'; - i -------- - {1,2} -(1 row) - -SELECT * FROM test_array WHERE i = '{2,1}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{1,2,3,3}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{0,0}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i && '{}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i && '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i && '{3}'; - i ------------ - {1,2,3,4} - {1,2,3} -(2 rows) - -SELECT * FROM test_array WHERE i && '{4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i && '{1,2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM 
test_array WHERE i && '{1,2,3}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{1,2,3,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{4,3,2,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{0,0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i && '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i @> '{}'; - i ------------ - {} - {0} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(6 rows) - -SELECT * FROM test_array WHERE i @> '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i @> '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i @> '{3}'; - i ------------ - {1,2,3,4} - {1,2,3} -(2 rows) - -SELECT * FROM test_array WHERE i @> '{4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{1,2,4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{1,2,3,4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{4,3,2,1}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{0,0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i @> '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i <@ '{}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i <@ '{1}'; - i ------ - {} - {1} -(2 rows) - -SELECT * FROM test_array WHERE i <@ 
'{2}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i <@ '{1,2,4}'; - i -------- - {} - {1,2} - {1} -(3 rows) - -SELECT * FROM test_array WHERE i <@ '{1,2,3,4}'; - i ------------ - {} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(5 rows) - -SELECT * FROM test_array WHERE i <@ '{4,3,2,1}'; - i ------------ - {} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(5 rows) - -SELECT * FROM test_array WHERE i <@ '{0,0}'; - i ------ - {} - {0} -(2 rows) - -SELECT * FROM test_array WHERE i <@ '{100}'; - i ----- - {} -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i % '{}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i % '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i % '{1,2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{4,3,2,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4,5}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i % '{1,10,20,30,40,50}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i % '{1,10,20,30}'; - i ------ - {1} -(1 row) - -SELECT * FROM test_array WHERE i % '{1,1,1,1,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{0,0}'; - 
i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i % '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{1}'::smallint[]) - Order By: (i <=> '{1}'::smallint[]) -(3 rows) - -SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; - i ------------ - {1} - {1,2} - {1,2,3} - {1,2,3,4} -(4 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ADD COLUMN add_info timestamp; -CREATE INDEX idx_array ON test_array -USING rum (i rum_anyarray_addon_ops, add_info) -WITH (attach = 'add_info', to = 'i'); -WITH q as ( - SELECT row_number() OVER (ORDER BY i) idx, ctid FROM test_array -) -UPDATE test_array SET add_info = '2016-05-16 14:21:25'::timestamp + - format('%s days', q.idx)::interval -FROM q WHERE test_array.ctid = q.ctid; -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ----------------------------------- - Seq Scan on test_array - Filter: (i % '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------- - Limit - -> Index Scan using idx_array on test_array - Index Cond: (i && '{1}'::smallint[]) - Order By: (add_info <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) - -SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; - i | add_info ------------+-------------------------- - {1} | Thu May 19 14:21:25 2016 - {1,2} | Fri May 20 14:21:25 2016 - {1,2,3} | Sat May 21 14:21:25 2016 - {1,2,3,4} | Sun May 22 14:21:25 2016 -(4 rows) - -DROP INDEX idx_array; -/* - * Sanity checks for popular array types. - */ -ALTER TABLE test_array ALTER COLUMN i TYPE int4[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::integer[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER 
COLUMN i TYPE int8[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::bigint[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE text[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN 
------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::text[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE varchar[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::character varying[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE char[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) 
SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::bpchar[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE numeric[] USING i::numeric[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - 
Index Cond: (i % '{}'::numeric[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE float4[] USING i::float4[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::real[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE float8[] USING i::float8[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ----------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------------ - Index 
Scan using idx_array on test_array - Index Cond: (i @> '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ----------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::double precision[]) -(2 rows) - -DROP INDEX idx_array; -/* - * Check ordering using distance operator - */ -CREATE TABLE test_array_order ( - i int2[] -); -\copy test_array_order(i) from 'data/rum_array.data'; -CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - QUERY PLAN ------------------------------------------------------- - Index Scan using idx_array_order on test_array_order - Index Cond: (i @> '{23,20}'::smallint[]) - Order By: (i <=> '{51}'::smallint[]) -(3 rows) - -SELECT i, - CASE WHEN distance = 'Infinity' THEN -1 - ELSE distance::numeric(18,14) - END distance - FROM - (SELECT *, (i <=> '{51}') AS distance - FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; - i | distance ----------------------+------------------ - {20,23,51} | 1.73205080756888 - {33,51,20,77,23,65} | 2.44948974278318 - {6,97,20,89,23} | -1 - {20,98,30,23,1,66} | -1 - {57,23,39,46,50,20} | -1 - {81,20,26,22,23} | -1 - {73,23,20} | -1 - {18,23,10,90,15,20} | -1 - {23,76,34,23,2,20} | -1 - {20,60,45,23,29} | -1 - {23,89,38,20,40,95} | -1 - {23,20,72} | -1 -(12 rows) - diff --git a/expected/array_3.out b/expected/array_3.out deleted file mode 100644 index d5012c3a38..0000000000 --- a/expected/array_3.out +++ /dev/null @@ -1,892 +0,0 @@ -/* - * --------------------------------------------- - * NOTE: This test behaves differenly on 
PgPro - * --------------------------------------------- - * - * -------------------- - * array.sql and array_1.sql - * -------------------- - * Test output for 64-bit and 32-bit systems respectively. - * - * -------------------- - * array_2.sql and array_3.sql - * -------------------- - * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows - * in the output has been changed. - */ -set enable_seqscan=off; -set enable_sort=off; -/* - * Complete checks for int2[]. - */ -CREATE TABLE test_array ( - i int2[] -); -INSERT INTO test_array VALUES ('{}'), ('{0}'), ('{1,2,3,4}'), ('{1,2,3}'), ('{1,2}'), ('{1}'); -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -SELECT NULL::int[] = '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] && '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] @> '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] <@ '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] % '{1}'; - ?column? ----------- - -(1 row) - -SELECT NULL::int[] <=> '{1}'; - ?column? 
----------- - -(1 row) - -INSERT INTO test_array VALUES (NULL); -SELECT * FROM test_array WHERE i = '{1}'; - i ------ - {1} -(1 row) - -DELETE FROM test_array WHERE i IS NULL; -SELECT * FROM test_array WHERE i = '{NULL}'; -ERROR: array must not contain nulls -SELECT * FROM test_array WHERE i = '{1,2,3,NULL}'; -ERROR: array must not contain nulls -SELECT * FROM test_array WHERE i = '{{1,2},{3,4}}'; -ERROR: array must have 1 dimension -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i = '{}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i = '{0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i = '{1}'; - i ------ - {1} -(1 row) - -SELECT * FROM test_array WHERE i = '{1,2}'; - i -------- - {1,2} -(1 row) - -SELECT * FROM test_array WHERE i = '{2,1}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{1,2,3,3}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{0,0}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i = '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i && '{}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i && '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i && '{3}'; - i ------------ - {1,2,3,4} - {1,2,3} -(2 rows) - -SELECT * FROM test_array WHERE i && '{4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i && '{1,2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM 
test_array WHERE i && '{1,2,3}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{1,2,3,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{4,3,2,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i && '{0,0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i && '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i @> '{}'; - i ------------ - {} - {0} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(6 rows) - -SELECT * FROM test_array WHERE i @> '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i @> '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i @> '{3}'; - i ------------ - {1,2,3,4} - {1,2,3} -(2 rows) - -SELECT * FROM test_array WHERE i @> '{4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{1,2,4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{1,2,3,4}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{4,3,2,1}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i @> '{0,0}'; - i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i @> '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i <@ '{}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i <@ '{1}'; - i ------ - {} - {1} -(2 rows) - -SELECT * FROM test_array WHERE i <@ 
'{2}'; - i ----- - {} -(1 row) - -SELECT * FROM test_array WHERE i <@ '{1,2,4}'; - i -------- - {} - {1,2} - {1} -(3 rows) - -SELECT * FROM test_array WHERE i <@ '{1,2,3,4}'; - i ------------ - {} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(5 rows) - -SELECT * FROM test_array WHERE i <@ '{4,3,2,1}'; - i ------------ - {} - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(5 rows) - -SELECT * FROM test_array WHERE i <@ '{0,0}'; - i ------ - {} - {0} -(2 rows) - -SELECT * FROM test_array WHERE i <@ '{100}'; - i ----- - {} -(1 row) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::smallint[]) -(2 rows) - -SELECT * FROM test_array WHERE i % '{}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i % '{1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i % '{1,2}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{4,3,2,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4,5}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} -(3 rows) - -SELECT * FROM test_array WHERE i % '{1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}'; - i ------------ - {1,2,3,4} -(1 row) - -SELECT * FROM test_array WHERE i % '{1,10,20,30,40,50}'; - i ---- -(0 rows) - -SELECT * FROM test_array WHERE i % '{1,10,20,30}'; - i ------ - {1} -(1 row) - -SELECT * FROM test_array WHERE i % '{1,1,1,1,1}'; - i ------------ - {1,2,3,4} - {1,2,3} - {1,2} - {1} -(4 rows) - -SELECT * FROM test_array WHERE i % '{0,0}'; - 
i ------ - {0} -(1 row) - -SELECT * FROM test_array WHERE i % '{100}'; - i ---- -(0 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{1}'::smallint[]) - Order By: (i <=> '{1}'::smallint[]) -(3 rows) - -SELECT * FROM test_array WHERE i && '{1}' ORDER BY i <=> '{1}' ASC; - i ------------ - {1} - {1,2} - {1,2,3} - {1,2,3,4} -(4 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ADD COLUMN add_info timestamp; -CREATE INDEX idx_array ON test_array -USING rum (i rum_anyarray_addon_ops, add_info) -WITH (attach = 'add_info', to = 'i'); -WITH q as ( - SELECT row_number() OVER (ORDER BY i) idx, ctid FROM test_array -) -UPDATE test_array SET add_info = '2016-05-16 14:21:25'::timestamp + - format('%s days', q.idx)::interval -FROM q WHERE test_array.ctid = q.ctid; -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ----------------------------------- - Seq Scan on test_array - Filter: (i % '{}'::smallint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; - QUERY PLAN ------------------------------------------------------------------------------------------- - Limit - -> Index Scan using idx_array on test_array - Index Cond: (i && '{1}'::smallint[]) - Order By: (add_info <=> 'Mon May 16 14:21:25 2016'::timestamp without time zone) -(4 rows) - -SELECT * FROM test_array WHERE i && '{1}' ORDER BY add_info <=> '2016-05-16 14:21:25' LIMIT 10; -ERROR: doesn't support order by over pass-by-reference column -DROP INDEX idx_array; -/* - * Sanity checks for popular array types. - */ -ALTER TABLE test_array ALTER COLUMN i TYPE int4[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::integer[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::integer[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE int8[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; 
- QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::bigint[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::bigint[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE text[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::text[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * 
FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::text[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE varchar[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::character varying[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::character varying[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE char[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && 
'{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::bpchar[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::bpchar[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE numeric[] USING i::numeric[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::numeric[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::numeric[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE float4[] USING i::float4[]; -CREATE INDEX 
idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::real[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::real[]) -(2 rows) - -DROP INDEX idx_array; -ALTER TABLE test_array ALTER COLUMN i TYPE float8[] USING i::float8[]; -CREATE INDEX idx_array ON test_array USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i = '{}'; - QUERY PLAN ----------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i = '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i && '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i && '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i @> '{}'; - QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i @> '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i <@ '{}'; 
- QUERY PLAN ------------------------------------------------ - Index Scan using idx_array on test_array - Index Cond: (i <@ '{}'::double precision[]) -(2 rows) - -EXPLAIN (COSTS OFF) SELECT * FROM test_array WHERE i % '{}'; - QUERY PLAN ----------------------------------------------- - Index Scan using idx_array on test_array - Index Cond: (i % '{}'::double precision[]) -(2 rows) - -DROP INDEX idx_array; -/* - * Check ordering using distance operator - */ -CREATE TABLE test_array_order ( - i int2[] -); -\copy test_array_order(i) from 'data/rum_array.data'; -CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); -EXPLAIN (COSTS OFF) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; - QUERY PLAN ------------------------------------------------------- - Index Scan using idx_array_order on test_array_order - Index Cond: (i @> '{23,20}'::smallint[]) - Order By: (i <=> '{51}'::smallint[]) -(3 rows) - -SELECT i, - CASE WHEN distance = 'Infinity' THEN -1 - ELSE distance::numeric(18,14) - END distance - FROM - (SELECT *, (i <=> '{51}') AS distance - FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; - i | distance ----------------------+------------------ - {20,23,51} | 1.73205080756888 - {33,51,20,77,23,65} | 2.44948974278318 - {18,23,10,90,15,20} | -1 - {23,76,34,23,2,20} | -1 - {73,23,20} | -1 - {23,89,38,20,40,95} | -1 - {23,20,72} | -1 - {20,60,45,23,29} | -1 - {81,20,26,22,23} | -1 - {6,97,20,89,23} | -1 - {20,98,30,23,1,66} | -1 - {57,23,39,46,50,20} | -1 -(12 rows) - diff --git a/sql/array.sql b/sql/array.sql index c17c1252c8..ba6886780d 100644 --- a/sql/array.sql +++ b/sql/array.sql @@ -7,12 +7,6 @@ * array.sql and array_1.sql * -------------------- * Test output for 64-bit and 32-bit systems respectively. 
- * - * -------------------- - * array_2.sql and array_3.sql - * -------------------- - * Since 6ed83d5fa55c in PostgreSQL 17, the order of rows - * in the output has been changed. */ @@ -216,6 +210,20 @@ DROP INDEX idx_array; /* * Check ordering using distance operator + * + * We want to check that index scan provides us correct ordering by distance + * operator. File 'data/rum_array.data' contains two arrays that statisfy + * i @> '{23,20}' and have finite distance i <=> '{51}', and a bunch of arrays + * that statisfy i @> '{23,20}' and have infinite distance i <=> '{51}'. + * + * When ordering by distance the order of this bunch of arrays with infinite + * distance is not determined and may depend of PostgreSQL version and system. + * We don't add another sort expression to ORDER BY because that might cause + * the planner to avoid using the index. Instead, we replace arrays that have + * infinite distance with {-1} to unambiguously determine the test output. + * + * 'Infinity' is printed differently in the output in different PostgreSQL + * versions, so we replace it with -1. 
*/ CREATE TABLE test_array_order ( @@ -225,12 +233,29 @@ CREATE TABLE test_array_order ( CREATE INDEX idx_array_order ON test_array_order USING rum (i rum_anyarray_ops); +/* + * Check that plan of the query uses ordering provided by index scan + */ + EXPLAIN (COSTS OFF) -SELECT *, i <=> '{51}' from test_array_order WHERE i @> '{23,20}' order by i <=> '{51}'; -SELECT i, +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, + CASE WHEN distance = 'Infinity' THEN -1 + ELSE distance::numeric(18,14) + END distance + FROM + (SELECT *, (i <=> '{51}') AS distance + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; + +SELECT + CASE WHEN distance = 'Infinity' THEN '{-1}' + ELSE i + END i, CASE WHEN distance = 'Infinity' THEN -1 ELSE distance::numeric(18,14) END distance FROM (SELECT *, (i <=> '{51}') AS distance - FROM test_array_order WHERE i @> '{23,20}' ORDER BY i <=> '{51}') t; + FROM test_array_order WHERE i @> '{23,20}' ORDER BY distance) t; From 2f03e1b21c741ab80bba90e16ca1033c40df3e91 Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 6 Nov 2024 14:28:24 +0300 Subject: [PATCH 173/182] Remake of pglist_test.py but on the perl. This test download dump of big test base pglist to /pglist_tmp directory. Delete folder pglist_tmp during 'make clean'. Run new test only in case PG_TEST_EXTRA contain 'big_values'. 
--- Makefile | 2 + README.md | 20 ++++ t/002_pglist.pl | 207 ++++++++++++++++++++++++++++++++++++++++++ tests/README.md | 14 --- tests/__init__.py | 0 tests/pglist_tests.py | 157 -------------------------------- travis/run_tests.sh | 1 + 7 files changed, 230 insertions(+), 171 deletions(-) create mode 100644 t/002_pglist.pl delete mode 100644 tests/README.md delete mode 100644 tests/__init__.py delete mode 100644 tests/pglist_tests.py diff --git a/Makefile b/Makefile index f24246528f..ab4ee1df38 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,8 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_ha TAP_TESTS = 1 +EXTRA_CLEAN = pglist_tmp + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/README.md b/README.md index 5f3e6887cf..b6fb08420c 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,26 @@ Typical installation procedure may look like this: > **Important:** Don't forget to set the `PG_CONFIG` variable in case you want to test `RUM` on a custom build of PostgreSQL. Read more [here](https://wiki.postgresql.org/wiki/Building_and_Installing_PostgreSQL_Extension_Modules). +## Tests + +$ make check + +This command runs: +- regression tests; +- isolation tests; +- tap tests. + + One of the tap tests downloads a 1GB archive and then unpacks it + into a file weighing almost 3GB. It is disabled by default. + + To run this test, you need to set an environment variable: + + $ export PG_TEST_EXTRA=big_values + + The way to turn it off again: + + $ export -n PG_TEST_EXTRA + ## Common operators and functions The **rum** module provides next operators. diff --git a/t/002_pglist.pl b/t/002_pglist.pl new file mode 100644 index 0000000000..7b2d76c058 --- /dev/null +++ b/t/002_pglist.pl @@ -0,0 +1,207 @@ +# Test RUM index with big base 'pglist'. +use strict; +use warnings; +use Config; +use Test::More; + +plan skip_all => 'This test requires downloading a 1GB archive. ' . 
+ 'The unpacked file weighs almost 3GB. ' . + 'Perform only if the big_values is enabled in PG_TEST_EXTRA' + unless $ENV{PG_TEST_EXTRA} && $ENV{PG_TEST_EXTRA} =~ /\bbig_values\b/; + +plan tests => 4; + +my $node; + +# Utility function + +sub file_exists +{ + my ($file) = @_; + return -e $file; +} + +# Check the existence of the test base, install if necessary + +sub install_pglist +{ + my $dir = Cwd->getcwd; #current directory + + my %config = ( + #directory with pglist dump must be inside the current directory + pglist_tmp_dir => $dir . '/pglist_tmp/', + dump_name => 'pglist-28-04-16.dump', + dump_url => 'http://www.sai.msu.su/~megera/postgres/files/pglist-28-04-16.dump.gz', + pglist_archive => $dir . '/pglist_tmp/' . 'pglist-28-04-16.dump.gz', + ); + + my $path_to_dump = $config{pglist_tmp_dir} . $config{dump_name}; + + if (file_exists($path_to_dump)) + { + note($config{dump_name} . ' already installed'); + } + else + { + # Create folder /contrib/rum/pglist_tmp if not already exists + mkdir($config{pglist_tmp_dir}, 0700) + unless file_exists($config{pglist_tmp_dir}); + + # Download archive pglist-28-04-16.dump.gz if not already exists + unless (file_exists($config{pglist_archive})) + { + note('Downloading pglist dump in ' . $config{pglist_archive}); + + # Flag "-nv" allows us to avoid frequent messages + # about the download status in the log. + # But it can be enabled for debugging purposes. + system("wget -P $config{pglist_tmp_dir} -nv $config{dump_url}") == 0 + or die "Couldn't get archive by link: $?"; + } + + # Unzip the dump. Delete archive to save memory + system("gzip -d $config{pglist_archive}") == 0 + or die "Couldn't extract archive: $?"; + + file_exists($path_to_dump) + or die "Failed to get " . $config{dump_name}; + + note($config{dump_name} . ' is ready to use'); + } + + $node->psql("postgres", "CREATE DATABASE pglist"); + $node->psql("postgres", "CREATE ROLE oleg"); + my $command = "'" . $path_to_dump . 
"'"; + my $result = $node->psql("pglist", '\i ' . $command); +} + +# Tests SELECT constructions to 'pglist' base + +sub test_select +{ + note("Creating index 'rumidx_orderby_sent'"); + + $node->safe_psql("pglist", "CREATE INDEX rumidx_orderby_sent ON pglist " . + "USING rum (fts rum_tsvector_timestamp_ops, sent) " . + "WITH (attach=sent, to=fts, order_by_attach=t)"); + + note("Test ORDER BY timestamp"); + + my $result1 = $node->safe_psql("pglist", + "SELECT sent, subject FROM pglist WHERE fts @@ " . + "to_tsquery('english', 'backend <-> crushed') " . + "ORDER BY sent <=| '2016-01-01 00:01' LIMIT 5"); + + is($result1, '1999-06-02 11:52:46|Re: [HACKERS] PID of backend'); + + note("Test tsvector filter"); + + my $result2 = $node->safe_psql("pglist", + "SELECT count(*) FROM pglist " . + "WHERE fts @@ to_tsquery('english', 'tom & lane')"); + + is($result2, '222813'); + + $node->safe_psql("pglist", "DROP INDEX rumidx_orderby_sent"); +} + +sub test_order_by +{ + note("Creating index 'pglist_rum_idx'"); + + $node->safe_psql("pglist", + "CREATE INDEX pglist_rum_idx ON pglist " . + "USING rum (fts rum_tsvector_ops)"); + + note("Test ORDER BY tsvector"); + + my $result3 = $node->safe_psql("pglist", + "SELECT id FROM pglist " . + "WHERE fts @@ to_tsquery('english', 'postgres:*') " . + "ORDER BY fts <=> " . 
+ "to_tsquery('english', 'postgres:*') LIMIT 9"); + + is((split(" ", $result3))[0], '816114'); + + # Autovacuum after large update, with active RUM index crashes postgres + note("Test Issue #19"); + + my $stderr; + $node->safe_psql("pglist", "DELETE FROM pglist WHERE id < 100000"); + $node->safe_psql("pglist", "vacuum", stderr => \$stderr); + + is($stderr, undef); + + $node->safe_psql("pglist", "DROP INDEX pglist_rum_idx"); +} + +# Start backend + +my $pg_15_modules; + +BEGIN +{ + $pg_15_modules = eval + { + require PostgreSQL::Test::Cluster; + require PostgreSQL::Test::Utils; + return 1; + }; + + unless (defined $pg_15_modules) + { + $pg_15_modules = 0; + + require PostgresNode; + require TestLib; + } +} + +note('PostgreSQL 15 modules are used: ' . ($pg_15_modules ? 'yes' : 'no')); + +if ($pg_15_modules) +{ + $node = PostgreSQL::Test::Cluster->new("master"); +} +else +{ + $node = PostgresNode::get_new_node("master"); +} + +$node->init(allows_streaming => 1); +$node->append_conf("postgresql.conf", "shared_buffers='4GB'\n" . + "maintenance_work_mem='2GB'\n" . + "max_wal_size='2GB'\n" . + "work_mem='50MB'"); +$node->start; + +# Check the existence of the pglist base + +note('Check the existence of the pglist base...'); +my $check_pglist = $node->psql('postgres', "SELECT count(*) FROM pg_database " . + "WHERE datistemplate = false AND " . 
+ "datname = 'pglist'"); +if ($check_pglist == 1) +{ + note("pglist already exists"); +} +else +{ + note("Create pglist database"); + install_pglist(); +} + +$node->psql("pglist", "CREATE EXTENSION rum"); +note('Setup is completed successfully'); + +eval +{ + test_select(); + test_order_by(); + $node->stop(); + done_testing(); + 1; +} or do { + note('Something went wrong: $@\n'); +}; + diff --git a/tests/README.md b/tests/README.md deleted file mode 100644 index 77b7d78e88..0000000000 --- a/tests/README.md +++ /dev/null @@ -1,14 +0,0 @@ -## Running tests - -Install testgres: - -``` -pip install testgres==0.4.0 -``` - -Run command: - -``` -python -m unittest pglist_tests -``` - diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/pglist_tests.py b/tests/pglist_tests.py deleted file mode 100644 index 7b8e4558e0..0000000000 --- a/tests/pglist_tests.py +++ /dev/null @@ -1,157 +0,0 @@ -# coding: utf-8 -""" - Test RUM index with big base 'pglist' - - Copyright (c) 2015-2021, Postgres Professional -""" -import unittest -import os -import sys -import gzip -import testgres as tg - -if sys.version_info[0] < 3: - import urllib as request -else: - import urllib.request as request - -from os.path import expanduser - - -class PglistTests(unittest.TestCase): - - def setUp(self): - current_dir = os.path.dirname(os.path.abspath(__file__)) - - self.node = tg.get_new_node("pglist", - os.path.join(current_dir, "tmp_install")) - try: - self.node.init() - self.node.append_conf("postgresql.conf", - "shared_buffers='4GB'\n" - "maintenance_work_mem='2GB'\n" - "max_wal_size='2GB'\n" - "work_mem='50MB'") - self.node.start() - except Exception as e: - self.printlog(os.path.join(self.node.logs_dir, "postgresql.log")) - raise e - - def tearDown(self): - tg.stop_all() - - def init_pglist_data(self, node): - # Check if 'pglist' base exists - bases = node.execute("postgres", - "SELECT count(*) FROM pg_database " - "WHERE 
datistemplate = false AND " - " datname = 'pglist'") - if bases[0][0] != 0: - return - - # Check if 'pglist' dump exists - home = expanduser("~") - pglist_dump = os.path.join(home, "pglist-28-04-16.dump") - if not os.path.isfile(pglist_dump): - pglist_dumpgz = pglist_dump + ".gz" - if not os.path.isfile(pglist_dumpgz): - print("Downloading: {0}".format(pglist_dumpgz)) - request.urlretrieve( - "http://www.sai.msu.su/~megera/postgres/files/pglist-28-04-16.dump.gz", - pglist_dumpgz) - - print("Decompressing: {0}".format(pglist_dumpgz)) - gz = gzip.open(pglist_dumpgz, 'rb') - with open(pglist_dump, 'wb') as f: - f.write(gz.read()) - - os.remove(pglist_dumpgz) - - # Restore dump file - print("Restoring 'pglist'") - node.safe_psql("postgres", "CREATE DATABASE pglist") - node.psql("pglist", filename=pglist_dump) - - node.safe_psql("pglist", "CREATE EXTENSION rum") - - def printlog(self, logfile): - with open(logfile, 'r') as log: - for line in log.readlines(): - print(line) - - def test_order_by(self): - """Tests SELECT constructions to 'pglist' base""" - try: - self.init_pglist_data(self.node) - - print("Creating index 'rumidx_orderby_sent'") - - self.node.safe_psql( - "pglist", - "CREATE INDEX rumidx_orderby_sent ON pglist USING rum (" - " fts rum_tsvector_timestamp_ops, sent) " - " WITH (attach=sent, to=fts, order_by_attach=t)") - - print("Running tests") - - self.assertEqual( - self.node.safe_psql( - "pglist", - "SELECT sent, subject " - " FROM pglist " - " WHERE fts @@ " - " to_tsquery('english', 'backend <-> crushed') " - " ORDER BY sent <=| '2016-01-01 00:01' LIMIT 5" - ), - b'1999-06-02 11:52:46|Re: [HACKERS] PID of backend\n' - ) - - self.assertEqual( - self.node.safe_psql( - "pglist", - "SELECT count(*) FROM pglist " - "WHERE fts @@ to_tsquery('english', 'tom & lane')" - ), - b'222813\n' - ) - - self.node.safe_psql("pglist", "DROP INDEX rumidx_orderby_sent"); - - print("Creating index 'pglist_rum_idx'") - - self.node.safe_psql( - "pglist", 
- "CREATE INDEX pglist_rum_idx ON pglist USING rum (" - " fts rum_tsvector_ops)") - - print("Running tests") - - self.assertEqual( - self.node.execute( - "pglist", - "SELECT id FROM pglist " - "WHERE fts @@ to_tsquery('english', 'postgres:*') " - "ORDER BY fts <=> to_tsquery('english', 'postgres:*') " - "LIMIT 9" - )[0][0], - 816114 - ) - - # Autovacuum after large update, with active RUM index crashes postgres - print("Test Issue #19") - - self.node.safe_psql( - "pglist", - "DELETE FROM pglist WHERE id < 100000") - self.node.safe_psql( - "pglist", - "vacuum") - - self.node.safe_psql("pglist", "DROP INDEX pglist_rum_idx"); - - except Exception as e: - self.printlog(os.path.join(self.node.logs_dir, "postgresql.log")) - raise e - -if __name__ == "__main__": - unittest.main() diff --git a/travis/run_tests.sh b/travis/run_tests.sh index aaebf3f200..37bba84d64 100644 --- a/travis/run_tests.sh +++ b/travis/run_tests.sh @@ -38,6 +38,7 @@ if [ "$LEVEL" = "hardcore" ]; then ./configure \ CFLAGS='-fno-omit-frame-pointer' \ --enable-cassert \ + --enable-tap-tests \ --prefix=$CUSTOM_PG_BIN \ --quiet From 36379d3b54ca1870e0a7f93758e59f23dbf23349 Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 11 Dec 2024 19:12:03 +0300 Subject: [PATCH 174/182] Update alternative output for 32-bit systems. Commit 34619f96302f changed orderby.sql and orderby.out. But this test has alternative output orderby_1.sql for 32-bit systems. Fix comments for files with alternatives. 
--- expected/array.out | 13 ++++++------- expected/array_1.out | 13 ++++++------- expected/orderby.out | 9 +++++++++ expected/orderby_1.out | 14 ++++++++++++++ sql/array.sql | 13 ++++++------- sql/orderby.sql | 11 +++++++++++ 6 files changed, 52 insertions(+), 21 deletions(-) diff --git a/expected/array.out b/expected/array.out index 1e45885ad1..a2fb3bb8df 100644 --- a/expected/array.out +++ b/expected/array.out @@ -1,12 +1,11 @@ /* - * --------------------------------------------- - * NOTE: This test behaves differenly on PgPro - * --------------------------------------------- + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * array.out - test output for 64-bit systems and + * array_1.out - test output for 32-bit systems. * - * -------------------- - * array.sql and array_1.sql - * -------------------- - * Test output for 64-bit and 32-bit systems respectively. */ set enable_seqscan=off; set enable_sort=off; diff --git a/expected/array_1.out b/expected/array_1.out index c0876801bc..cc5f93307c 100644 --- a/expected/array_1.out +++ b/expected/array_1.out @@ -1,12 +1,11 @@ /* - * --------------------------------------------- - * NOTE: This test behaves differenly on PgPro - * --------------------------------------------- + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * array.out - test output for 64-bit systems and + * array_1.out - test output for 32-bit systems. * - * -------------------- - * array.sql and array_1.sql - * -------------------- - * Test output for 64-bit and 32-bit systems respectively. 
*/ set enable_seqscan=off; set enable_sort=off; diff --git a/expected/orderby.out b/expected/orderby.out index 0f8a5454ef..07ae7322ed 100644 --- a/expected/orderby.out +++ b/expected/orderby.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby.out - test output for 64-bit systems and + * orderby_1.out - test output for 32-bit systems. + * + */ CREATE TABLE tsts (id int, t tsvector, d timestamp); \copy tsts from 'data/tsts.data' CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d) diff --git a/expected/orderby_1.out b/expected/orderby_1.out index a6fb68c1e3..cdd536ac9d 100644 --- a/expected/orderby_1.out +++ b/expected/orderby_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby.out - test output for 64-bit systems and + * orderby_1.out - test output for 32-bit systems. 
+ * + */ CREATE TABLE tsts (id int, t tsvector, d timestamp); \copy tsts from 'data/tsts.data' CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d) @@ -420,6 +429,11 @@ SELECT id, d FROM tsts WHERE t @@ 'wr&qh' AND d >= '2016-05-16 14:21:25' ORDER 458 | Fri May 20 21:21:22.326724 2016 (3 rows) +-- Test "ORDER BY" error message +DROP INDEX tsts_idx; +CREATE INDEX tsts_idx ON tsts USING rum (t rum_tsvector_addon_ops, d); +SELECT id, d, d <=> '2016-05-16 14:21:25' FROM tsts WHERE t @@ 'wr&qh' ORDER BY d <=> '2016-05-16 14:21:25' LIMIT 5; +ERROR: doesn't support order by over pass-by-reference column -- Test multicolumn index RESET enable_indexscan; RESET enable_indexonlyscan; diff --git a/sql/array.sql b/sql/array.sql index ba6886780d..9eba800bcf 100644 --- a/sql/array.sql +++ b/sql/array.sql @@ -1,12 +1,11 @@ /* - * --------------------------------------------- - * NOTE: This test behaves differenly on PgPro - * --------------------------------------------- + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * array.out - test output for 64-bit systems and + * array_1.out - test output for 32-bit systems. * - * -------------------- - * array.sql and array_1.sql - * -------------------- - * Test output for 64-bit and 32-bit systems respectively. */ diff --git a/sql/orderby.sql b/sql/orderby.sql index 4c2689d193..a2bd227873 100644 --- a/sql/orderby.sql +++ b/sql/orderby.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby.out - test output for 64-bit systems and + * orderby_1.out - test output for 32-bit systems. 
+ * + */ + + CREATE TABLE tsts (id int, t tsvector, d timestamp); \copy tsts from 'data/tsts.data' From 02a8d77cea827142a2c42c2679ab2ddbf53170b5 Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Wed, 11 Dec 2024 19:36:13 +0300 Subject: [PATCH 175/182] Add comments about alternative outputs. This comments should remind us to change the output for 32-bit systems at the same time as the tests and 64-bit outputs. --- expected/altorder.out | 9 +++++++++ expected/altorder_1.out | 9 +++++++++ expected/altorder_hash.out | 9 +++++++++ expected/altorder_hash_1.out | 9 +++++++++ expected/float8.out | 9 +++++++++ expected/float8_1.out | 9 +++++++++ expected/int8.out | 9 +++++++++ expected/int8_1.out | 9 +++++++++ expected/money.out | 9 +++++++++ expected/money_1.out | 9 +++++++++ expected/orderby_hash.out | 9 +++++++++ expected/orderby_hash_1.out | 9 +++++++++ expected/timestamp.out | 9 +++++++++ expected/timestamp_1.out | 9 +++++++++ sql/altorder.sql | 11 +++++++++++ sql/altorder_hash.sql | 11 +++++++++++ sql/float8.sql | 11 +++++++++++ sql/int8.sql | 11 +++++++++++ sql/money.sql | 11 +++++++++++ sql/orderby_hash.sql | 11 +++++++++++ sql/timestamp.sql | 10 ++++++++++ 21 files changed, 202 insertions(+) diff --git a/expected/altorder.out b/expected/altorder.out index dcf5a51954..6c0bcae2ad 100644 --- a/expected/altorder.out +++ b/expected/altorder.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder.out - test output for 64-bit systems and + * altorder_1.out - test output for 32-bit systems. 
+ * + */ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' -- PGPRO-2537: We need more data to test rumsort.c with logtape.c diff --git a/expected/altorder_1.out b/expected/altorder_1.out index 2fb7f52326..980515f58e 100644 --- a/expected/altorder_1.out +++ b/expected/altorder_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder.out - test output for 64-bit systems and + * altorder_1.out - test output for 32-bit systems. + * + */ CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' -- PGPRO-2537: We need more data to test rumsort.c with logtape.c diff --git a/expected/altorder_hash.out b/expected/altorder_hash.out index 125c195be9..1011b90d0c 100644 --- a/expected/altorder_hash.out +++ b/expected/altorder_hash.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder_hash.out - test output for 64-bit systems and + * altorder_hash_1.out - test output for 32-bit systems. + * + */ CREATE TABLE atstsh (id int, t tsvector, d timestamp); \copy atstsh from 'data/tsts.data' CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) diff --git a/expected/altorder_hash_1.out b/expected/altorder_hash_1.out index 2d93f3f5eb..e310fbdb89 100644 --- a/expected/altorder_hash_1.out +++ b/expected/altorder_hash_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder_hash.out - test output for 64-bit systems and + * altorder_hash_1.out - test output for 32-bit systems. 
+ * + */ CREATE TABLE atstsh (id int, t tsvector, d timestamp); \copy atstsh from 'data/tsts.data' CREATE INDEX atstsh_idx ON atstsh USING rum (t rum_tsvector_hash_addon_ops, d) diff --git a/expected/float8.out b/expected/float8.out index e96cb0ea54..fdca51343a 100644 --- a/expected/float8.out +++ b/expected/float8.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * float8.out - test output for 64-bit systems and + * float8_1.out - test output for 32-bit systems. + * + */ set enable_seqscan=off; CREATE TABLE test_float8 ( i float8 diff --git a/expected/float8_1.out b/expected/float8_1.out index dabdd51964..b421dcf311 100644 --- a/expected/float8_1.out +++ b/expected/float8_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * float8.out - test output for 64-bit systems and + * float8_1.out - test output for 32-bit systems. + * + */ set enable_seqscan=off; CREATE TABLE test_float8 ( i float8 diff --git a/expected/int8.out b/expected/int8.out index 62e4f80a37..663162a18e 100644 --- a/expected/int8.out +++ b/expected/int8.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * int8.out - test output for 64-bit systems and + * int8_1.out - test output for 32-bit systems. + * + */ set enable_seqscan=off; CREATE TABLE test_int8 ( i int8 diff --git a/expected/int8_1.out b/expected/int8_1.out index cbf68dff13..ffced0aaf8 100644 --- a/expected/int8_1.out +++ b/expected/int8_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * int8.out - test output for 64-bit systems and + * int8_1.out - test output for 32-bit systems. 
+ * + */ set enable_seqscan=off; CREATE TABLE test_int8 ( i int8 diff --git a/expected/money.out b/expected/money.out index 7b9b20580e..b2e9bac41d 100644 --- a/expected/money.out +++ b/expected/money.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * money.out - test output for 64-bit systems and + * money_1.out - test output for 32-bit systems. + * + */ set enable_seqscan=off; CREATE TABLE test_money ( i money diff --git a/expected/money_1.out b/expected/money_1.out index b8ec0ec5c7..6a3fa8c211 100644 --- a/expected/money_1.out +++ b/expected/money_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * money.out - test output for 64-bit systems and + * money_1.out - test output for 32-bit systems. + * + */ set enable_seqscan=off; CREATE TABLE test_money ( i money diff --git a/expected/orderby_hash.out b/expected/orderby_hash.out index 7ff1794c5f..782ad5700e 100644 --- a/expected/orderby_hash.out +++ b/expected/orderby_hash.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby_hash.out - test output for 64-bit systems and + * orderby_hash_1.out - test output for 32-bit systems. 
+ * + */ CREATE TABLE tstsh (id int, t tsvector, d timestamp); \copy tstsh from 'data/tsts.data' CREATE INDEX tstsh_idx ON tstsh USING rum (t rum_tsvector_hash_addon_ops, d) diff --git a/expected/orderby_hash_1.out b/expected/orderby_hash_1.out index f32267631c..f19e4507c7 100644 --- a/expected/orderby_hash_1.out +++ b/expected/orderby_hash_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby_hash.out - test output for 64-bit systems and + * orderby_hash_1.out - test output for 32-bit systems. + * + */ CREATE TABLE tstsh (id int, t tsvector, d timestamp); \copy tstsh from 'data/tsts.data' CREATE INDEX tstsh_idx ON tstsh USING rum (t rum_tsvector_hash_addon_ops, d) diff --git a/expected/timestamp.out b/expected/timestamp.out index 37f26f073f..00969a7534 100644 --- a/expected/timestamp.out +++ b/expected/timestamp.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * timestamp.out - test output for 64-bit systems and + * timestamp_1.out - test output for 32-bit systems. + * + */ CREATE TABLE test_timestamp ( i timestamp ); diff --git a/expected/timestamp_1.out b/expected/timestamp_1.out index e15bcc7584..a8641a3232 100644 --- a/expected/timestamp_1.out +++ b/expected/timestamp_1.out @@ -1,3 +1,12 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * timestamp.out - test output for 64-bit systems and + * timestamp_1.out - test output for 32-bit systems. 
+ * + */ CREATE TABLE test_timestamp ( i timestamp ); diff --git a/sql/altorder.sql b/sql/altorder.sql index bc89f8fc06..850e252325 100644 --- a/sql/altorder.sql +++ b/sql/altorder.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder.out - test output for 64-bit systems and + * altorder_1.out - test output for 32-bit systems. + * + */ + + CREATE TABLE atsts (id int, t tsvector, d timestamp); \copy atsts from 'data/tsts.data' diff --git a/sql/altorder_hash.sql b/sql/altorder_hash.sql index 3b723876f9..148407c661 100644 --- a/sql/altorder_hash.sql +++ b/sql/altorder_hash.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * altorder_hash.out - test output for 64-bit systems and + * altorder_hash_1.out - test output for 32-bit systems. + * + */ + + CREATE TABLE atstsh (id int, t tsvector, d timestamp); \copy atstsh from 'data/tsts.data' diff --git a/sql/float8.sql b/sql/float8.sql index 2de5b9ea19..b61cbfb0da 100644 --- a/sql/float8.sql +++ b/sql/float8.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * float8.out - test output for 64-bit systems and + * float8_1.out - test output for 32-bit systems. + * + */ + + set enable_seqscan=off; CREATE TABLE test_float8 ( diff --git a/sql/int8.sql b/sql/int8.sql index 4ec9bf0abf..c51705e62b 100644 --- a/sql/int8.sql +++ b/sql/int8.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * int8.out - test output for 64-bit systems and + * int8_1.out - test output for 32-bit systems. 
+ * + */ + + set enable_seqscan=off; CREATE TABLE test_int8 ( diff --git a/sql/money.sql b/sql/money.sql index 952d2bc8fe..13df5ed260 100644 --- a/sql/money.sql +++ b/sql/money.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * money.out - test output for 64-bit systems and + * money_1.out - test output for 32-bit systems. + * + */ + + set enable_seqscan=off; CREATE TABLE test_money ( diff --git a/sql/orderby_hash.sql b/sql/orderby_hash.sql index f7e9808538..dba8f17ca1 100644 --- a/sql/orderby_hash.sql +++ b/sql/orderby_hash.sql @@ -1,3 +1,14 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * orderby_hash.out - test output for 64-bit systems and + * orderby_hash_1.out - test output for 32-bit systems. + * + */ + + CREATE TABLE tstsh (id int, t tsvector, d timestamp); \copy tstsh from 'data/tsts.data' diff --git a/sql/timestamp.sql b/sql/timestamp.sql index 8025774b82..3386229ddc 100644 --- a/sql/timestamp.sql +++ b/sql/timestamp.sql @@ -1,3 +1,13 @@ +/* + * ------------------------------------ + * NOTE: This test behaves differenly + * ------------------------------------ + * + * timestamp.out - test output for 64-bit systems and + * timestamp_1.out - test output for 32-bit systems. + * + */ + CREATE TABLE test_timestamp ( i timestamp From 9e70afeba5c105242f9000ae50b680e6024e5d23 Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Tue, 17 Dec 2024 15:01:24 +0300 Subject: [PATCH 176/182] Tidying up update scripts. 
1) remove perl scripts for generating sql files, 2) revert saving of old versions of init files, 3) clean Makefile --- Makefile | 25 +- gen_rum_sql--1.0--1.1.pl | 335 ------- gen_rum_sql--1.1--1.2.pl | 183 ---- rum--1.0.sql | 411 -------- rum--1.1.sql | 1513 ------------------------------ rum--1.2.sql | 1707 ---------------------------------- rum--1.3.sql => rum_init.sql | 0 7 files changed, 7 insertions(+), 4167 deletions(-) delete mode 100644 gen_rum_sql--1.0--1.1.pl delete mode 100644 gen_rum_sql--1.1--1.2.pl delete mode 100644 rum--1.0.sql delete mode 100644 rum--1.1.sql delete mode 100644 rum--1.2.sql rename rum--1.3.sql => rum_init.sql (100%) diff --git a/Makefile b/Makefile index ab4ee1df38..cca576da85 100644 --- a/Makefile +++ b/Makefile @@ -11,14 +11,10 @@ OBJS = src/rumsort.o src/rum_ts_utils.o src/rumtsquery.o \ src/rumscan.o src/rumutil.o src/rumvacuum.o src/rumvalidate.o \ src/btree_rum.o src/rum_arr_utils.o $(WIN32RES) -DATA_first = rum--1.0.sql DATA_updates = rum--1.0--1.1.sql rum--1.1--1.2.sql \ rum--1.2--1.3.sql -DATA = $(DATA_first) rum--$(EXTVERSION).sql $(DATA_updates) - -# Do not use DATA_built. 
It removes built files if clean target was used -SQL_built = rum--$(EXTVERSION).sql $(DATA_updates) +DATA_built = $(EXTENSION)--$(EXTVERSION).sql INCLUDES = rum.h rumsort.h RELATIVE_INCLUDES = $(addprefix src/, $(INCLUDES)) @@ -28,8 +24,8 @@ LDFLAGS_SL += $(filter -lm, $(LIBS)) REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_hash \ altorder altorder_hash limits \ int2 int4 int8 float4 float8 money oid \ - time timetz date interval \ - macaddr inet cidr text varchar char bytea bit varbit \ + time timetz date interval \ + macaddr inet cidr text varchar char bytea bit varbit \ numeric rum_weight expr TAP_TESTS = 1 @@ -47,6 +43,9 @@ include $(top_builddir)/src/Makefile.global include $(top_srcdir)/contrib/contrib-global.mk endif +$(EXTENSION)--$(EXTVERSION).sql: rum_init.sql + cat $^ > $@ + ifeq ($(MAJORVERSION), 9.6) # arrays are not supported on 9.6 else @@ -61,16 +60,6 @@ wal-check: temp-install check: wal-check endif -all: $(SQL_built) - -#9.6 requires 1.3 file but 10.0 could live with update files -rum--$(EXTVERSION).sql: $(DATA_first) $(DATA_updates) - cat $(DATA_first) $(DATA_updates) > rum--$(EXTVERSION).sql - -# rule for updates, e.g. 
rum--1.0--1.1.sql -rum--%.sql: gen_rum_sql--%.pl - perl $< > $@ - install: installincludes installincludes: @@ -92,5 +81,5 @@ submake-rum: isolationcheck: | submake-isolation submake-rum temp-install $(pg_isolation_regress_check) \ - --temp-config $(top_srcdir)/contrib/rum/logical.conf \ + --temp-config $(top_srcdir)/contrib/rum/logical.conf \ $(ISOLATIONCHECKS) diff --git a/gen_rum_sql--1.0--1.1.pl b/gen_rum_sql--1.0--1.1.pl deleted file mode 100644 index 7296f6c023..0000000000 --- a/gen_rum_sql--1.0--1.1.pl +++ /dev/null @@ -1,335 +0,0 @@ -use strict; -use warnings; - -my $func_base_template=< ( - PROCEDURE = rum_TYPEIDENT_distance, - LEFTARG = TYPENAME, - RIGHTARG = TYPENAME, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_TYPEIDENT_left_distance(TYPENAME, TYPENAME) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_TYPEIDENT_left_distance, - LEFTARG = TYPENAME, - RIGHTARG = TYPENAME, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_TYPEIDENT_right_distance(TYPENAME, TYPENAME) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_TYPEIDENT_right_distance, - LEFTARG = TYPENAME, - RIGHTARG = TYPENAME, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_TYPEIDENT_outer_distance(TYPENAME, TYPENAME, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_TYPEIDENT_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -EOT - -my $opclass_base_template=<= TYPESOPARG, - OPERATOR 5 > TYPESOPARG, - FUNCTION 1 TYPECMPFUNC(TYPECMPTYPE,TYPECMPTYPE), - FUNCTION 2 rum_TYPESUBIDENT_extract_value(TYPESUBNAME, internal), - FUNCTION 3 rum_TYPESUBIDENT_extract_query(TYPESUBNAME, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_TYPESUBIDENT_compare_prefix(TYPESUBNAME,TYPESUBNAME,int2, internal), 
-STORAGE TYPENAME; - -EOT - -my $opclass_distance_template=<= TYPESOPARG, - OPERATOR 5 > TYPESOPARG, - OPERATOR 20 <=> (TYPENAME,TYPENAME) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (TYPENAME,TYPENAME) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (TYPENAME,TYPENAME) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 TYPECMPFUNC(TYPECMPTYPE,TYPECMPTYPE), - FUNCTION 2 rum_TYPESUBIDENT_extract_value(TYPESUBNAME, internal), - FUNCTION 3 rum_TYPESUBIDENT_extract_query(TYPESUBNAME, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_TYPESUBIDENT_compare_prefix(TYPESUBNAME,TYPESUBNAME,int2, internal), - -- support to TYPEIDENT distance in rum_tsvector_addon_ops - FUNCTION 6 rum_TYPEIDENT_config(internal), - FUNCTION 9 rum_TYPEIDENT_outer_distance(TYPENAME, TYPENAME, smallint), -STORAGE TYPENAME; - -EOT - -my @opinfo = map { - $_->{TYPEIDENT} = $_->{TYPENAME} if ! exists $_->{TYPEIDENT}; - $_->{TYPECMPTYPE} = $_->{TYPENAME} if !exists $_->{TYPECMPTYPE}; - $_->{TYPESUBNAME} = $_->{TYPENAME} if !exists $_->{TYPESUBNAME}; - $_->{TYPESUBIDENT}= $_->{TYPEIDENT} if ! exists $_->{TYPESUBIDENT}; - $_->{TYPESOPARG}= '' if ! 
exists $_->{TYPESOPARG}; - $_ - } ( - # timestamp/tz aren't here: they are in rum--1.0.sql - - { - TYPENAME => 'int2', - TYPECMPFUNC => 'btint2cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'int4', - TYPECMPFUNC => 'btint4cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'int8', - TYPECMPFUNC => 'btint8cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'float4', - TYPECMPFUNC => 'btfloat4cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'float8', - TYPECMPFUNC => 'btfloat8cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'money', - TYPECMPFUNC => 'cash_cmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'oid', - TYPECMPFUNC => 'btoidcmp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'time', - TYPECMPFUNC => 'time_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'timetz', - TYPECMPFUNC => 'timetz_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'date', - TYPECMPFUNC => 'date_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'interval', - TYPECMPFUNC => 'interval_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'macaddr', - TYPECMPFUNC => 'macaddr_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'inet', - TYPECMPFUNC => 'network_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - 
TYPENAME => 'cidr', - TYPECMPFUNC => 'network_cmp', - TYPECMPTYPE => 'inet', - TYPESOPARG => '(inet, inet)', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'text', - TYPECMPFUNC => 'bttextcmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'varchar', - TYPECMPFUNC => 'bttextcmp', - TYPECMPTYPE => 'text', - TYPESUBIDENT=> 'text', - TYPESUBNAME => 'text', - TYPESOPARG => '(text, text)', - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => '"char"', - TYPEIDENT => 'char', - TYPECMPFUNC => 'btcharcmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'bytea', - TYPECMPFUNC => 'byteacmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'bit', - TYPECMPFUNC => 'bitcmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'varbit', - TYPECMPFUNC => 'varbitcmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, - { - TYPENAME => 'numeric', - TYPECMPFUNC => 'rum_numeric_cmp', - func_tmpl => \$func_base_template, - opclass_tmpl=> \$opclass_base_template, - }, -); - -##############Generate!!! 
- -print <{TYPENAME}-----------------------*/\n\n"; - - for my $v (qw(func_tmpl opclass_tmpl)) - { - next if !exists $t->{$v}; - - my $x = ${$t->{$v}}; - - for my $k (grep {uc($_) eq $_} keys %$t) - { - $x=~s/$k/$t->{$k}/g; - } - - print $x; - } -} - -# Drop doesn't work -#print <{TYPEIDENT} = $_->{TYPENAME} if !exists $_->{TYPEIDENT}; - $_ - } ( - { - TYPENAME => 'int2', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'int4', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'int8', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'float4', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'float8', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'money', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'oid', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'timestamp', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, - { - TYPENAME => 'timestamptz', - func_tmpl => \$func_distance_template, - opclass_tmpl=> \$opclass_distance_template, - }, -); - -##############Generate!!! 
- -print < ( - PROCEDURE = rum_anyarray_distance, - LEFTARG = anyarray, - RIGHTARG = anyarray, - COMMUTATOR = '<=>' -); - - -CREATE FUNCTION rum_extract_anyarray(anyarray,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_anyarray_consistent(internal, smallint, anyarray, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -CREATE OPERATOR CLASS rum_anyarray_ops -DEFAULT FOR TYPE anyarray USING rum -AS - OPERATOR 1 && (anyarray, anyarray), - OPERATOR 2 @> (anyarray, anyarray), - OPERATOR 3 <@ (anyarray, anyarray), - OPERATOR 4 = (anyarray, anyarray), - OPERATOR 5 % (anyarray, anyarray), - OPERATOR 20 <=> (anyarray, anyarray) FOR ORDER BY pg_catalog.float_ops, - --dispatch function 1 for concrete type - FUNCTION 2 rum_extract_anyarray(anyarray,internal,internal,internal,internal), - FUNCTION 3 rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_anyarray_consistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), - FUNCTION 6 rum_anyarray_config(internal), - FUNCTION 8 rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal), - STORAGE anyelement; - -CREATE OPERATOR CLASS rum_anyarray_addon_ops -FOR TYPE anyarray USING rum -AS - OPERATOR 1 && (anyarray, anyarray), - OPERATOR 2 @> (anyarray, anyarray), - OPERATOR 3 <@ (anyarray, anyarray), - OPERATOR 4 = (anyarray, anyarray), - --dispatch function 1 for concrete type - FUNCTION 2 
ginarrayextract(anyarray,internal,internal), - FUNCTION 3 ginqueryarrayextract(anyarray,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 ginarrayconsistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), - STORAGE anyelement; - -EOT - -foreach my $t (@opinfo) -{ - print "/*--------------------$t->{TYPENAME}-----------------------*/\n\n"; - - for my $v (qw(func_tmpl opclass_tmpl)) - { - next if !exists $t->{$v}; - - my $x = ${$t->{$v}}; - - for my $k (grep {uc($_) eq $_} keys %$t) - { - $x=~s/$k/$t->{$k}/g; - } - - print $x; - } -} diff --git a/rum--1.0.sql b/rum--1.0.sql deleted file mode 100644 index fd2616b204..0000000000 --- a/rum--1.0.sql +++ /dev/null @@ -1,411 +0,0 @@ -CREATE FUNCTION rumhandler(internal) -RETURNS index_am_handler -AS 'MODULE_PATHNAME' -LANGUAGE C; - -/* - * RUM access method - */ - -CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler; - -/* - * RUM built-in types, operators and functions - */ - --- Type used in distance calculations with normalization argument -CREATE TYPE rum_distance_query AS (query tsquery, method int); - -CREATE FUNCTION tsquery_to_distance_query(tsquery) -RETURNS rum_distance_query -AS 'MODULE_PATHNAME', 'tsquery_to_distance_query' -LANGUAGE C IMMUTABLE STRICT; - -CREATE CAST (tsquery AS rum_distance_query) - WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT; - -CREATE FUNCTION rum_ts_distance(tsvector,tsquery) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_tt' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_td' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = tsquery, - PROCEDURE = rum_ts_distance -); - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = 
rum_distance_query, - PROCEDURE = rum_ts_distance -); - -CREATE FUNCTION rum_timestamp_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamp_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamp_left_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamp_left_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamp_right_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamp_right_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=| -); - -/* - * rum_tsvector_ops operator class - */ - -CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsvector_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - --- To prevent calling from SQL -CREATE FUNCTION rum_ts_join_pos(internal, internal) 
-RETURNS bytea -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_ops -DEFAULT FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE text; - -/* - * rum_tsvector_hash_ops operator class. - * - * Stores hash of entries as keys in index. 
- */ - -CREATE FUNCTION rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_hash_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE integer; - -/* - * rum_timestamp_ops operator class - */ - --- timestamp operator class - -CREATE FUNCTION rum_timestamp_extract_value(timestamp,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION 
rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_outer_distance(timestamp, timestamp, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE OPERATOR CLASS rum_timestamp_ops -DEFAULT FOR TYPE timestamp USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamp_cmp(timestamp,timestamp), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamp distance in rum_tsvector_timestamp_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamp; - -/* - * rum_tsvector_timestamp_ops operator class. - * - * Stores timestamp with tsvector. - */ - -CREATE FUNCTION rum_tsquery_timestamp_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -/* - * !!!deprecated, use rum_tsvector_addon_ops!!! 
- */ -CREATE OPERATOR CLASS rum_tsvector_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamp_ops operator class - * !!!deprecated, use rum_tsvector_hash_addon_ops!!! - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_timestamptz_ops operator class - */ - -CREATE FUNCTION rum_timestamptz_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamptz_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamptz_left_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_left_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamptz_left_distance, 
- LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamptz_right_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_right_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamptz_right_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=| -); - -CREATE OPERATOR CLASS rum_timestamptz_ops -DEFAULT FOR TYPE timestamptz USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamptz distance in rum_tsvector_timestamptz_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamptz; - -/* - * rum_tsvector_timestamptz_ops operator class. - * - * Stores tsvector with timestamptz. 
- */ - -CREATE OPERATOR CLASS rum_tsvector_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamptz_ops operator class - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_tsquery_ops operator class. - * - * Used for inversed text search. 
- */ - -CREATE FUNCTION ruminv_extract_tsquery(tsquery,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsvector_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsquery_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsquery_ops -DEFAULT FOR TYPE tsquery USING rum -AS - OPERATOR 1 @@ (tsquery, tsvector), - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 ruminv_extract_tsquery(tsquery,internal,internal,internal,internal), - FUNCTION 3 ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 ruminv_tsvector_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 ruminv_tsquery_config(internal), - STORAGE text; diff --git a/rum--1.1.sql b/rum--1.1.sql deleted file mode 100644 index 88762a2411..0000000000 --- a/rum--1.1.sql +++ /dev/null @@ -1,1513 +0,0 @@ -CREATE FUNCTION rumhandler(internal) -RETURNS index_am_handler -AS 'MODULE_PATHNAME' -LANGUAGE C; - -/* - * RUM access method - */ - -CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler; - -/* - * RUM built-in types, operators and functions - */ - --- Type used in distance calculations with normalization argument -CREATE TYPE rum_distance_query AS (query tsquery, method int); - -CREATE FUNCTION tsquery_to_distance_query(tsquery) -RETURNS rum_distance_query -AS 'MODULE_PATHNAME', 'tsquery_to_distance_query' -LANGUAGE C IMMUTABLE STRICT; - -CREATE CAST (tsquery AS rum_distance_query) - WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT; - -CREATE FUNCTION 
rum_ts_distance(tsvector,tsquery) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_tt' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_td' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = tsquery, - PROCEDURE = rum_ts_distance -); - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = rum_distance_query, - PROCEDURE = rum_ts_distance -); - -CREATE FUNCTION rum_timestamp_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamp_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamp_left_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamp_left_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamp_right_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamp_right_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=| -); - -/* - * rum_tsvector_ops operator class - */ - -CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsvector_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION 
rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - --- To prevent calling from SQL -CREATE FUNCTION rum_ts_join_pos(internal, internal) -RETURNS bytea -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_ops -DEFAULT FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE text; - -/* - * rum_tsvector_hash_ops operator class. - * - * Stores hash of entries as keys in index. 
- */ - -CREATE FUNCTION rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_hash_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE integer; - -/* - * rum_timestamp_ops operator class - */ - --- timestamp operator class - -CREATE FUNCTION rum_timestamp_extract_value(timestamp,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION 
rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_outer_distance(timestamp, timestamp, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE OPERATOR CLASS rum_timestamp_ops -DEFAULT FOR TYPE timestamp USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamp_cmp(timestamp,timestamp), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamp disttance in rum_tsvector_timestamp_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamp; - -/* - * rum_tsvector_timestamp_ops operator class. - * - * Stores timestamp with tsvector. - */ - -CREATE FUNCTION rum_tsquery_timestamp_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -/* - * !!!deprecated, use rum_tsvector_hash_addon_ops!!! 
- */ -CREATE OPERATOR CLASS rum_tsvector_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamp_ops operator class - * !!!deprecated, use rum_tsvector_hash_addon_ops!!! - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_timestamptz_ops operator class - */ - -CREATE FUNCTION rum_timestamptz_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamptz_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamptz_left_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_left_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamptz_left_distance, 
- LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamptz_right_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_right_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamptz_right_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=| -); - -CREATE OPERATOR CLASS rum_timestamptz_ops -DEFAULT FOR TYPE timestamptz USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamptz distance in rum_tsvector_timestamptz_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamptz; - -/* - * rum_tsvector_timestamptz_ops operator class. - * - * Stores tsvector with timestamptz. 
- */ - -CREATE OPERATOR CLASS rum_tsvector_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamptz_ops operator class - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_tsquery_ops operator class. - * - * Used for inversed text search. 
- */ - -CREATE FUNCTION ruminv_extract_tsquery(tsquery,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsvector_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsquery_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsquery_ops -DEFAULT FOR TYPE tsquery USING rum -AS - OPERATOR 1 @@ (tsquery, tsvector), - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 ruminv_extract_tsquery(tsquery,internal,internal,internal,internal), - FUNCTION 3 ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 ruminv_tsvector_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 ruminv_tsquery_config(internal), - STORAGE text; -CREATE FUNCTION rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -ALTER FUNCTION - rum_tsquery_timestamp_consistent (internal,smallint,tsvector,int,internal,internal,internal,internal) - RENAME TO rum_tsquery_addon_consistent; - -CREATE FUNCTION rum_numeric_cmp(numeric, numeric) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE OPERATOR CLASS rum_tsvector_addon_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 
rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -CREATE OPERATOR CLASS rum_tsvector_hash_addon_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/*--------------------int2-----------------------*/ - -CREATE FUNCTION rum_int2_extract_value(int2, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_compare_prefix(int2, int2, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_extract_query(int2, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_int2_distance(int2, int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int2_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int2_left_distance(int2, int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int2_left_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int2_right_distance(int2, int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( 
- PROCEDURE = rum_int2_right_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int2_outer_distance(int2, int2, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int2_ops -DEFAULT FOR TYPE int2 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int2,int2) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint2cmp(int2,int2), - FUNCTION 2 rum_int2_extract_value(int2, internal), - FUNCTION 3 rum_int2_extract_query(int2, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int2_compare_prefix(int2,int2,int2, internal), - -- support to int2 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int2_config(internal), - FUNCTION 9 rum_int2_outer_distance(int2, int2, smallint), -STORAGE int2; - -/*--------------------int4-----------------------*/ - -CREATE FUNCTION rum_int4_extract_value(int4, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_compare_prefix(int4, int4, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_extract_query(int4, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_int4_distance(int4, int4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int4_distance, - LEFTARG = int4, - RIGHTARG = int4, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int4_left_distance(int4, int4) -RETURNS float8 -AS 
'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int4_left_distance, - LEFTARG = int4, - RIGHTARG = int4, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int4_right_distance(int4, int4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_int4_right_distance, - LEFTARG = int4, - RIGHTARG = int4, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int4_outer_distance(int4, int4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int4_ops -DEFAULT FOR TYPE int4 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int4,int4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint4cmp(int4,int4), - FUNCTION 2 rum_int4_extract_value(int4, internal), - FUNCTION 3 rum_int4_extract_query(int4, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int4_compare_prefix(int4,int4,int2, internal), - -- support to int4 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int4_config(internal), - FUNCTION 9 rum_int4_outer_distance(int4, int4, smallint), -STORAGE int4; - -/*--------------------int8-----------------------*/ - -CREATE FUNCTION rum_int8_extract_value(int8, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_compare_prefix(int8, int8, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_extract_query(int8, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT 
IMMUTABLE; - - - -CREATE FUNCTION rum_int8_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int8_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int8_left_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int8_left_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int8_right_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_int8_right_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int8_outer_distance(int8, int8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int8_ops -DEFAULT FOR TYPE int8 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int8,int8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint8cmp(int8,int8), - FUNCTION 2 rum_int8_extract_value(int8, internal), - FUNCTION 3 rum_int8_extract_query(int8, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int8_compare_prefix(int8,int8,int2, internal), - -- support to int8 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int8_config(internal), - FUNCTION 9 rum_int8_outer_distance(int8, int8, smallint), -STORAGE int8; - -/*--------------------float4-----------------------*/ - -CREATE FUNCTION rum_float4_extract_value(float4, internal) -RETURNS internal -AS 
'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_compare_prefix(float4, float4, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_extract_query(float4, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_float4_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_float4_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_float4_left_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_float4_left_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_float4_right_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_float4_right_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_float4_outer_distance(float4, float4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_float4_ops -DEFAULT FOR TYPE float4 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (float4,float4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btfloat4cmp(float4,float4), - FUNCTION 2 rum_float4_extract_value(float4, internal), - FUNCTION 3 rum_float4_extract_query(float4, internal, int2, internal, internal), - FUNCTION 4 
rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_float4_compare_prefix(float4,float4,int2, internal), - -- support to float4 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_float4_config(internal), - FUNCTION 9 rum_float4_outer_distance(float4, float4, smallint), -STORAGE float4; - -/*--------------------float8-----------------------*/ - -CREATE FUNCTION rum_float8_extract_value(float8, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_compare_prefix(float8, float8, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_extract_query(float8, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_float8_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_float8_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_float8_left_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_float8_left_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_float8_right_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_float8_right_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_float8_outer_distance(float8, float8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_float8_ops -DEFAULT FOR TYPE float8 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 
>= , - OPERATOR 5 > , - OPERATOR 20 <=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (float8,float8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btfloat8cmp(float8,float8), - FUNCTION 2 rum_float8_extract_value(float8, internal), - FUNCTION 3 rum_float8_extract_query(float8, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_float8_compare_prefix(float8,float8,int2, internal), - -- support to float8 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_float8_config(internal), - FUNCTION 9 rum_float8_outer_distance(float8, float8, smallint), -STORAGE float8; - -/*--------------------money-----------------------*/ - -CREATE FUNCTION rum_money_extract_value(money, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_compare_prefix(money, money, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_extract_query(money, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_money_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_money_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_money_left_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_money_left_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_money_right_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_money_right_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = <=| -); - 
-CREATE FUNCTION rum_money_outer_distance(money, money, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_money_ops -DEFAULT FOR TYPE money USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (money,money) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (money,money) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (money,money) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 cash_cmp(money,money), - FUNCTION 2 rum_money_extract_value(money, internal), - FUNCTION 3 rum_money_extract_query(money, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_money_compare_prefix(money,money,int2, internal), - -- support to money distance in rum_tsvector_addon_ops - FUNCTION 6 rum_money_config(internal), - FUNCTION 9 rum_money_outer_distance(money, money, smallint), -STORAGE money; - -/*--------------------oid-----------------------*/ - -CREATE FUNCTION rum_oid_extract_value(oid, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_compare_prefix(oid, oid, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_extract_query(oid, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_oid_distance(oid, oid) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_oid_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_oid_left_distance(oid, oid) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = 
rum_oid_left_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_oid_right_distance(oid, oid) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_oid_right_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_oid_outer_distance(oid, oid, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_oid_ops -DEFAULT FOR TYPE oid USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (oid,oid) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btoidcmp(oid,oid), - FUNCTION 2 rum_oid_extract_value(oid, internal), - FUNCTION 3 rum_oid_extract_query(oid, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_oid_compare_prefix(oid,oid,int2, internal), - -- support to oid distance in rum_tsvector_addon_ops - FUNCTION 6 rum_oid_config(internal), - FUNCTION 9 rum_oid_outer_distance(oid, oid, smallint), -STORAGE oid; - -/*--------------------time-----------------------*/ - -CREATE FUNCTION rum_time_extract_value(time, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_time_compare_prefix(time, time, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_time_extract_query(time, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_time_ops -DEFAULT FOR TYPE time USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - 
OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 time_cmp(time,time), - FUNCTION 2 rum_time_extract_value(time, internal), - FUNCTION 3 rum_time_extract_query(time, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_time_compare_prefix(time,time,int2, internal), -STORAGE time; - -/*--------------------timetz-----------------------*/ - -CREATE FUNCTION rum_timetz_extract_value(timetz, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timetz_compare_prefix(timetz, timetz, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timetz_extract_query(timetz, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_timetz_ops -DEFAULT FOR TYPE timetz USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 timetz_cmp(timetz,timetz), - FUNCTION 2 rum_timetz_extract_value(timetz, internal), - FUNCTION 3 rum_timetz_extract_query(timetz, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_timetz_compare_prefix(timetz,timetz,int2, internal), -STORAGE timetz; - -/*--------------------date-----------------------*/ - -CREATE FUNCTION rum_date_extract_value(date, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_date_compare_prefix(date, date, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_date_extract_query(date, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_date_ops -DEFAULT FOR TYPE date USING rum -AS - OPERATOR 1 < , - 
OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 date_cmp(date,date), - FUNCTION 2 rum_date_extract_value(date, internal), - FUNCTION 3 rum_date_extract_query(date, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_date_compare_prefix(date,date,int2, internal), -STORAGE date; - -/*--------------------interval-----------------------*/ - -CREATE FUNCTION rum_interval_extract_value(interval, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_interval_compare_prefix(interval, interval, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_interval_extract_query(interval, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_interval_ops -DEFAULT FOR TYPE interval USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 interval_cmp(interval,interval), - FUNCTION 2 rum_interval_extract_value(interval, internal), - FUNCTION 3 rum_interval_extract_query(interval, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_interval_compare_prefix(interval,interval,int2, internal), -STORAGE interval; - -/*--------------------macaddr-----------------------*/ - -CREATE FUNCTION rum_macaddr_extract_value(macaddr, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_macaddr_compare_prefix(macaddr, macaddr, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_macaddr_extract_query(macaddr, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE 
OPERATOR CLASS rum_macaddr_ops -DEFAULT FOR TYPE macaddr USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 macaddr_cmp(macaddr,macaddr), - FUNCTION 2 rum_macaddr_extract_value(macaddr, internal), - FUNCTION 3 rum_macaddr_extract_query(macaddr, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_macaddr_compare_prefix(macaddr,macaddr,int2, internal), -STORAGE macaddr; - -/*--------------------inet-----------------------*/ - -CREATE FUNCTION rum_inet_extract_value(inet, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_inet_compare_prefix(inet, inet, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_inet_extract_query(inet, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_inet_ops -DEFAULT FOR TYPE inet USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 network_cmp(inet,inet), - FUNCTION 2 rum_inet_extract_value(inet, internal), - FUNCTION 3 rum_inet_extract_query(inet, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_inet_compare_prefix(inet,inet,int2, internal), -STORAGE inet; - -/*--------------------cidr-----------------------*/ - -CREATE FUNCTION rum_cidr_extract_value(cidr, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_cidr_compare_prefix(cidr, cidr, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_cidr_extract_query(cidr, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT 
IMMUTABLE; - - -CREATE OPERATOR CLASS rum_cidr_ops -DEFAULT FOR TYPE cidr USING rum -AS - OPERATOR 1 < (inet, inet), - OPERATOR 2 <= (inet, inet), - OPERATOR 3 = (inet, inet), - OPERATOR 4 >= (inet, inet), - OPERATOR 5 > (inet, inet), - FUNCTION 1 network_cmp(inet,inet), - FUNCTION 2 rum_cidr_extract_value(cidr, internal), - FUNCTION 3 rum_cidr_extract_query(cidr, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_cidr_compare_prefix(cidr,cidr,int2, internal), -STORAGE cidr; - -/*--------------------text-----------------------*/ - -CREATE FUNCTION rum_text_extract_value(text, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_text_compare_prefix(text, text, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_text_extract_query(text, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_text_ops -DEFAULT FOR TYPE text USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 bttextcmp(text,text), - FUNCTION 2 rum_text_extract_value(text, internal), - FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), -STORAGE text; - -/*--------------------varchar-----------------------*/ - - -CREATE OPERATOR CLASS rum_varchar_ops -DEFAULT FOR TYPE varchar USING rum -AS - OPERATOR 1 < (text, text), - OPERATOR 2 <= (text, text), - OPERATOR 3 = (text, text), - OPERATOR 4 >= (text, text), - OPERATOR 5 > (text, text), - FUNCTION 1 bttextcmp(text,text), - FUNCTION 2 rum_text_extract_value(text, internal), - FUNCTION 3 rum_text_extract_query(text, 
internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), -STORAGE varchar; - -/*--------------------"char"-----------------------*/ - -CREATE FUNCTION rum_char_extract_value("char", internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_char_compare_prefix("char", "char", int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_char_extract_query("char", internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_char_ops -DEFAULT FOR TYPE "char" USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 btcharcmp("char","char"), - FUNCTION 2 rum_char_extract_value("char", internal), - FUNCTION 3 rum_char_extract_query("char", internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_char_compare_prefix("char","char",int2, internal), -STORAGE "char"; - -/*--------------------bytea-----------------------*/ - -CREATE FUNCTION rum_bytea_extract_value(bytea, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bytea_compare_prefix(bytea, bytea, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bytea_extract_query(bytea, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_bytea_ops -DEFAULT FOR TYPE bytea USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 byteacmp(bytea,bytea), - FUNCTION 2 rum_bytea_extract_value(bytea, internal), - FUNCTION 3 
rum_bytea_extract_query(bytea, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_bytea_compare_prefix(bytea,bytea,int2, internal), -STORAGE bytea; - -/*--------------------bit-----------------------*/ - -CREATE FUNCTION rum_bit_extract_value(bit, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bit_compare_prefix(bit, bit, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bit_extract_query(bit, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_bit_ops -DEFAULT FOR TYPE bit USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 bitcmp(bit,bit), - FUNCTION 2 rum_bit_extract_value(bit, internal), - FUNCTION 3 rum_bit_extract_query(bit, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_bit_compare_prefix(bit,bit,int2, internal), -STORAGE bit; - -/*--------------------varbit-----------------------*/ - -CREATE FUNCTION rum_varbit_extract_value(varbit, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_varbit_compare_prefix(varbit, varbit, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_varbit_extract_query(varbit, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_varbit_ops -DEFAULT FOR TYPE varbit USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 varbitcmp(varbit,varbit), - FUNCTION 2 rum_varbit_extract_value(varbit, internal), - FUNCTION 3 
rum_varbit_extract_query(varbit, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_varbit_compare_prefix(varbit,varbit,int2, internal), -STORAGE varbit; - -/*--------------------numeric-----------------------*/ - -CREATE FUNCTION rum_numeric_extract_value(numeric, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_numeric_compare_prefix(numeric, numeric, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_numeric_extract_query(numeric, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_numeric_ops -DEFAULT FOR TYPE numeric USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 rum_numeric_cmp(numeric,numeric), - FUNCTION 2 rum_numeric_extract_value(numeric, internal), - FUNCTION 3 rum_numeric_extract_query(numeric, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_numeric_compare_prefix(numeric,numeric,int2, internal), -STORAGE numeric; - diff --git a/rum--1.2.sql b/rum--1.2.sql deleted file mode 100644 index 313de039b4..0000000000 --- a/rum--1.2.sql +++ /dev/null @@ -1,1707 +0,0 @@ -CREATE FUNCTION rumhandler(internal) -RETURNS index_am_handler -AS 'MODULE_PATHNAME' -LANGUAGE C; - -/* - * RUM access method - */ - -CREATE ACCESS METHOD rum TYPE INDEX HANDLER rumhandler; - -/* - * RUM built-in types, operators and functions - */ - --- Type used in distance calculations with normalization argument -CREATE TYPE rum_distance_query AS (query tsquery, method int); - -CREATE FUNCTION tsquery_to_distance_query(tsquery) -RETURNS rum_distance_query -AS 'MODULE_PATHNAME', 'tsquery_to_distance_query' -LANGUAGE C IMMUTABLE 
STRICT; - -CREATE CAST (tsquery AS rum_distance_query) - WITH FUNCTION tsquery_to_distance_query(tsquery) AS IMPLICIT; - -CREATE FUNCTION rum_ts_distance(tsvector,tsquery) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_tt' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,tsquery,int) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_ttf' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_ts_distance(tsvector,rum_distance_query) -RETURNS float4 -AS 'MODULE_PATHNAME', 'rum_ts_distance_td' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = tsquery, - PROCEDURE = rum_ts_distance -); - -CREATE OPERATOR <=> ( - LEFTARG = tsvector, - RIGHTARG = rum_distance_query, - PROCEDURE = rum_ts_distance -); - -CREATE FUNCTION rum_timestamp_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamp_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamp_left_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamp_left_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamp_right_distance(timestamp, timestamp) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamp_right_distance, - LEFTARG = timestamp, - RIGHTARG = timestamp, - COMMUTATOR = <=| -); - -/* - * rum_tsvector_ops operator class - */ - -CREATE FUNCTION rum_extract_tsvector(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION 
rum_tsvector_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - --- To prevent calling from SQL -CREATE FUNCTION rum_ts_join_pos(internal, internal) -RETURNS bytea -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_ops -DEFAULT FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE text; - -/* - * rum_tsvector_hash_ops operator class. - * - * Stores hash of entries as keys in index. 
- */ - -CREATE FUNCTION rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsvector_hash_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - OPERATOR 2 <=> (tsvector, tsquery) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 rum_tsvector_config(internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 8 rum_tsquery_distance(internal,smallint,tsvector,int,internal,internal,internal,internal,internal), - FUNCTION 10 rum_ts_join_pos(internal, internal), - STORAGE integer; - -/* - * rum_timestamp_ops operator class - */ - --- timestamp operator class - -CREATE FUNCTION rum_timestamp_extract_value(timestamp,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION 
rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timestamp_outer_distance(timestamp, timestamp, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE OPERATOR CLASS rum_timestamp_ops -DEFAULT FOR TYPE timestamp USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamp_cmp(timestamp,timestamp), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamp distance in rum_tsvector_timestamp_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamp,timestamp) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamp; - -/* - * rum_tsvector_timestamp_ops operator class. - * - * Stores timestamp with tsvector. - */ - -CREATE FUNCTION rum_tsquery_timestamp_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -/* - * !!!deprecated, use rum_tsvector_addon_ops!!! 
- */ -CREATE OPERATOR CLASS rum_tsvector_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamp_ops operator class - * !!!deprecated, use rum_tsvector_hash_addon_ops!!! - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamp_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_timestamptz_ops operator class - */ - -CREATE FUNCTION rum_timestamptz_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_timestamptz_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_timestamptz_left_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_left_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_timestamptz_left_distance, 
- LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_timestamptz_right_distance(timestamptz, timestamptz) -RETURNS float8 -AS 'MODULE_PATHNAME', 'rum_timestamp_right_distance' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_timestamptz_right_distance, - LEFTARG = timestamptz, - RIGHTARG = timestamptz, - COMMUTATOR = <=| -); - -CREATE OPERATOR CLASS rum_timestamptz_ops -DEFAULT FOR TYPE timestamptz USING rum -AS - OPERATOR 1 <, - OPERATOR 2 <=, - OPERATOR 3 =, - OPERATOR 4 >=, - OPERATOR 5 >, - --support - FUNCTION 1 timestamptz_cmp(timestamptz,timestamptz), - FUNCTION 2 rum_timestamp_extract_value(timestamp,internal,internal,internal,internal), - FUNCTION 3 rum_timestamp_extract_query(timestamp,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_timestamp_consistent(internal,smallint,timestamp,int,internal,internal,internal,internal), - FUNCTION 5 rum_timestamp_compare_prefix(timestamp,timestamp,smallint,internal), - FUNCTION 6 rum_timestamp_config(internal), - -- support to timestamptz distance in rum_tsvector_timestamptz_ops - FUNCTION 9 rum_timestamp_outer_distance(timestamp, timestamp, smallint), - OPERATOR 20 <=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (timestamptz,timestamptz) FOR ORDER BY pg_catalog.float_ops, -STORAGE timestamptz; - -/* - * rum_tsvector_timestamptz_ops operator class. - * - * Stores tsvector with timestamptz. 
- */ - -CREATE OPERATOR CLASS rum_tsvector_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -/* - * rum_tsvector_hash_timestamptz_ops operator class - */ - -CREATE OPERATOR CLASS rum_tsvector_hash_timestamptz_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_timestamp_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/* - * rum_tsquery_ops operator class. - * - * Used for inversed text search. 
- */ - -CREATE FUNCTION ruminv_extract_tsquery(tsquery,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsvector_consistent(internal, smallint, tsvector, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION ruminv_tsquery_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR CLASS rum_tsquery_ops -DEFAULT FOR TYPE tsquery USING rum -AS - OPERATOR 1 @@ (tsquery, tsvector), - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 ruminv_extract_tsquery(tsquery,internal,internal,internal,internal), - FUNCTION 3 ruminv_extract_tsvector(tsvector,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 ruminv_tsvector_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 6 ruminv_tsquery_config(internal), - STORAGE text; -/* - * RUM version 1.1 - */ - -CREATE FUNCTION rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -ALTER FUNCTION - rum_tsquery_timestamp_consistent (internal,smallint,tsvector,int,internal,internal,internal,internal) - RENAME TO rum_tsquery_addon_consistent; - -CREATE FUNCTION rum_numeric_cmp(numeric, numeric) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE OPERATOR CLASS rum_tsvector_addon_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 gin_cmp_tslexeme(text, text), - FUNCTION 2 rum_extract_tsvector(tsvector,internal,internal,internal,internal), - FUNCTION 3 
rum_extract_tsquery(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 5 gin_cmp_prefix(text,text,smallint,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE text; - -CREATE OPERATOR CLASS rum_tsvector_hash_addon_ops -FOR TYPE tsvector USING rum -AS - OPERATOR 1 @@ (tsvector, tsquery), - --support function - FUNCTION 1 btint4cmp(integer, integer), - FUNCTION 2 rum_extract_tsvector_hash(tsvector,internal,internal,internal,internal), - FUNCTION 3 rum_extract_tsquery_hash(tsquery,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_tsquery_addon_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - FUNCTION 7 rum_tsquery_pre_consistent(internal,smallint,tsvector,int,internal,internal,internal,internal), - STORAGE integer; - -/*--------------------int2-----------------------*/ - -CREATE FUNCTION rum_int2_extract_value(int2, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_compare_prefix(int2, int2, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_extract_query(int2, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_int2_distance(int2, int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int2_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int2_left_distance(int2, int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int2_left_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int2_right_distance(int2, 
int2) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_int2_right_distance, - LEFTARG = int2, - RIGHTARG = int2, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int2_outer_distance(int2, int2, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int2_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int2_ops -DEFAULT FOR TYPE int2 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int2,int2) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int2,int2) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint2cmp(int2,int2), - FUNCTION 2 rum_int2_extract_value(int2, internal), - FUNCTION 3 rum_int2_extract_query(int2, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int2_compare_prefix(int2,int2,int2, internal), - -- support to int2 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int2_config(internal), - FUNCTION 9 rum_int2_outer_distance(int2, int2, smallint), -STORAGE int2; - -/*--------------------int4-----------------------*/ - -CREATE FUNCTION rum_int4_extract_value(int4, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_compare_prefix(int4, int4, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_extract_query(int4, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_int4_distance(int4, int4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int4_distance, - LEFTARG = int4, - RIGHTARG = int4, - 
COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int4_left_distance(int4, int4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int4_left_distance, - LEFTARG = int4, - RIGHTARG = int4, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int4_right_distance(int4, int4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_int4_right_distance, - LEFTARG = int4, - RIGHTARG = int4, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int4_outer_distance(int4, int4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int4_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int4_ops -DEFAULT FOR TYPE int4 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int4,int4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int4,int4) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint4cmp(int4,int4), - FUNCTION 2 rum_int4_extract_value(int4, internal), - FUNCTION 3 rum_int4_extract_query(int4, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int4_compare_prefix(int4,int4,int2, internal), - -- support to int4 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int4_config(internal), - FUNCTION 9 rum_int4_outer_distance(int4, int4, smallint), -STORAGE int4; - -/*--------------------int8-----------------------*/ - -CREATE FUNCTION rum_int8_extract_value(int8, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_compare_prefix(int8, int8, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_extract_query(int8, 
internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_int8_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_int8_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_int8_left_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_int8_left_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_int8_right_distance(int8, int8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_int8_right_distance, - LEFTARG = int8, - RIGHTARG = int8, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_int8_outer_distance(int8, int8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_int8_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_int8_ops -DEFAULT FOR TYPE int8 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (int8,int8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (int8,int8) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btint8cmp(int8,int8), - FUNCTION 2 rum_int8_extract_value(int8, internal), - FUNCTION 3 rum_int8_extract_query(int8, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_int8_compare_prefix(int8,int8,int2, internal), - -- support to int8 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_int8_config(internal), - FUNCTION 9 rum_int8_outer_distance(int8, int8, smallint), -STORAGE int8; - 
-/*--------------------float4-----------------------*/ - -CREATE FUNCTION rum_float4_extract_value(float4, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_compare_prefix(float4, float4, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_extract_query(float4, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_float4_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_float4_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_float4_left_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_float4_left_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_float4_right_distance(float4, float4) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_float4_right_distance, - LEFTARG = float4, - RIGHTARG = float4, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_float4_outer_distance(float4, float4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float4_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_float4_ops -DEFAULT FOR TYPE float4 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (float4,float4) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (float4,float4) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btfloat4cmp(float4,float4), - FUNCTION 2 rum_float4_extract_value(float4, internal), - FUNCTION 3 
rum_float4_extract_query(float4, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_float4_compare_prefix(float4,float4,int2, internal), - -- support to float4 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_float4_config(internal), - FUNCTION 9 rum_float4_outer_distance(float4, float4, smallint), -STORAGE float4; - -/*--------------------float8-----------------------*/ - -CREATE FUNCTION rum_float8_extract_value(float8, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_compare_prefix(float8, float8, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_extract_query(float8, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_float8_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_float8_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_float8_left_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_float8_left_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_float8_right_distance(float8, float8) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_float8_right_distance, - LEFTARG = float8, - RIGHTARG = float8, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_float8_outer_distance(float8, float8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_float8_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_float8_ops -DEFAULT FOR TYPE 
float8 USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (float8,float8) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (float8,float8) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btfloat8cmp(float8,float8), - FUNCTION 2 rum_float8_extract_value(float8, internal), - FUNCTION 3 rum_float8_extract_query(float8, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_float8_compare_prefix(float8,float8,int2, internal), - -- support to float8 distance in rum_tsvector_addon_ops - FUNCTION 6 rum_float8_config(internal), - FUNCTION 9 rum_float8_outer_distance(float8, float8, smallint), -STORAGE float8; - -/*--------------------money-----------------------*/ - -CREATE FUNCTION rum_money_extract_value(money, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_compare_prefix(money, money, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_extract_query(money, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_money_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_money_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_money_left_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_money_left_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_money_right_distance(money, money) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = 
rum_money_right_distance, - LEFTARG = money, - RIGHTARG = money, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_money_outer_distance(money, money, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_money_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_money_ops -DEFAULT FOR TYPE money USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (money,money) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (money,money) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (money,money) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 cash_cmp(money,money), - FUNCTION 2 rum_money_extract_value(money, internal), - FUNCTION 3 rum_money_extract_query(money, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_money_compare_prefix(money,money,int2, internal), - -- support to money distance in rum_tsvector_addon_ops - FUNCTION 6 rum_money_config(internal), - FUNCTION 9 rum_money_outer_distance(money, money, smallint), -STORAGE money; - -/*--------------------oid-----------------------*/ - -CREATE FUNCTION rum_oid_extract_value(oid, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_compare_prefix(oid, oid, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_extract_query(oid, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - - -CREATE FUNCTION rum_oid_distance(oid, oid) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_oid_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = <=> -); - -CREATE FUNCTION rum_oid_left_distance(oid, oid) -RETURNS float8 -AS 
'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR <=| ( - PROCEDURE = rum_oid_left_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = |=> -); - -CREATE FUNCTION rum_oid_right_distance(oid, oid) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE OPERATOR |=> ( - PROCEDURE = rum_oid_right_distance, - LEFTARG = oid, - RIGHTARG = oid, - COMMUTATOR = <=| -); - -CREATE FUNCTION rum_oid_outer_distance(oid, oid, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_oid_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - - -CREATE OPERATOR CLASS rum_oid_ops -DEFAULT FOR TYPE oid USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - OPERATOR 20 <=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 21 <=| (oid,oid) FOR ORDER BY pg_catalog.float_ops, - OPERATOR 22 |=> (oid,oid) FOR ORDER BY pg_catalog.float_ops, - FUNCTION 1 btoidcmp(oid,oid), - FUNCTION 2 rum_oid_extract_value(oid, internal), - FUNCTION 3 rum_oid_extract_query(oid, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_oid_compare_prefix(oid,oid,int2, internal), - -- support to oid distance in rum_tsvector_addon_ops - FUNCTION 6 rum_oid_config(internal), - FUNCTION 9 rum_oid_outer_distance(oid, oid, smallint), -STORAGE oid; - -/*--------------------time-----------------------*/ - -CREATE FUNCTION rum_time_extract_value(time, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_time_compare_prefix(time, time, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_time_extract_query(time, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS 
rum_time_ops -DEFAULT FOR TYPE time USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 time_cmp(time,time), - FUNCTION 2 rum_time_extract_value(time, internal), - FUNCTION 3 rum_time_extract_query(time, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_time_compare_prefix(time,time,int2, internal), -STORAGE time; - -/*--------------------timetz-----------------------*/ - -CREATE FUNCTION rum_timetz_extract_value(timetz, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timetz_compare_prefix(timetz, timetz, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_timetz_extract_query(timetz, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_timetz_ops -DEFAULT FOR TYPE timetz USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 timetz_cmp(timetz,timetz), - FUNCTION 2 rum_timetz_extract_value(timetz, internal), - FUNCTION 3 rum_timetz_extract_query(timetz, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_timetz_compare_prefix(timetz,timetz,int2, internal), -STORAGE timetz; - -/*--------------------date-----------------------*/ - -CREATE FUNCTION rum_date_extract_value(date, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_date_compare_prefix(date, date, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_date_extract_query(date, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE 
OPERATOR CLASS rum_date_ops -DEFAULT FOR TYPE date USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 date_cmp(date,date), - FUNCTION 2 rum_date_extract_value(date, internal), - FUNCTION 3 rum_date_extract_query(date, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_date_compare_prefix(date,date,int2, internal), -STORAGE date; - -/*--------------------interval-----------------------*/ - -CREATE FUNCTION rum_interval_extract_value(interval, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_interval_compare_prefix(interval, interval, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_interval_extract_query(interval, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_interval_ops -DEFAULT FOR TYPE interval USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 interval_cmp(interval,interval), - FUNCTION 2 rum_interval_extract_value(interval, internal), - FUNCTION 3 rum_interval_extract_query(interval, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_interval_compare_prefix(interval,interval,int2, internal), -STORAGE interval; - -/*--------------------macaddr-----------------------*/ - -CREATE FUNCTION rum_macaddr_extract_value(macaddr, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_macaddr_compare_prefix(macaddr, macaddr, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_macaddr_extract_query(macaddr, internal, int2, internal, internal) 
-RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_macaddr_ops -DEFAULT FOR TYPE macaddr USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 macaddr_cmp(macaddr,macaddr), - FUNCTION 2 rum_macaddr_extract_value(macaddr, internal), - FUNCTION 3 rum_macaddr_extract_query(macaddr, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_macaddr_compare_prefix(macaddr,macaddr,int2, internal), -STORAGE macaddr; - -/*--------------------inet-----------------------*/ - -CREATE FUNCTION rum_inet_extract_value(inet, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_inet_compare_prefix(inet, inet, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_inet_extract_query(inet, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_inet_ops -DEFAULT FOR TYPE inet USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 network_cmp(inet,inet), - FUNCTION 2 rum_inet_extract_value(inet, internal), - FUNCTION 3 rum_inet_extract_query(inet, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_inet_compare_prefix(inet,inet,int2, internal), -STORAGE inet; - -/*--------------------cidr-----------------------*/ - -CREATE FUNCTION rum_cidr_extract_value(cidr, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_cidr_compare_prefix(cidr, cidr, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_cidr_extract_query(cidr, internal, int2, 
internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_cidr_ops -DEFAULT FOR TYPE cidr USING rum -AS - OPERATOR 1 < (inet, inet), - OPERATOR 2 <= (inet, inet), - OPERATOR 3 = (inet, inet), - OPERATOR 4 >= (inet, inet), - OPERATOR 5 > (inet, inet), - FUNCTION 1 network_cmp(inet,inet), - FUNCTION 2 rum_cidr_extract_value(cidr, internal), - FUNCTION 3 rum_cidr_extract_query(cidr, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_cidr_compare_prefix(cidr,cidr,int2, internal), -STORAGE cidr; - -/*--------------------text-----------------------*/ - -CREATE FUNCTION rum_text_extract_value(text, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_text_compare_prefix(text, text, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_text_extract_query(text, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_text_ops -DEFAULT FOR TYPE text USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 bttextcmp(text,text), - FUNCTION 2 rum_text_extract_value(text, internal), - FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), -STORAGE text; - -/*--------------------varchar-----------------------*/ - - -CREATE OPERATOR CLASS rum_varchar_ops -DEFAULT FOR TYPE varchar USING rum -AS - OPERATOR 1 < (text, text), - OPERATOR 2 <= (text, text), - OPERATOR 3 = (text, text), - OPERATOR 4 >= (text, text), - OPERATOR 5 > (text, text), - FUNCTION 1 bttextcmp(text,text), - FUNCTION 2 
rum_text_extract_value(text, internal), - FUNCTION 3 rum_text_extract_query(text, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_text_compare_prefix(text,text,int2, internal), -STORAGE varchar; - -/*--------------------"char"-----------------------*/ - -CREATE FUNCTION rum_char_extract_value("char", internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_char_compare_prefix("char", "char", int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_char_extract_query("char", internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_char_ops -DEFAULT FOR TYPE "char" USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 btcharcmp("char","char"), - FUNCTION 2 rum_char_extract_value("char", internal), - FUNCTION 3 rum_char_extract_query("char", internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_char_compare_prefix("char","char",int2, internal), -STORAGE "char"; - -/*--------------------bytea-----------------------*/ - -CREATE FUNCTION rum_bytea_extract_value(bytea, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bytea_compare_prefix(bytea, bytea, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bytea_extract_query(bytea, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_bytea_ops -DEFAULT FOR TYPE bytea USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 
byteacmp(bytea,bytea), - FUNCTION 2 rum_bytea_extract_value(bytea, internal), - FUNCTION 3 rum_bytea_extract_query(bytea, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_bytea_compare_prefix(bytea,bytea,int2, internal), -STORAGE bytea; - -/*--------------------bit-----------------------*/ - -CREATE FUNCTION rum_bit_extract_value(bit, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bit_compare_prefix(bit, bit, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_bit_extract_query(bit, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_bit_ops -DEFAULT FOR TYPE bit USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 bitcmp(bit,bit), - FUNCTION 2 rum_bit_extract_value(bit, internal), - FUNCTION 3 rum_bit_extract_query(bit, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_bit_compare_prefix(bit,bit,int2, internal), -STORAGE bit; - -/*--------------------varbit-----------------------*/ - -CREATE FUNCTION rum_varbit_extract_value(varbit, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_varbit_compare_prefix(varbit, varbit, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_varbit_extract_query(varbit, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_varbit_ops -DEFAULT FOR TYPE varbit USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 
varbitcmp(varbit,varbit), - FUNCTION 2 rum_varbit_extract_value(varbit, internal), - FUNCTION 3 rum_varbit_extract_query(varbit, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_varbit_compare_prefix(varbit,varbit,int2, internal), -STORAGE varbit; - -/*--------------------numeric-----------------------*/ - -CREATE FUNCTION rum_numeric_extract_value(numeric, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_numeric_compare_prefix(numeric, numeric, int2, internal) -RETURNS int4 -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - -CREATE FUNCTION rum_numeric_extract_query(numeric, internal, int2, internal, internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT IMMUTABLE; - - -CREATE OPERATOR CLASS rum_numeric_ops -DEFAULT FOR TYPE numeric USING rum -AS - OPERATOR 1 < , - OPERATOR 2 <= , - OPERATOR 3 = , - OPERATOR 4 >= , - OPERATOR 5 > , - FUNCTION 1 rum_numeric_cmp(numeric,numeric), - FUNCTION 2 rum_numeric_extract_value(numeric, internal), - FUNCTION 3 rum_numeric_extract_query(numeric, internal, int2, internal, internal), - FUNCTION 4 rum_btree_consistent(internal,smallint,internal,int,internal,internal,internal,internal), - FUNCTION 5 rum_numeric_compare_prefix(numeric,numeric,int2, internal), -STORAGE numeric; - -/* - * RUM version 1.2 - */ - -/*--------------------anyarray-----------------------*/ - -CREATE FUNCTION rum_anyarray_config(internal) -RETURNS void -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -CREATE FUNCTION rum_anyarray_similar(anyarray,anyarray) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C STRICT STABLE; - -CREATE OPERATOR % ( - PROCEDURE = rum_anyarray_similar, - LEFTARG = anyarray, - RIGHTARG = anyarray, - COMMUTATOR = '%', - RESTRICT = contsel, - JOIN = contjoinsel -); - - -CREATE FUNCTION rum_anyarray_distance(anyarray,anyarray) -RETURNS float8 -AS 
'MODULE_PATHNAME' -LANGUAGE C STRICT STABLE; - -CREATE OPERATOR <=> ( - PROCEDURE = rum_anyarray_distance, - LEFTARG = anyarray, - RIGHTARG = anyarray, - COMMUTATOR = '<=>' -); - - -CREATE FUNCTION rum_extract_anyarray(anyarray,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal) -RETURNS internal -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_anyarray_consistent(internal, smallint, anyarray, integer, internal, internal, internal, internal) -RETURNS bool -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - -CREATE FUNCTION rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -CREATE OPERATOR CLASS rum_anyarray_ops -DEFAULT FOR TYPE anyarray USING rum -AS - OPERATOR 1 && (anyarray, anyarray), - OPERATOR 2 @> (anyarray, anyarray), - OPERATOR 3 <@ (anyarray, anyarray), - OPERATOR 4 = (anyarray, anyarray), - OPERATOR 5 % (anyarray, anyarray), - OPERATOR 20 <=> (anyarray, anyarray) FOR ORDER BY pg_catalog.float_ops, - --dispatch function 1 for concrete type - FUNCTION 2 rum_extract_anyarray(anyarray,internal,internal,internal,internal), - FUNCTION 3 rum_extract_anyarray_query(anyarray,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 rum_anyarray_consistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), - FUNCTION 6 rum_anyarray_config(internal), - FUNCTION 8 rum_anyarray_ordering(internal,smallint,anyarray,int,internal,internal,internal,internal,internal), - STORAGE anyelement; - -CREATE OPERATOR CLASS rum_anyarray_addon_ops -FOR TYPE anyarray USING rum -AS - OPERATOR 1 && (anyarray, anyarray), - OPERATOR 2 @> (anyarray, anyarray), - OPERATOR 3 <@ (anyarray, anyarray), - OPERATOR 4 = (anyarray, anyarray), - --dispatch 
function 1 for concrete type - FUNCTION 2 ginarrayextract(anyarray,internal,internal), - FUNCTION 3 ginqueryarrayextract(anyarray,internal,smallint,internal,internal,internal,internal), - FUNCTION 4 ginarrayconsistent(internal,smallint,anyarray,integer,internal,internal,internal,internal), - STORAGE anyelement; - -/*--------------------int2-----------------------*/ - -CREATE FUNCTION rum_int2_key_distance(int2, int2, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_int2_ops USING rum ADD - FUNCTION 8 (int2,int2) rum_int2_key_distance(int2, int2, smallint); - -/*--------------------int4-----------------------*/ - -CREATE FUNCTION rum_int4_key_distance(int4, int4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_int4_ops USING rum ADD - FUNCTION 8 (int4,int4) rum_int4_key_distance(int4, int4, smallint); - -/*--------------------int8-----------------------*/ - -CREATE FUNCTION rum_int8_key_distance(int8, int8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_int8_ops USING rum ADD - FUNCTION 8 (int8,int8) rum_int8_key_distance(int8, int8, smallint); - -/*--------------------float4-----------------------*/ - -CREATE FUNCTION rum_float4_key_distance(float4, float4, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_float4_ops USING rum ADD - FUNCTION 8 (float4,float4) rum_float4_key_distance(float4, float4, smallint); - -/*--------------------float8-----------------------*/ - -CREATE FUNCTION rum_float8_key_distance(float8, float8, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_float8_ops USING rum ADD - FUNCTION 8 (float8,float8) rum_float8_key_distance(float8, float8, smallint); - -/*--------------------money-----------------------*/ - -CREATE FUNCTION 
rum_money_key_distance(money, money, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_money_ops USING rum ADD - FUNCTION 8 (money,money) rum_money_key_distance(money, money, smallint); - -/*--------------------oid-----------------------*/ - -CREATE FUNCTION rum_oid_key_distance(oid, oid, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_oid_ops USING rum ADD - FUNCTION 8 (oid,oid) rum_oid_key_distance(oid, oid, smallint); - -/*--------------------timestamp-----------------------*/ - -CREATE FUNCTION rum_timestamp_key_distance(timestamp, timestamp, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_timestamp_ops USING rum ADD - FUNCTION 8 (timestamp,timestamp) rum_timestamp_key_distance(timestamp, timestamp, smallint); - -/*--------------------timestamptz-----------------------*/ - -CREATE FUNCTION rum_timestamptz_key_distance(timestamptz, timestamptz, smallint) -RETURNS float8 -AS 'MODULE_PATHNAME' -LANGUAGE C IMMUTABLE STRICT; - - -ALTER OPERATOR FAMILY rum_timestamptz_ops USING rum ADD - FUNCTION 8 (timestamptz,timestamptz) rum_timestamptz_key_distance(timestamptz, timestamptz, smallint); - diff --git a/rum--1.3.sql b/rum_init.sql similarity index 100% rename from rum--1.3.sql rename to rum_init.sql From c2f0ad54f51709b51c7d2338ab37a285eb2e6f91 Mon Sep 17 00:00:00 2001 From: Zharkov Roman Date: Tue, 21 Jan 2025 16:43:46 +0300 Subject: [PATCH 177/182] Add meson.build file to support building from the contrib source tree. 
--- meson.build | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 meson.build diff --git a/meson.build b/meson.build new file mode 100644 index 0000000000..b339fcce76 --- /dev/null +++ b/meson.build @@ -0,0 +1,96 @@ +# Copyright (c) 2025, Postgres Professional + +# Does not support the PGXS infrastructure at this time. Please, compile as part +# of the contrib source tree. + +rum_sources = files( + 'src/btree_rum.c', + 'src/rum_arr_utils.c', + 'src/rum_ts_utils.c', + 'src/rumbtree.c', + 'src/rumbulk.c', + 'src/rumdatapage.c', + 'src/rumentrypage.c', + 'src/rumget.c', + 'src/ruminsert.c', + 'src/rumscan.c', + 'src/rumsort.c', + 'src/rumtsquery.c', + 'src/rumutil.c', + 'src/rumvacuum.c', + 'src/rumvalidate.c', +) + +if host_system == 'windows' + rum_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'rum', + '--FILEDESC', 'rum - provides access method to work with the RUM indexes.',]) +endif + +rum = shared_module('rum', + rum_sources, + kwargs: contrib_mod_args, +) +contrib_targets += rum + +install_data( + 'rum.control', + 'rum--1.0--1.1.sql', + 'rum--1.0.sql', + 'rum--1.1--1.2.sql', + 'rum--1.1.sql', + 'rum--1.2--1.3.sql', + 'rum--1.2.sql', + 'rum--1.3.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'rum', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'regress': { + 'sql': [ + 'security', + 'rum', + 'rum_validate', + 'rum_hash', + 'ruminv', + 'timestamp', + 'orderby', + 'orderby_hash', + 'altorder', + 'altorder_hash', + 'limits', + 'int2', + 'int4', + 'int8', + 'float4', + 'float8', + 'money', + 'oid', + 'time', + 'timetz', + 'date', + 'interval', + 'macaddr', + 'inet', + 'cidr', + 'text', + 'varchar', + 'char', + 'bytea', + 'bit', + 'varbit', + 'numeric', + 'rum_weight', + 'expr', + 'array', + ], + }, + 'tap': { + 'tests': [ + 't/001_wal.pl', + ], + }, +} From 049b40c9d4e1260a15412c9842e8201fe30bda07 Mon Sep 17 00:00:00 2001 From: Zharkov Roman 
Date: Thu, 13 Feb 2025 13:00:49 +0700 Subject: [PATCH 178/182] Update meson.build to sync with Makefile. --- meson.build | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index b339fcce76..4caf9bb45c 100644 --- a/meson.build +++ b/meson.build @@ -3,6 +3,9 @@ # Does not support the PGXS infrastructure at this time. Please, compile as part # of the contrib source tree. +extension = 'rum' +extversion = '1.3' + rum_sources = files( 'src/btree_rum.c', 'src/rum_arr_utils.c', @@ -33,15 +36,19 @@ rum = shared_module('rum', ) contrib_targets += rum +configure_file( + input: 'rum_init.sql', + output: extension + '--' + extversion + '.sql', + copy: true, + install: true, + install_dir: contrib_data_args['install_dir'], +) + install_data( 'rum.control', 'rum--1.0--1.1.sql', - 'rum--1.0.sql', 'rum--1.1--1.2.sql', - 'rum--1.1.sql', 'rum--1.2--1.3.sql', - 'rum--1.2.sql', - 'rum--1.3.sql', kwargs: contrib_data_args, ) @@ -91,6 +98,8 @@ tests += { 'tap': { 'tests': [ 't/001_wal.pl', + 't/002_pglist.pl', ], + 'test_kwargs': {'timeout': 3000}, }, } From 2ad36badd1f15093635185a70e71057b8787086c Mon Sep 17 00:00:00 2001 From: Vadim Minigaliev Date: Thu, 3 Oct 2024 10:01:43 +0300 Subject: [PATCH 179/182] [PGPRO-9336] Fix of isolation tests and beautify makefile --- .travis.yml | 7 - Makefile | 17 + expected/predicate-rum-2.out | 600 +++++++++++++++++-------------- expected/predicate-rum-2_1.out | 501 -------------------------- expected/predicate-rum.out | 622 ++++++++++++++++++--------------- expected/predicate-rum_1.out | 521 --------------------------- meson.build | 16 +- specs/predicate-rum-2.spec | 24 +- specs/predicate-rum.spec | 24 +- 9 files changed, 741 insertions(+), 1591 deletions(-) delete mode 100644 expected/predicate-rum-2_1.out delete mode 100644 expected/predicate-rum_1.out diff --git a/.travis.yml b/.travis.yml index 7ee9c5d4cd..0c21a422c2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,10 +35,3 @@ env: - 
PG_VERSION=13 LEVEL=hardcore - PG_VERSION=12 - PG_VERSION=12 LEVEL=hardcore - - PG_VERSION=11 - - PG_VERSION=11 LEVEL=hardcore - -matrix: - allow_failures: - - env: PG_VERSION=11 - - env: PG_VERSION=11 LEVEL=hardcore \ No newline at end of file diff --git a/Makefile b/Makefile index cca576da85..a8d510019d 100644 --- a/Makefile +++ b/Makefile @@ -30,9 +30,20 @@ REGRESS = security rum rum_validate rum_hash ruminv timestamp orderby orderby_ha TAP_TESTS = 1 +ISOLATION = predicate-rum predicate-rum-2 +ISOLATION_OPTS = --load-extension=rum EXTRA_CLEAN = pglist_tmp ifdef USE_PGXS + +# We cannot run isolation test for versions 12,13 in PGXS case +# because 'pg_isolation_regress' is not copied to install +# directory, see src/test/isolation/Makefile +ifeq ($(MAJORVERSION),$(filter 12% 13%,$(MAJORVERSION))) +undefine ISOLATION +undefine ISOLATION_OPTS +endif + PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) @@ -60,6 +71,11 @@ wal-check: temp-install check: wal-check endif +# +# Make conditional targets to save backward compatibility with PG11, PG10 and PG9.6. 
+# +ifeq ($(MAJORVERSION), $(filter 9.6% 10% 11%, $(MAJORVERSION))) + install: installincludes installincludes: @@ -83,3 +99,4 @@ isolationcheck: | submake-isolation submake-rum temp-install $(pg_isolation_regress_check) \ --temp-config $(top_srcdir)/contrib/rum/logical.conf \ $(ISOLATIONCHECKS) +endif \ No newline at end of file diff --git a/expected/predicate-rum-2.out b/expected/predicate-rum-2.out index d8a731091d..20c2d1e21d 100644 --- a/expected/predicate-rum-2.out +++ b/expected/predicate-rum-2.out @@ -2,109 +2,129 @@ Parsed test spec with 2 sessions starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 
'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 
-230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 
'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c1: COMMIT; step c2: COMMIT; starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; step c1: COMMIT; starting permutation: rxy1 rxy2 wx1 c1 
wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('xz'); @@ -112,21 +132,25 @@ step c2: COMMIT; starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 
'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c1: COMMIT; @@ -134,21 +158,25 @@ step c2: COMMIT; starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 
'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; @@ -156,21 +184,25 @@ step c1: COMMIT; starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 
'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; @@ -178,21 +210,25 @@ step c2: COMMIT; starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c2: COMMIT; @@ -200,21 +236,25 @@ step c1: COMMIT; starting permutation: rxy1 rxy2 wy2 c2 wx1 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 
'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -222,21 +262,25 @@ step c1: COMMIT; starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 
'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('xz'); @@ -244,21 +288,25 @@ step c2: COMMIT; starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 
'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c1: COMMIT; @@ -266,21 +314,25 @@ step c2: COMMIT; starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 
'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; @@ -288,21 +340,25 @@ step c1: COMMIT; starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 
'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; @@ -310,21 +366,25 @@ step c2: COMMIT; starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c2: COMMIT; @@ -332,21 +392,25 @@ step c1: COMMIT; starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 
'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -354,88 +418,104 @@ step c1: COMMIT; starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 
'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; step c2: COMMIT; starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 
+299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c2: COMMIT; step c1: COMMIT; starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: 
COMMIT; starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('xz'); step c2: COMMIT; step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('ab'); step c1: COMMIT; diff --git a/expected/predicate-rum-2_1.out b/expected/predicate-rum-2_1.out deleted file mode 100644 index 8d3e0d173c..0000000000 --- 
a/expected/predicate-rum-2_1.out +++ /dev/null @@ -1,501 +0,0 @@ -Parsed test spec with 2 sessions - -starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; - -starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - 
-424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; - -starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c1: COMMIT; -step c2: COMMIT; - -starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 
'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy1 rxy2 wx1 c1 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; - -starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 
-step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c1: COMMIT; -step c2: COMMIT; - -starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 
'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step c2: COMMIT; - -starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 
'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy1 rxy2 wy2 c2 wx1 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; - -starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 
'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; - -starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 
'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c1: COMMIT; -step c2: COMMIT; - -starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 
'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step c2: COMMIT; - -starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 
'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; - -starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; -step c2: COMMIT; 
- -starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c2: COMMIT; -step c1: COMMIT; - -starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 
'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; - -starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('xz'); -step c2: COMMIT; -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('ab'); -step c1: COMMIT; diff --git a/expected/predicate-rum.out b/expected/predicate-rum.out index c708c1267a..f4b6ddc4bc 100644 --- a/expected/predicate-rum.out +++ b/expected/predicate-rum.out @@ -2,460 +2,522 @@ Parsed test spec with 2 sessions starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 
'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +339|'qh' +(6 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 -677 'qh' step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 
-190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions step c2: COMMIT; starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 
'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + 
id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 rxy2 wx1 c1 wy2 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions step c2: COMMIT; starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy1 rxy2 wy2 c2 wx1 c1 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv 
+---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions step c1: COMMIT; starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 
'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions step c2: COMMIT; starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 
'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 
'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 
step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - 
-165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 
'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions step c1: COMMIT; starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 
'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 
'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c2: COMMIT; step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 
'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +(5 rows) + step c2: COMMIT; step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions step c1: COMMIT; starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv + id|tsv +---+---------------------------------------------------------------------- + 10|'af':8 'iy':3 'kg':5 'ln':10 'lq':1 'po':7 'pp':4 'qh':2 'sx':6 'yw':9 + 77|'da':3 'dr':4 'iy':9 'lq':7 'pp':10 'qh':8 'qj':2 'rs':1 'si':5 'uz':6 +145|'af':10 'iy':5 'kg':7 'lq':3 'po':9 'pp':6 'qh':4 'si':1 'sx':8 'uz':2 +212|'da':5 'dr':6 'hb':2 'kk':1 'lq':9 'qh':10 'qj':4 'rs':3 'si':7 'uz':8 +280|'da':1 'dr':2 'iy':7 'kg':9 'lq':5 'pp':8 'qh':6 'si':3 'sx':10 'uz':4 +(5 rows) -424 'qh':1 'su':2 'tu':3 'ww':4 -230 'iv':1 'lp':2 'mt':4 'qh':3 'ss':5 -248 'jn':1 'js':4 'mx':2 'ne':7 'nn':5 'nw':3 'qh':6 -50 'bx':1 'ca':5 'da':10 'dn':2 'eq':6 'fn':8 'gl':7 'hu':3 'ig':9 'mg':4 'qh':11 step wy2: INSERT INTO rum_tbl(tsv) values('hx'); step c2: COMMIT; step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -165 'gi':1 'gj':2 'gx':6 'hb':5 'hx':8 'ir':7 'sq':3 'yg':4 -74 'cv':1 'de':8 'ds':10 'eh':4 'fd':6 'gh':3 'gi':7 'hn':5 'hx':9 'lo':2 -116 'el':1 'er':9 'ez':6 'gr':3 'gt':4 'hx':7 'ie':5 'iv':2 'od':10 'zf':8 -119 'eo':1 'fc':5 'he':7 'ht':9 'hx':8 'it':2 'km':3 
'so':4 'uj':6 -190 'hh':1 'hx':2 'id':5 'iv':3 'ld':7 'ob':6 'oy':4 -206 'hx':1 'it':9 'ji':10 'jl':5 'lq':3 'mh':8 'nq':6 'pc':7 'ub':4 'xi':2 -677 'hx' + id|tsv +---+---------------------------------------------------------------------- + 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 + 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 +163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 +231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +339|'hx' +(6 rows) + step wx1: INSERT INTO rum_tbl(tsv) values('qh'); step c1: COMMIT; diff --git a/expected/predicate-rum_1.out b/expected/predicate-rum_1.out deleted file mode 100644 index 7f7a0dc9a6..0000000000 --- a/expected/predicate-rum_1.out +++ /dev/null @@ -1,521 +0,0 @@ -Parsed test spec with 2 sessions - -starting permutation: rxy1 wx1 c1 rxy2 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 
-677 'qh' -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; - -starting permutation: rxy1 wx1 rxy2 c1 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c2: COMMIT; - -starting permutation: rxy1 wx1 rxy2 wy2 c1 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step 
rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 wx1 rxy2 wy2 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 rxy2 wx1 c1 wy2 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 
'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c2: COMMIT; - -starting permutation: rxy1 rxy2 wx1 wy2 c1 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 
'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 rxy2 wx1 wy2 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 rxy2 wy2 wx1 c1 c2 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 
'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 rxy2 wy2 wx1 c2 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); 
-step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy1 rxy2 wy2 c2 wx1 c1 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c1: COMMIT; - -starting permutation: rxy2 rxy1 wx1 c1 wy2 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 
'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c2: COMMIT; - -starting permutation: rxy2 rxy1 wx1 wy2 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 rxy1 wx1 wy2 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id 
tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 rxy1 wy2 wx1 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 
'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 rxy1 wy2 wx1 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 rxy1 wy2 c2 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 
'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c1: COMMIT; - -starting permutation: rxy2 wy2 rxy1 wx1 c1 c2 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step 
wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; -step c2: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 wy2 rxy1 wx1 c2 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c2: COMMIT; -step c1: COMMIT; -ERROR: could not serialize access due to read/write dependencies among transactions - -starting permutation: rxy2 wy2 rxy1 c2 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; 
-id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -step c2: COMMIT; -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -ERROR: could not serialize access due to read/write dependencies among transactions -step c1: COMMIT; - -starting permutation: rxy2 wy2 c2 rxy1 wx1 c1 -step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; -id tsv - -424 'qh':1 'su':3 'vf':2 -238 'jd':1 'kp':6 'pm':3 'py':5 'qh':4 'ye':2 -299 'lm':1 'nh':6 'nl':3 'op':5 'pl':2 'qh':8 'un':7 'yt':4 -324 'ml':1 'oc':7 'ol':8 'ou':2 'pj':5 'qh':3 'sa':9 'uq':6 'zw':4 -413 'pw':1 'qd':3 'qh':7 'qu':8 'rm':2 'rv':4 'so':5 'sv':6 'tz':9 -147 'fq':1 'ga':2 'gg':3 'iu':9 'iz':10 'kd':5 'lf':4 'mx':7 'qh':11 'tj':6 'yj':8 -step wy2: INSERT INTO rum_tbl(tsv) values('hx'); -step c2: COMMIT; -step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; -id tsv - -195 'hm':1 'hu':4 'hx':7 'ja':5 'jz':3 'pf':2 'yy':6 -131 'fa':1 'fo':8 'fs':4 'gq':5 'hx':6 'jh':7 'lw':2 'nx':3 -141 'fk':1 'fm':8 'ft':5 'hd':3 'hx':9 'je':2 'nm':4 'ph':7 're':6 -148 'fr':1 'gz':5 'hq':8 'hx':6 'ia':2 'jj':10 'jt':9 'lo':7 'no':4 'wm':3 -206 'hx':1 'iq':4 'ki':10 'kz':3 'lt':8 'ol':9 'pa':7 'tb':5 'ui':2 'xh':6 -45 'bs':1,5 'bu':8 'ce':7 'dm':6 'ea':9 'ej':10 'fd':2 'gj':11 'hx':4 'vo':3 -162 'gf':1,5 'gz':9 'hx':3 'ik':2 'je':11 'jk':10 'jy':4 'nz':6 'qz':7 'rw':8 -677 'hx' -step wx1: INSERT INTO rum_tbl(tsv) values('qh'); -step c1: COMMIT; diff --git a/meson.build b/meson.build index b339fcce76..a80260cc8c 100644 --- a/meson.build +++ b/meson.build @@ -24,7 +24,7 @@ rum_sources = 
files( if host_system == 'windows' rum_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ '--NAME', 'rum', - '--FILEDESC', 'rum - provides access method to work with the RUM indexes.',]) + '--FILEDESC', 'RUM index access method',]) endif rum = shared_module('rum', @@ -87,10 +87,22 @@ tests += { 'expr', 'array', ], + 'regress_args': [ + '--temp-config', files('logical.conf') + ], }, 'tap': { 'tests': [ 't/001_wal.pl', ], }, -} + 'isolation': { + 'specs': [ + 'predicate-rum', + 'predicate-rum-2', + ], + 'regress_args': [ + '--temp-config', files('logical.conf'), + ], + }, +} \ No newline at end of file diff --git a/specs/predicate-rum-2.spec b/specs/predicate-rum-2.spec index 0d0278ed77..763375fa71 100644 --- a/specs/predicate-rum-2.spec +++ b/specs/predicate-rum-2.spec @@ -6,24 +6,29 @@ setup { - CREATE EXTENSION rum; - CREATE TABLE rum_tbl (id serial, tsv tsvector); CREATE TABLE text_table (id1 serial, t text[]); - SELECT SETSEED(0.5); - INSERT INTO text_table(t) SELECT array[chr(i) || chr(j)] FROM generate_series(65,90) i, generate_series(65,90) j ; - INSERT INTO rum_tbl(tsv) SELECT to_tsvector('simple', t[1] ) FROM text_table; - + -- We need to use pseudorandom to generate values for test table + -- In this case we use linear congruential generator because random() + -- function may generate different outputs with different systems DO $$ + DECLARE + c integer := 17; + a integer := 261; + m integer := 6760; + Xi integer := 228; BEGIN - FOR j in 1..10 LOOP - UPDATE rum_tbl SET tsv = tsv || q.t1 FROM (SELECT id1,to_tsvector('simple', t[1] ) - as t1 FROM text_table) as q WHERE id = (random()*q.id1)::integer; + FOR i in 1..338 LOOP + INSERT INTO rum_tbl(tsv) VALUES (''); + FOR j in 1..10 LOOP + UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector(t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; + Xi = (a * Xi + c) % m; + END LOOP; END LOOP; END; $$; @@ -35,7 +40,6 @@ teardown { DROP TABLE text_table; DROP TABLE rum_tbl; - DROP EXTENSION rum; } 
session "s1" diff --git a/specs/predicate-rum.spec b/specs/predicate-rum.spec index 2d87194d40..ec12b51f89 100644 --- a/specs/predicate-rum.spec +++ b/specs/predicate-rum.spec @@ -6,24 +6,29 @@ setup { - CREATE EXTENSION rum; - CREATE TABLE rum_tbl (id serial, tsv tsvector); CREATE TABLE text_table (id1 serial, t text[]); - SELECT SETSEED(0.5); - INSERT INTO text_table(t) SELECT array[chr(i) || chr(j)] FROM generate_series(65,90) i, generate_series(65,90) j ; - INSERT INTO rum_tbl(tsv) SELECT to_tsvector('simple', t[1] ) FROM text_table; - + -- We need to use pseudorandom to generate values for test table + -- In this case we use linear congruential generator because random() + -- function may generate different outputs with different systems DO $$ + DECLARE + c integer := 17; + a integer := 261; + m integer := 6760; + Xi integer := 228; BEGIN - FOR j in 1..10 LOOP - UPDATE rum_tbl SET tsv = tsv || q.t1 FROM (SELECT id1,to_tsvector('simple', t[1] ) - as t1 FROM text_table) as q WHERE id = (random()*q.id1)::integer; + FOR i in 1..338 LOOP + INSERT INTO rum_tbl(tsv) VALUES (''); + FOR j in 1..10 LOOP + UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector(t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; + Xi = (a * Xi + c) % m; + END LOOP; END LOOP; END; $$; @@ -35,7 +40,6 @@ teardown { DROP TABLE text_table; DROP TABLE rum_tbl; - DROP EXTENSION rum; } session "s1" From 4a1c3005a443f36f4171fa7513d2fc6587fd3122 Mon Sep 17 00:00:00 2001 From: Zharkov Roman Date: Tue, 4 Mar 2025 15:47:51 +0700 Subject: [PATCH 180/182] Update meson.build to make the isolation tests work. 
--- meson.build | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meson.build b/meson.build index d963c020de..b4336f0668 100644 --- a/meson.build +++ b/meson.build @@ -112,6 +112,7 @@ tests += { ], 'regress_args': [ '--temp-config', files('logical.conf'), + '--load-extension=rum', ], }, -} \ No newline at end of file +} From 565a36a887ad87853fcbf139522ae7afa3c9e853 Mon Sep 17 00:00:00 2001 From: Vadim Minigaliev Date: Fri, 11 Apr 2025 11:54:35 +0300 Subject: [PATCH 181/182] [PGPRO-13081] Fixup for [PGPRO-9336] Fix of isolation tests and beautify makefile Tags: rum --- expected/predicate-rum-2.out | 120 +++++++++++++++++------------------ expected/predicate-rum.out | 120 +++++++++++++++++------------------ specs/predicate-rum-2.spec | 2 +- specs/predicate-rum.spec | 2 +- 4 files changed, 122 insertions(+), 122 deletions(-) diff --git a/expected/predicate-rum-2.out b/expected/predicate-rum-2.out index 20c2d1e21d..cc4720c052 100644 --- a/expected/predicate-rum-2.out +++ b/expected/predicate-rum-2.out @@ -5,10 +5,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -31,10 +31,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 
'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -57,10 +57,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -83,10 +83,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 
'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -109,10 +109,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -135,10 +135,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -161,10 +161,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 
'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -187,10 +187,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -213,10 +213,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 
'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -239,10 +239,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -275,10 +275,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -301,10 +301,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 
'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -327,10 +327,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -353,10 +353,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 
'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('xz'); @@ -379,10 +379,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('xz'); @@ -405,10 +405,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('xz'); @@ -432,10 +432,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 
'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -458,10 +458,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); @@ -484,10 +484,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 
rows) step c2: COMMIT; @@ -511,10 +511,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('ab'); diff --git a/expected/predicate-rum.out b/expected/predicate-rum.out index f4b6ddc4bc..86071a3c7a 100644 --- a/expected/predicate-rum.out +++ b/expected/predicate-rum.out @@ -5,10 +5,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -32,10 +32,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 
96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -58,10 +58,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -84,10 +84,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 
'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -110,10 +110,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -136,10 +136,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -162,10 +162,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 
'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -188,10 +188,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -214,10 +214,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 
'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -240,10 +240,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step rxy2: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'qh'; @@ -276,10 +276,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -302,10 +302,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 
96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -328,10 +328,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -354,10 +354,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 
'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('hx'); @@ -380,10 +380,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('hx'); @@ -406,10 +406,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wy2: INSERT INTO rum_tbl(tsv) values('hx'); @@ -433,10 +433,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 
'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -459,10 +459,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step wx1: INSERT INTO rum_tbl(tsv) values('qh'); @@ -485,10 +485,10 @@ step rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 (5 rows) step c2: COMMIT; @@ -512,10 +512,10 @@ step 
rxy1: SELECT id, tsv FROM rum_tbl WHERE tsv @@ 'hx'; id|tsv ---+---------------------------------------------------------------------- 28|'aq':3 'eo':9 'ep':6 'fh':4 'hi':1 'hx':8 'jz':2 'pf':10 'xy':5 'zg':7 - 96|'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':8 'xy':1 'zg':3 + 96|'an':8 'be':9 'eo':5 'ep':2 'hx':4 'nw':7 'pf':6 'pv':10 'xy':1 'zg':3 163|'aq':5 'ep':8 'fh':6 'hi':3 'hx':10 'jz':4 'sa':1 'sr':2 'xy':7 'zg':9 -231|'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 -299|'eo':3 'hx':2 'jd':8 'nw':5 'pf':4 'pv':6 'sm':7 'zg':1 +231|'an':10 'aq':1 'eo':7 'ep':4 'fh':2 'hx':6 'nw':9 'pf':8 'xy':3 'zg':5 +299|'an':6 'be':7 'eo':3 'hx':2 'jd':10 'nw':5 'pf':4 'pv':8 'sm':9 'zg':1 339|'hx' (6 rows) diff --git a/specs/predicate-rum-2.spec b/specs/predicate-rum-2.spec index 763375fa71..c88383caee 100644 --- a/specs/predicate-rum-2.spec +++ b/specs/predicate-rum-2.spec @@ -26,7 +26,7 @@ setup FOR i in 1..338 LOOP INSERT INTO rum_tbl(tsv) VALUES (''); FOR j in 1..10 LOOP - UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector(t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; + UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector('simple', t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; Xi = (a * Xi + c) % m; END LOOP; END LOOP; diff --git a/specs/predicate-rum.spec b/specs/predicate-rum.spec index ec12b51f89..4d324b9ef2 100644 --- a/specs/predicate-rum.spec +++ b/specs/predicate-rum.spec @@ -26,7 +26,7 @@ setup FOR i in 1..338 LOOP INSERT INTO rum_tbl(tsv) VALUES (''); FOR j in 1..10 LOOP - UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector(t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; + UPDATE rum_tbl SET tsv = tsv || (SELECT to_tsvector('simple', t[1]) FROM text_table WHERE id1 = Xi % 676 + 1) WHERE id = i; Xi = (a * Xi + c) % m; END LOOP; END LOOP; From cbf80ab640fe9147cb228699ee963fdb4fef17ba Mon Sep 17 00:00:00 2001 From: Ekaterina Sokolova Date: Fri, 18 Apr 2025 17:58:47 +0300 Subject: [PATCH 182/182] Add 
generated files to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index dfc31f487a..a64cea1abf 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ results __pycache__ *.pyc +rum--*.sql tmp_install log