diff --git a/engine.c b/engine.c index afaae1f..4f5031a 100644 --- a/engine.c +++ b/engine.c @@ -258,7 +258,7 @@ ptrackMapReadFromFile(const char *ptrack_path) * postmaster is the only user right now. */ elog(DEBUG1, "ptrack read map: crc %u, file_crc %u, init_lsn %X/%X", - crc, *file_crc, (uint32) (ptrack_map->init_lsn.value >> 32), (uint32) ptrack_map->init_lsn.value); + crc, *file_crc, (uint16) (ptrack_map->init_lsn.value >> 16), (uint16) ptrack_map->init_lsn.value); if (!EQ_CRC32C(*file_crc, crc)) { @@ -330,7 +330,7 @@ ptrackMapInit(void) * Fill entries with InvalidXLogRecPtr * (InvalidXLogRecPtr is actually 0) */ - memset(ptrack_map->entries, 0, PtrackContentNblocks * sizeof(pg_atomic_uint64)); + memset(ptrack_map->entries, 0, PtrackContentNblocks * sizeof(pg_atomic_uint32)); /* * Last part of memory representation of ptrack_map (crc) is actually unused * so leave it as it is @@ -348,11 +348,15 @@ ptrackCheckpoint(void) pg_crc32c crc; char ptrack_path[MAXPGPATH]; char ptrack_path_tmp[MAXPGPATH]; - XLogRecPtr init_lsn; - pg_atomic_uint64 buf[PTRACK_BUF_SIZE]; + uint32 init_lsn; + pg_atomic_uint32 buf[PTRACK_BUF_SIZE]; struct stat stat_buf; uint64 i = 0; uint64 j = 0; + XLogRecPtr new_init_lsn; + uint32 new_init_lsn32; + uint32 latest_lsn; + bool lsn_was_advanced = false; elog(DEBUG1, "ptrack checkpoint"); @@ -408,20 +412,27 @@ ptrackCheckpoint(void) ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) ptrack_map, offsetof(PtrackMapHdr, init_lsn)); - init_lsn = pg_atomic_read_u64(&ptrack_map->init_lsn); + latest_lsn = pg_atomic_read_u32(&ptrack_map->latest_lsn); + init_lsn = pg_atomic_read_u32(&ptrack_map->init_lsn); /* Set init_lsn during checkpoint if it is not set yet */ if (init_lsn == InvalidXLogRecPtr) { - XLogRecPtr new_init_lsn; - if (RecoveryInProgress()) new_init_lsn = GetXLogReplayRecPtr(NULL); else new_init_lsn = GetXLogInsertRecPtr(); - pg_atomic_write_u64(&ptrack_map->init_lsn, new_init_lsn); - init_lsn = new_init_lsn; + new_init_lsn32 = 
(uint32)(new_init_lsn >> 16); + pg_atomic_write_u32(&ptrack_map->init_lsn, new_init_lsn32); + init_lsn = new_init_lsn32; + } + else if (lsn_diff(lsn_advance(init_lsn, PtrackLSNGap), latest_lsn) < 0) + { + new_init_lsn32 = lsn_advance(init_lsn, PtrackLSNGap); + lsn_was_advanced = true; + pg_atomic_write_u32(&ptrack_map->init_lsn, new_init_lsn32); + init_lsn = new_init_lsn32; } /* Put init_lsn in the same buffer */ @@ -435,7 +446,7 @@ ptrackCheckpoint(void) */ while (i < PtrackContentNblocks) { - XLogRecPtr lsn; + uint32 lsn; /* * We store LSN values as pg_atomic_uint64 in the ptrack map, but @@ -445,8 +456,12 @@ ptrackCheckpoint(void) * * TODO: is it safe and can we do any better? */ - lsn = pg_atomic_read_u64(&ptrack_map->entries[i]); - buf[j].value = lsn; + lsn = pg_atomic_read_u32(&ptrack_map->entries[i]); + + if (lsn_was_advanced && lsn_diff(lsn, init_lsn) < 0) + buf[j].value = InvalidXLogRecPtr; + else + buf[j].value = lsn; i++; j++; @@ -464,7 +479,6 @@ ptrackCheckpoint(void) ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); elog(DEBUG5, "ptrack checkpoint: i " UINT64_FORMAT ", j " UINT64_FORMAT ", writesz %zu PtrackContentNblocks " UINT64_FORMAT, i, j, writesz, (uint64) PtrackContentNblocks); - j = 0; } } @@ -472,7 +486,7 @@ ptrackCheckpoint(void) /* Write if anything left */ if ((i + 1) % PTRACK_BUF_SIZE != 0) { - size_t writesz = sizeof(pg_atomic_uint64) * j; + size_t writesz = sizeof(pg_atomic_uint32) * j; ptrack_write_chunk(ptrack_tmp_fd, &crc, (char *) buf, writesz); elog(DEBUG5, "ptrack checkpoint: final i " UINT64_FORMAT ", j " UINT64_FORMAT ", writesz %zu PtrackContentNblocks " UINT64_FORMAT, @@ -684,12 +698,13 @@ ptrack_mark_block(RelFileNodeBackend smgr_rnode, size_t slot1; size_t slot2; XLogRecPtr new_lsn; + uint32 new_lsn32; /* * We use pg_atomic_uint64 here only for alignment purposes, because * pg_atomic_uint64 is forcedly aligned on 8 bytes during the MSVC build. 
*/ - pg_atomic_uint64 old_lsn; - pg_atomic_uint64 old_init_lsn; + pg_atomic_uint32 old_lsn; + pg_atomic_uint32 old_init_lsn; if (ptrack_map_size == 0 || ptrack_map == NULL @@ -710,25 +725,32 @@ ptrack_mark_block(RelFileNodeBackend smgr_rnode, else new_lsn = GetXLogInsertRecPtr(); + new_lsn32 = (uint32)(new_lsn >> 16); + /* Atomically assign new init LSN value */ - old_init_lsn.value = pg_atomic_read_u64(&ptrack_map->init_lsn); + old_init_lsn.value = pg_atomic_read_u32(&ptrack_map->init_lsn); if (old_init_lsn.value == InvalidXLogRecPtr) { - elog(DEBUG1, "ptrack_mark_block: init_lsn " UINT64_FORMAT " <- " UINT64_FORMAT, old_init_lsn.value, new_lsn); + elog(DEBUG1, "ptrack_mark_block: init_lsn %u <- %u", old_init_lsn.value, new_lsn32); - while (old_init_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->init_lsn, (uint64 *) &old_init_lsn.value, new_lsn)); + while (old_init_lsn.value < new_lsn32 && + !pg_atomic_compare_exchange_u32(&ptrack_map->init_lsn, (uint32 *) &old_init_lsn.value, new_lsn32)); } - /* Atomically assign new LSN value to the first slot */ - old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot1]); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot1, old_lsn.value, new_lsn); - while (old_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot1], (uint64 *) &old_lsn.value, new_lsn)); + /* Assign latest_lsn first */ + old_lsn.value = pg_atomic_read_u32(&ptrack_map->latest_lsn); + while (old_lsn.value < new_lsn32 && + !pg_atomic_compare_exchange_u32(&ptrack_map->latest_lsn, (uint32 *) &old_lsn.value, new_lsn32)); + + /* Then, atomically assign new LSN value to the first slot */ + old_lsn.value = pg_atomic_read_u32(&ptrack_map->entries[slot1]); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=%u <- %u", slot1, old_lsn.value, new_lsn32); + while (old_lsn.value < new_lsn32 && + !pg_atomic_compare_exchange_u32(&ptrack_map->entries[slot1], (uint32 *) &old_lsn.value, 
new_lsn32)); /* And to the second */ - old_lsn.value = pg_atomic_read_u64(&ptrack_map->entries[slot2]); - elog(DEBUG3, "ptrack_mark_block: map[%zu]=" UINT64_FORMAT " <- " UINT64_FORMAT, slot2, old_lsn.value, new_lsn); - while (old_lsn.value < new_lsn && - !pg_atomic_compare_exchange_u64(&ptrack_map->entries[slot2], (uint64 *) &old_lsn.value, new_lsn)); + old_lsn.value = pg_atomic_read_u32(&ptrack_map->entries[slot2]); + elog(DEBUG3, "ptrack_mark_block: map[%zu]=%u <- %u", slot2, old_lsn.value, new_lsn32); + while (old_lsn.value < new_lsn32 && + !pg_atomic_compare_exchange_u32(&ptrack_map->entries[slot2], (uint32 *) &old_lsn.value, new_lsn32)); } diff --git a/engine.h b/engine.h index 56777fc..eb37e39 100644 --- a/engine.h +++ b/engine.h @@ -65,11 +65,13 @@ typedef struct PtrackMapHdr */ uint32 version_num; + /* LSN of current writing position */ + pg_atomic_uint32 latest_lsn; /* LSN of the moment, when map was last enabled. */ - pg_atomic_uint64 init_lsn; + pg_atomic_uint32 init_lsn; /* Followed by the actual map of LSNs */ - pg_atomic_uint64 entries[FLEXIBLE_ARRAY_MEMBER]; + pg_atomic_uint32 entries[FLEXIBLE_ARRAY_MEMBER]; /* * At the end of the map CRC of type pg_crc32c is stored. 
@@ -80,11 +82,11 @@ typedef PtrackMapHdr * PtrackMap; /* Number of elements in ptrack map (LSN array) */ #define PtrackContentNblocks \ - ((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint64)) + ((ptrack_map_size - offsetof(PtrackMapHdr, entries) - sizeof(pg_crc32c)) / sizeof(pg_atomic_uint32)) /* Actual size of the ptrack map, that we are able to fit into ptrack_map_size */ #define PtrackActualSize \ - (offsetof(PtrackMapHdr, entries) + PtrackContentNblocks * sizeof(pg_atomic_uint64) + sizeof(pg_crc32c)) + (offsetof(PtrackMapHdr, entries) + PtrackContentNblocks * sizeof(pg_atomic_uint32) + sizeof(pg_crc32c)) /* CRC32 value offset in order to directly access it in the shared memory chunk */ #define PtrackCrcOffset (PtrackActualSize - sizeof(pg_crc32c)) @@ -94,6 +96,7 @@ typedef PtrackMapHdr * PtrackMap; #define BID_HASH_FUNC(bid) \ (DatumGetUInt64(hash_any_extended((unsigned char *)&bid, sizeof(bid), 0))) +#define PtrackLSNGap 1000000000U /* same value as 10e8, but an integer constant so LSN arithmetic is not done in double */ /* * Per process pointer to shared ptrack_map */ diff --git a/ptrack.c b/ptrack.c index 22a2acf..36fe7c0 100644 --- a/ptrack.c +++ b/ptrack.c @@ -508,7 +508,7 @@ ptrack_init_lsn(PG_FUNCTION_ARGS) { if (ptrack_map != NULL) { - XLogRecPtr init_lsn = pg_atomic_read_u64(&ptrack_map->init_lsn); + XLogRecPtr init_lsn = ((XLogRecPtr) pg_atomic_read_u32(&ptrack_map->init_lsn)) << 16; /* widen first: a 32-bit shift would drop the upper 16 bits */ PG_RETURN_LSN(init_lsn); } @@ -533,6 +533,8 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) datapagemap_t pagemap; int64 pagecount = 0; char gather_path[MAXPGPATH]; + uint32 init_lsn = InvalidXLogRecPtr; + bool within_ptrack_map = true; /* Exit immediately if there is no map */ if (ptrack_map == NULL) @@ -541,13 +543,14 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; + XLogRecPtr lsn = PG_GETARG_LSN(0); funcctx = SRF_FIRSTCALL_INIT(); oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); ctx = (PtScanCtx *) palloc0(sizeof(PtScanCtx)); - ctx->lsn = PG_GETARG_LSN(0); +
ctx->lsn = (uint32)(lsn >> 16); ctx->filelist = NIL; /* Make tuple descriptor */ @@ -597,8 +600,8 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) uint64 hash; size_t slot1; size_t slot2; - XLogRecPtr update_lsn1; - XLogRecPtr update_lsn2; + uint32 update_lsn1; + uint32 update_lsn2; /* Stop traversal if there are no more segments */ if (ctx->bid.blocknum >= ctx->relsize) @@ -638,29 +641,36 @@ ptrack_get_pagemapset(PG_FUNCTION_ARGS) SRF_RETURN_DONE(funcctx); } + init_lsn = pg_atomic_read_u32(&ptrack_map->init_lsn); hash = BID_HASH_FUNC(ctx->bid); slot1 = (size_t)(hash % PtrackContentNblocks); - update_lsn1 = pg_atomic_read_u64(&ptrack_map->entries[slot1]); + update_lsn1 = pg_atomic_read_u32(&ptrack_map->entries[slot1]); if (update_lsn1 != InvalidXLogRecPtr) elog(DEBUG3, "ptrack: update_lsn1 %X/%X of blckno %u of file %s", - (uint32) (update_lsn1 >> 32), (uint32) update_lsn1, + (uint16) (update_lsn1 >> 16), (uint16) update_lsn1, ctx->bid.blocknum, ctx->relpath); + if (init_lsn != InvalidXLogRecPtr) + within_ptrack_map = lsn_diff(init_lsn, update_lsn1) <= 0; + /* Only probe the second slot if the first one is marked */ - if (update_lsn1 >= ctx->lsn) + if (within_ptrack_map && lsn_diff(ctx->lsn, update_lsn1) <= 0) { slot2 = (size_t)(((hash << 32) | (hash >> 32)) % PtrackContentNblocks); - update_lsn2 = pg_atomic_read_u64(&ptrack_map->entries[slot2]); + update_lsn2 = pg_atomic_read_u32(&ptrack_map->entries[slot2]); if (update_lsn2 != InvalidXLogRecPtr) elog(DEBUG3, "ptrack: update_lsn2 %X/%X of blckno %u of file %s", - (uint32) (update_lsn1 >> 32), (uint32) update_lsn2, + (uint16) (update_lsn2 >> 16), (uint16) update_lsn2, ctx->bid.blocknum, ctx->relpath); + if (init_lsn != InvalidXLogRecPtr) + within_ptrack_map = lsn_diff(init_lsn, update_lsn2) <= 0; + /* Block has been changed since specified LSN.
Mark it in the bitmap */ - if (update_lsn2 >= ctx->lsn) + if (within_ptrack_map && lsn_diff(ctx->lsn, update_lsn2) <= 0) { pagecount += 1; datapagemap_add(&pagemap, ctx->bid.blocknum % ((BlockNumber) RELSEG_SIZE)); diff --git a/ptrack.h b/ptrack.h index e56f60b..45a8c39 100644 --- a/ptrack.h +++ b/ptrack.h @@ -47,6 +47,8 @@ #define nodeOf(ndbck) (ndbck).node #endif +#define lsn_diff(lsn1, lsn2) ((int32) ((uint32) (lsn1) - (uint32) (lsn2))) /* signed distance, computed modulo 2^32 */ +#define lsn_advance(lsn, incr) ((uint32) ((uint32) (lsn) + (uint32) (incr))) /* sum wraps modulo 2^32 */ /* * Structure identifying block on the disk. */ @@ -62,7 +64,7 @@ typedef struct PtBlockId */ typedef struct PtScanCtx { - XLogRecPtr lsn; + uint32 lsn; PtBlockId bid; uint32 relsize; char *relpath;