From f67cb1340dc5ab34810c66e1eb416087011c8bd3 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Tue, 27 Jun 2023 15:59:23 +0200 Subject: [PATCH 1/6] Expose f_smgr to extensions for manual implementation There are various reasons why one would want to create their own implementation of a storage manager, among which are block-level compression, encryption and offloading to cold storage. This patch is a first patch that allows extensions to register their own SMgr. Note, however, that this SMgr is not yet used - only the first SMgr to register is used, and this is currently the md.c smgr. Future commits will include facilities to select an SMgr for each tablespace. --- src/backend/postmaster/postmaster.c | 5 + src/backend/storage/smgr/md.c | 187 +++++++++++++++++++--------- src/backend/storage/smgr/smgr.c | 137 ++++++++++---------- src/backend/utils/init/miscinit.c | 13 ++ src/include/miscadmin.h | 1 + src/include/storage/md.h | 4 + src/include/storage/smgr.h | 59 +++++++-- src/tools/pgindent/typedefs.list | 1 + 8 files changed, 266 insertions(+), 141 deletions(-) diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index d13846298bd5..61cbc21bf011 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -915,6 +915,11 @@ PostmasterMain(int argc, char *argv[]) */ ApplyLauncherRegister(); + /* + * Register built-in managers that are not part of static arrays + */ + register_builtin_dynamic_managers(); + /* * process any libraries that should be preloaded at postmaster start */ diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index f3220f98dc42..5a2072e0816f 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -84,6 +84,21 @@ typedef struct _MdfdVec } MdfdVec; static MemoryContext MdCxt; /* context for all MdfdVec objects */ +SMgrId MdSMgrId; + +typedef struct +{ + SMgrRelationData reln; /* parent data */ + + /* + * for md.c; per-fork arrays of the number of open segments + * (md_num_open_segs) and the segments themselves (md_seg_fds). + */ + int md_num_open_segs[MAX_FORKNUM + 1]; + MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; +} MdSMgrRelationData; + +typedef MdSMgrRelationData *MdSMgrRelation; /* Populate a file tag describing an md.c segment file. */ @@ -130,26 +145,55 @@ typedef struct MdPathStr } MdPathStr; +void +mdsmgr_register(void) +{ + /* magnetic disk */ + f_smgr md_smgr = (f_smgr) { + .name = "md", + .smgr_init = mdinit, + .smgr_shutdown = NULL, + .smgr_open = mdopen, + .smgr_close = mdclose, + .smgr_create = mdcreate, + .smgr_exists = mdexists, + .smgr_unlink = mdunlink, + .smgr_extend = mdextend, + .smgr_zeroextend = mdzeroextend, + .smgr_prefetch = mdprefetch, + .smgr_maxcombine = mdmaxcombine, + .smgr_readv = mdreadv, + .smgr_writev = mdwritev, + .smgr_writeback = mdwriteback, + .smgr_nblocks = mdnblocks, + .smgr_truncate = mdtruncate, + .smgr_immedsync = mdimmedsync, + .smgr_registersync = mdregistersync, + }; + + MdSMgrId = smgr_register(&md_smgr, sizeof(MdSMgrRelationData)); +} + /* local routines */ static void mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); -static MdfdVec *mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior); -static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior); +static void register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static void register_unlink_segment(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); static void register_forget_request(RelFileLocatorBackend rlocator, ForkNumber forknum, BlockNumber segno); -static void _fdvec_resize(SMgrRelation reln, +static void _fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg); -static MdPathStr _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, +static MdPathStr _mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno); -static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags); -static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, +static MdfdVec *_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior); -static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum, +static BlockNumber _mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg); static inline int @@ -182,6 +226,8 @@ mdinit(void) bool mdexists(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* * Close it first, to ensure that we notice if the fork has been unlinked * since we opened it. As an optimization, we can skip that in recovery, @@ -190,7 +236,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) if (!InRecovery) mdclose(reln, forknum); - return (mdopenfork(reln, forknum, EXTENSION_RETURN_NULL) != NULL); + return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL); } /* @@ -201,14 +247,15 @@ mdexists(SMgrRelation reln, ForkNumber forknum) void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; RelPathStr path; File fd; - if (isRedo && reln->md_num_open_segs[forknum] > 0) + if (isRedo && mdreln->md_num_open_segs[forknum] > 0) return; /* created and opened already... */ - Assert(reln->md_num_open_segs[forknum] == 0); + Assert(mdreln->md_num_open_segs[forknum] == 0); /* * We may be using the target table space for the first time in this @@ -243,13 +290,13 @@ mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) } } - _fdvec_resize(reln, forknum, 1); - mdfd = &reln->md_seg_fds[forknum][0]; + _fdvec_resize(mdreln, forknum, 1); + mdfd = &mdreln->md_seg_fds[forknum][0]; mdfd->mdfd_vfd = fd; mdfd->mdfd_segno = 0; if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, mdfd); + register_dirty_segment(mdreln, forknum, mdfd); } /* @@ -467,6 +514,7 @@ void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; off_t seekpos; int nbytes; MdfdVec *v; @@ -493,7 +541,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, relpath(reln->smgr_rlocator, forknum).str, InvalidBlockNumber))); - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE); + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_CREATE); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -517,9 +565,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); } /* @@ -532,6 +580,7 @@ void mdzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber curblocknum = blocknum; int remblocks = nblocks; @@ -566,7 +615,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, else numblocks = remblocks; - v = _mdfd_getseg(reln, forknum, curblocknum, skipFsync, EXTENSION_CREATE); + v = _mdfd_getseg(mdreln, forknum, curblocknum, skipFsync, EXTENSION_CREATE); Assert(segstartblock < RELSEG_SIZE); Assert(segstartblock + numblocks <= RELSEG_SIZE); @@ -621,9 +670,9 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); - Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); + Assert(_mdnblocks(mdreln, forknum, v) <= ((BlockNumber) RELSEG_SIZE)); remblocks -= numblocks; curblocknum += numblocks; @@ -641,7 +690,7 @@ mdzeroextend(SMgrRelation reln, ForkNumber forknum, * invent one out of whole cloth. */ static MdfdVec * -mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) +mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior) { MdfdVec *mdfd; RelPathStr path; @@ -651,7 +700,7 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) if (reln->md_num_open_segs[forknum] > 0) return &reln->md_seg_fds[forknum][0]; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); fd = PathNameOpenFile(path.str, _mdfd_open_flags()); @@ -681,9 +730,11 @@ mdopenfork(SMgrRelation reln, ForkNumber forknum, int behavior) void mdopen(SMgrRelation reln) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* mark it not open */ for (int forknum = 0; forknum <= MAX_FORKNUM; forknum++) - reln->md_num_open_segs[forknum] = 0; + mdreln->md_num_open_segs[forknum] = 0; } /* @@ -692,7 +743,8 @@ mdopen(SMgrRelation reln) void mdclose(SMgrRelation reln, ForkNumber forknum) { - int nopensegs = reln->md_num_open_segs[forknum]; + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + int nopensegs = mdreln->md_num_open_segs[forknum]; /* No work if already closed */ if (nopensegs == 0) @@ -701,10 +753,10 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* close segments starting from the end */ while (nopensegs > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][nopensegs - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][nopensegs - 1]; FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, nopensegs - 1); + _fdvec_resize(mdreln, forknum, nopensegs - 1); nopensegs--; } } @@ -717,6 +769,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks) { #ifdef USE_PREFETCH + MdSMgrRelation mdreln = (MdSMgrRelation) reln; Assert((io_direct_flags & IO_DIRECT_DATA) == 0); @@ -729,7 +782,7 @@ mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MdfdVec *v; int nblocks_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, InRecovery ? EXTENSION_RETURN_NULL : EXTENSION_FAIL); if (v == NULL) return false; @@ -827,6 +880,8 @@ void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + while (nblocks > 0) { struct iovec iov[PG_IOV_MAX]; @@ -838,7 +893,7 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, false, + v = _mdfd_getseg(mdreln, forknum, blocknum, false, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -948,6 +1003,8 @@ void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + /* This assert is too expensive to have on normally ... */ #ifdef CHECK_WRITE_VS_EXTEND Assert((uint64) blocknum + (uint64) nblocks <= (uint64) mdnblocks(reln, forknum)); @@ -964,7 +1021,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, size_t transferred_this_segment; size_t size_this_segment; - v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, + v = _mdfd_getseg(mdreln, forknum, blocknum, skipFsync, EXTENSION_FAIL | EXTENSION_CREATE_RECOVERY); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); @@ -1034,7 +1091,7 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, } if (!skipFsync && !SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); nblocks -= nblocks_this_segment; buffers += nblocks_this_segment; @@ -1053,6 +1110,8 @@ void mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; + Assert((io_direct_flags & IO_DIRECT_DATA) == 0); /* @@ -1067,7 +1126,7 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, int segnum_start, segnum_end; - v = _mdfd_getseg(reln, forknum, blocknum, true /* not used */ , + v = _mdfd_getseg(mdreln, forknum, blocknum, true /* not used */ , EXTENSION_DONT_OPEN); /* @@ -1111,14 +1170,15 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; BlockNumber nblocks; BlockNumber segno; - mdopenfork(reln, forknum, EXTENSION_FAIL); + mdopenfork(mdreln, forknum, EXTENSION_FAIL); /* mdopen has opened the first segment */ - Assert(reln->md_num_open_segs[forknum] > 0); + Assert(mdreln->md_num_open_segs[forknum] > 0); /* * Start from the last open segments, to avoid redundant seeks. We have @@ -1133,12 +1193,12 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * that's OK because the checkpointer never needs to compute relation * size.) */ - segno = reln->md_num_open_segs[forknum] - 1; - v = &reln->md_seg_fds[forknum][segno]; + segno = mdreln->md_num_open_segs[forknum] - 1; + v = &mdreln->md_seg_fds[forknum][segno]; for (;;) { - nblocks = _mdnblocks(reln, forknum, v); + nblocks = _mdnblocks(mdreln, forknum, v); if (nblocks > ((BlockNumber) RELSEG_SIZE)) elog(FATAL, "segment too big"); if (nblocks < ((BlockNumber) RELSEG_SIZE)) @@ -1156,7 +1216,7 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * undermines _mdfd_getseg's attempts to notice and report an error * upon access to a missing segment. */ - v = _mdfd_openseg(reln, forknum, segno, 0); + v = _mdfd_openseg(mdreln, forknum, segno, 0); if (v == NULL) return segno * ((BlockNumber) RELSEG_SIZE); } @@ -1176,6 +1236,7 @@ void mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; BlockNumber priorblocks; int curopensegs; @@ -1196,14 +1257,14 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, * Truncate segments, starting at the last one. Starting at the end makes * managing the memory for the fd array easier, should there be errors. */ - curopensegs = reln->md_num_open_segs[forknum]; + curopensegs = mdreln->md_num_open_segs[forknum]; while (curopensegs > 0) { MdfdVec *v; priorblocks = (curopensegs - 1) * RELSEG_SIZE; - v = &reln->md_seg_fds[forknum][curopensegs - 1]; + v = &mdreln->md_seg_fds[forknum][curopensegs - 1]; if (priorblocks > nblocks) { @@ -1218,13 +1279,13 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd)))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* we never drop the 1st segment */ - Assert(v != &reln->md_seg_fds[forknum][0]); + Assert(v != &mdreln->md_seg_fds[forknum][0]); FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, curopensegs - 1); + _fdvec_resize(mdreln, forknum, curopensegs - 1); } else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) { @@ -1244,7 +1305,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, FilePathName(v->mdfd_vfd), nblocks))); if (!SmgrIsTemp(reln)) - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); } else { @@ -1264,6 +1325,7 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, void mdregistersync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1273,7 +1335,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1281,20 +1343,20 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * harmless. We don't bother to clean them up and take a risk of further * trouble. The next mdclose() will soon close them. */ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; - register_dirty_segment(reln, forknum, v); + register_dirty_segment(mdreln, forknum, v); /* Close inactive segments immediately */ if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1315,6 +1377,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) void mdimmedsync(SMgrRelation reln, ForkNumber forknum) { + MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; int min_inactive_seg; @@ -1324,7 +1387,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) */ mdnblocks(reln, forknum); - min_inactive_seg = segno = reln->md_num_open_segs[forknum]; + min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; /* * Temporarily open inactive segments, then close them after sync. There @@ -1332,12 +1395,12 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * is harmless. We don't bother to clean them up and take a risk of * further trouble. The next mdclose() will soon close them. */ - while (_mdfd_openseg(reln, forknum, segno, 0) != NULL) + while (_mdfd_openseg(mdreln, forknum, segno, 0) != NULL) segno++; while (segno > 0) { - MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; + MdfdVec *v = &mdreln->md_seg_fds[forknum][segno - 1]; /* * fsyncs done through mdimmedsync() should be tracked in a separate @@ -1358,7 +1421,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) if (segno > min_inactive_seg) { FileClose(v->mdfd_vfd); - _fdvec_resize(reln, forknum, segno - 1); + _fdvec_resize(mdreln, forknum, segno - 1); } segno--; @@ -1375,14 +1438,14 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * enough to be a performance problem). */ static void -register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +register_dirty_segment(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { FileTag tag; - INIT_MD_FILETAG(tag, reln->smgr_rlocator.locator, forknum, seg->mdfd_segno); + INIT_MD_FILETAG(tag, reln->reln.smgr_rlocator.locator, forknum, seg->mdfd_segno); /* Temp relations should never be fsync'd */ - Assert(!SmgrIsTemp(reln)); + Assert(!SmgrIsTemp(&reln->reln)); if (!RegisterSyncRequest(&tag, SYNC_REQUEST, false /* retryOnError */ )) { @@ -1500,7 +1563,7 @@ DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo) * _fdvec_resize() -- Resize the fork's open segments array */ static void -_fdvec_resize(SMgrRelation reln, +_fdvec_resize(MdSMgrRelation reln, ForkNumber forknum, int nseg) { @@ -1548,12 +1611,12 @@ _fdvec_resize(SMgrRelation reln, * returned string is palloc'd. */ static MdPathStr -_mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) +_mdfd_segpath(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno) { RelPathStr path; MdPathStr fullpath; - path = relpath(reln->smgr_rlocator, forknum); + path = relpath(reln->reln.smgr_rlocator, forknum); if (segno > 0) sprintf(fullpath.str, "%s.%u", path.str, segno); @@ -1568,7 +1631,7 @@ _mdfd_segpath(SMgrRelation reln, ForkNumber forknum, BlockNumber segno) * and make a MdfdVec object for it. Returns NULL on failure. */ static MdfdVec * -_mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, +_mdfd_openseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber segno, int oflags) { MdfdVec *v; @@ -1611,7 +1674,7 @@ _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno, * EXTENSION_CREATE case. */ static MdfdVec * -_mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, +_mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, bool skipFsync, int behavior) { MdfdVec *v; @@ -1685,7 +1748,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, char *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE, MCXT_ALLOC_ZERO); - mdextend(reln, forknum, + mdextend((SMgrRelation) reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, zerobuf, skipFsync); pfree(zerobuf); @@ -1740,7 +1803,7 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, * Get number of blocks present in a single disk file */ static BlockNumber -_mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) +_mdnblocks(MdSMgrRelation reln, ForkNumber forknum, MdfdVec *seg) { off_t len; @@ -1763,7 +1826,7 @@ _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) int mdsyncfiletag(const FileTag *ftag, char *path) { - SMgrRelation reln = smgropen(ftag->rlocator, INVALID_PROC_NUMBER); + MdSMgrRelation reln = (MdSMgrRelation) smgropen(ftag->rlocator, INVALID_PROC_NUMBER); File file; instr_time io_start; bool need_to_close; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index ebe35c04de59..7635c231ea02 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -53,84 +53,21 @@ #include "access/xlogutils.h" #include "lib/ilist.h" +#include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/ipc.h" #include "storage/md.h" #include "storage/smgr.h" +#include "port/atomics.h" #include "utils/hsearch.h" #include "utils/inval.h" +#include "utils/memutils.h" +static f_smgr *smgrsw; -/* - * This struct of function pointers defines the API between smgr.c and - * any individual storage manager module. Note that smgr subfunctions are - * generally expected to report problems via elog(ERROR). An exception is - * that smgr_unlink should use elog(WARNING), rather than erroring out, - * because we normally unlink relations during post-commit/abort cleanup, - * and so it's too late to raise an error. Also, various conditions that - * would normally be errors should be allowed during bootstrap and/or WAL - * recovery --- see comments in md.c for details. - */ -typedef struct f_smgr -{ - void (*smgr_init) (void); /* may be NULL */ - void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, - bool isRedo); - void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); - void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); - bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); - uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); - void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - void **buffers, BlockNumber nblocks); - void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, - bool skipFsync); - void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); -} f_smgr; - -static const f_smgr smgrsw[] = { - /* magnetic disk */ - { - .smgr_init = mdinit, - .smgr_shutdown = NULL, - .smgr_open = mdopen, - .smgr_close = mdclose, - .smgr_create = mdcreate, - .smgr_exists = mdexists, - .smgr_unlink = mdunlink, - .smgr_extend = mdextend, - .smgr_zeroextend = mdzeroextend, - .smgr_prefetch = mdprefetch, - .smgr_maxcombine = mdmaxcombine, - .smgr_readv = mdreadv, - .smgr_writev = mdwritev, - .smgr_writeback = mdwriteback, - .smgr_nblocks = mdnblocks, - .smgr_truncate = mdtruncate, - .smgr_immedsync = mdimmedsync, - .smgr_registersync = mdregistersync, - } -}; +static int NSmgr = 0; -static const int NSmgr = lengthof(smgrsw); +static Size LargestSMgrRelationSize = 0; /* * Each backend has a hashtable that stores all extant SMgrRelation objects. @@ -144,6 +81,60 @@ static dlist_head unpinned_relns; static void smgrshutdown(int code, Datum arg); static void smgrdestroy(SMgrRelation reln); +#define MaxSMgrId UINT8_MAX + +SMgrId +smgr_register(const f_smgr *smgr, Size smgrrelation_size) +{ + SMgrId my_id; + MemoryContext old; + + if (process_shared_preload_libraries_done) + elog(FATAL, "SMgrs must be registered in the shared_preload_libraries phase"); + if (NSmgr == MaxSMgrId) + elog(FATAL, "Too many smgrs registered"); + if (smgr->name == NULL || *smgr->name == 0) + elog(FATAL, "smgr registered with invalid name"); + + Assert(smgr->smgr_open != NULL); + Assert(smgr->smgr_close != NULL); + Assert(smgr->smgr_create != NULL); + Assert(smgr->smgr_exists != NULL); + Assert(smgr->smgr_unlink != NULL); + Assert(smgr->smgr_extend != NULL); + Assert(smgr->smgr_zeroextend != NULL); + Assert(smgr->smgr_prefetch != NULL); + Assert(smgr->smgr_readv != NULL); + Assert(smgr->smgr_writev != NULL); + Assert(smgr->smgr_writeback != NULL); + Assert(smgr->smgr_nblocks != NULL); + Assert(smgr->smgr_truncate != NULL); + Assert(smgr->smgr_immedsync != NULL); + + old = MemoryContextSwitchTo(TopMemoryContext); + + my_id = NSmgr++; + if (my_id == 0) + smgrsw = palloc_array(f_smgr, 1); + else + smgrsw = repalloc_array(smgrsw, f_smgr, NSmgr); + + MemoryContextSwitchTo(old); + + pg_compiler_barrier(); + + if (!smgrsw) + { + NSmgr--; + elog(FATAL, "Failed to extend smgr array"); + } + + smgrsw[my_id] = *smgr; + + LargestSMgrRelationSize = Max(LargestSMgrRelationSize, smgrrelation_size); + + return my_id; +} /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage @@ -211,8 +202,11 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) /* First time through: initialize the hash table */ HASHCTL ctl; + LargestSMgrRelationSize = MAXALIGN(LargestSMgrRelationSize); + Assert(NSmgr > 0); + ctl.keysize = sizeof(RelFileLocatorBackend); - ctl.entrysize = sizeof(SMgrRelationData); + ctl.entrysize = LargestSMgrRelationSize; SMgrRelationHash = hash_create("smgr relation table", 400, &ctl, HASH_ELEM | HASH_BLOBS); dlist_init(&unpinned_relns); @@ -232,7 +226,8 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) reln->smgr_targblock = InvalidBlockNumber; for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = 0; /* we only have md.c at present */ + + reln->smgr_which = MdSMgrId; /* we only have md.c at present */ /* implementation-specific initialization */ smgrsw[reln->smgr_which].smgr_open(reln); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index dc3521457c76..3176cdce6d7b 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -43,6 +43,7 @@ #include "replication/slotsync.h" #include "storage/fd.h" #include "storage/ipc.h" +#include "storage/md.h" #include "storage/latch.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" @@ -192,6 +193,9 @@ InitStandaloneProcess(const char *argv0) InitProcessLocalLatch(); InitializeLatchWaitSet(); + /* Initialize smgrs */ + register_builtin_dynamic_managers(); + /* * For consistency with InitPostmasterChild, initialize signal mask here. * But we don't unblock SIGQUIT or provide a default handler for it. @@ -1920,6 +1924,15 @@ process_session_preload_libraries(void) true); } +/* + * Register any internal managers. + */ +void +register_builtin_dynamic_managers(void) +{ + mdsmgr_register(); +} + /* * process any shared memory requests from preloaded libraries */ diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 6f16794eb63c..7be0983772bb 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -521,6 +521,7 @@ extern void TouchSocketLockFiles(void); extern void AddToDataDirLockFile(int target_line, const char *str); extern bool RecheckDataDirLockFile(void); extern void ValidatePgVersion(const char *path); +extern void register_builtin_dynamic_managers(void); extern void process_shared_preload_libraries(void); extern void process_session_preload_libraries(void); extern void process_shmem_requests(void); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index 05bf537066eb..da1d1d339be8 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -19,6 +19,10 @@ #include "storage/smgr.h" #include "storage/sync.h" +/* registration function for md storage manager */ +extern void mdsmgr_register(void); +extern SMgrId MdSMgrId; + /* md storage manager functionality */ extern void mdinit(void); extern void mdopen(SMgrRelation reln); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 4016b206ad66..52f74f917b25 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -18,6 +18,8 @@ #include "storage/block.h" #include "storage/relfilelocator.h" +typedef uint8 SMgrId; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) @@ -51,14 +53,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. */ - int smgr_which; /* storage manager selector */ - - /* - * for md.c; per-fork arrays of the number of open segments - * (md_num_open_segs) and the segments themselves (md_seg_fds). - */ - int md_num_open_segs[MAX_FORKNUM + 1]; - struct _MdfdVec *md_seg_fds[MAX_FORKNUM + 1]; + SMgrId smgr_which; /* storage manager selector */ /* * Pinning support. If unpinned (ie. pincount == 0), 'node' is a list @@ -73,6 +68,54 @@ typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator) +/* + * This struct of function pointers defines the API between smgr.c and + * any individual storage manager module. Note that smgr subfunctions are + * generally expected to report problems via elog(ERROR). An exception is + * that smgr_unlink should use elog(WARNING), rather than erroring out, + * because we normally unlink relations during post-commit/abort cleanup, + * and so it's too late to raise an error. Also, various conditions that + * would normally be errors should be allowed during bootstrap and/or WAL + * recovery --- see comments in md.c for details. + */ +typedef struct f_smgr +{ + const char *name; + void (*smgr_init) (void); /* may be NULL */ + void (*smgr_shutdown) (void); /* may be NULL */ + void (*smgr_open) (SMgrRelation reln); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + bool isRedo); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, + bool isRedo); + void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void *buffer, bool skipFsync); + void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync); + bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks); + uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum); + void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + void **buffers, BlockNumber nblocks); + void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, + bool skipFsync); + void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, + BlockNumber old_blocks, BlockNumber nblocks); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); +} f_smgr; + +extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size); + extern void smgrinit(void); extern SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend); extern bool smgrexists(SMgrRelation reln, ForkNumber forknum); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 93339ef3c58f..0f9283d6c55a 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1632,6 +1632,7 @@ ManyTestResourceKind Material MaterialPath MaterialState +MdSMgrRelationData MdfdVec MdPathStr Memoize From c2dcf7048cf657cfb0783ce483ecece88d6df582 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Fri, 13 Oct 2023 14:00:44 -0500 Subject: [PATCH 2/6] Allow extensions to override the global storage manager --- src/backend/storage/smgr/smgr.c | 4 +++- src/backend/utils/init/miscinit.c | 2 ++ src/include/storage/smgr.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 7635c231ea02..9b3e63aff557 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -69,6 +69,8 @@ static int NSmgr = 0; static Size LargestSMgrRelationSize = 0; +SMgrId storage_manager_id; + /* * Each backend has a hashtable that stores all extant SMgrRelation objects. * In addition, "unpinned" SMgrRelation objects are chained together in a list. @@ -227,7 +229,7 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = MdSMgrId; /* we only have md.c at present */ + reln->smgr_which = storage_manager_id; /* implementation-specific initialization */ smgrsw[reln->smgr_which].smgr_open(reln); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 3176cdce6d7b..1b3ce51cfce4 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -1931,6 +1931,8 @@ void register_builtin_dynamic_managers(void) { mdsmgr_register(); + + storage_manager_id = MdSMgrId; } /* diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 52f74f917b25..629c78cfddee 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -20,6 +20,8 @@ typedef uint8 SMgrId; +extern PGDLLIMPORT SMgrId storage_manager_id; + /* * smgr.c maintains a table of SMgrRelation objects, which are essentially * cached file handles. An SMgrRelation is created (if not already present) From f13653ea7b9e8a9ec834cf2b18f9eed5e5ad9793 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Fri, 13 Oct 2023 13:57:18 -0500 Subject: [PATCH 3/6] Add checkpoint_create_hook Allows an extension to hook into CheckPointCreate(). --- src/backend/access/transam/xlog.c | 5 +++++ src/include/access/xlog.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 4b6c694a3f71..bc1044bf72d3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -208,6 +208,8 @@ const struct config_enum_entry archive_mode_options[] = { */ CheckpointStatsData CheckpointStats; +checkpoint_create_hook_type checkpoint_create_hook = NULL; + /* * During recovery, lastFullPageWrites keeps track of full_page_writes that * the replayed WAL records indicate. It's initialized with full_page_writes @@ -7173,6 +7175,9 @@ CreateCheckPoint(int flags) */ END_CRIT_SECTION(); + if (checkpoint_create_hook != NULL) + checkpoint_create_hook(&checkPoint); + /* * In some cases there are groups of actions that must all occur on one * side or the other of a checkpoint record. Before flushing the diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d313099c027f..8aab37ef52d7 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -13,6 +13,7 @@ #include "access/xlogbackup.h" #include "access/xlogdefs.h" +#include "catalog/pg_control.h" #include "datatype/timestamp.h" #include "lib/stringinfo.h" #include "nodes/pg_list.h" @@ -59,6 +60,9 @@ extern PGDLLIMPORT int wal_decode_buffer_size; extern PGDLLIMPORT int CheckPointSegments; +typedef void (*checkpoint_create_hook_type) (const CheckPoint *); +extern PGDLLIMPORT checkpoint_create_hook_type checkpoint_create_hook; + /* Archive modes */ typedef enum ArchiveMode { From 87198ce66cba56fab340cc590c5fa03645697eb3 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Wed, 20 Sep 2023 14:23:38 -0500 Subject: [PATCH 4/6] Add contrib/fsync_checker fsync_checker is an extension which overrides the global storage manager to check for volatile relations, those which have been written but not synced to disk. --- contrib/Makefile | 1 + contrib/fsync_checker/fsync_checker.control | 5 + contrib/fsync_checker/fsync_checker_smgr.c | 249 ++++++++++++++++++++ contrib/fsync_checker/meson.build | 22 ++ contrib/meson.build | 1 + src/tools/pgindent/typedefs.list | 2 + 6 files changed, 280 insertions(+) create mode 100644 contrib/fsync_checker/fsync_checker.control create mode 100644 contrib/fsync_checker/fsync_checker_smgr.c create mode 100644 contrib/fsync_checker/meson.build diff --git a/contrib/Makefile b/contrib/Makefile index 952855d9b61b..1c9f22b1c86a 100644 --- a/contrib/Makefile +++ b/contrib/Makefile @@ -19,6 +19,7 @@ SUBDIRS = \ dict_int \ dict_xsyn \ earthdistance \ + fsync_checker \ file_fdw \ fuzzystrmatch \ hstore \ diff --git a/contrib/fsync_checker/fsync_checker.control b/contrib/fsync_checker/fsync_checker.control new file mode 100644 index 000000000000..7d0e36434bfa --- /dev/null +++ b/contrib/fsync_checker/fsync_checker.control @@ -0,0 +1,5 @@ +# fsync_checker extension +comment = 'SMGR extension for checking volatile writes' +default_version = '1.0' +module_pathname = '$libdir/fsync_checker' +relocatable = true diff --git a/contrib/fsync_checker/fsync_checker_smgr.c b/contrib/fsync_checker/fsync_checker_smgr.c new file mode 100644 index 000000000000..97ad0f78da86 --- /dev/null +++ b/contrib/fsync_checker/fsync_checker_smgr.c @@ -0,0 +1,249 @@ +#include "postgres.h" + +#include "access/xlog.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/shmem.h" +#include "storage/smgr.h" +#include "storage/md.h" +#include "utils/hsearch.h" + +PG_MODULE_MAGIC; + +typedef struct +{ + RelFileLocator locator; + ForkNumber forknum; +} VolatileRelnKey; + +typedef struct +{ + VolatileRelnKey key; + XLogRecPtr lsn; +} VolatileRelnEntry; + +void _PG_init(void); + +static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync); +static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum); +static void fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void **buffers, + BlockNumber nblocks, bool skipFsync); +static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks); +static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync); + +static void fsync_checker_checkpoint_create(const CheckPoint *checkPoint); +static void fsync_checker_shmem_request(void); +static void fsync_checker_shmem_startup(void); + +static void add_reln(SMgrRelation reln, ForkNumber forknum); +static void remove_reln(SMgrRelation reln, ForkNumber forknum); + +static SMgrId fsync_checker_smgr_id; +static const struct f_smgr fsync_checker_smgr = { + .name = "fsync_checker", + .smgr_init = mdinit, + .smgr_shutdown = NULL, + .smgr_open = mdopen, + .smgr_close = mdclose, + .smgr_create = mdcreate, + .smgr_exists = mdexists, + .smgr_unlink = mdunlink, + .smgr_extend = fsync_checker_extend, + .smgr_zeroextend = fsync_checker_zeroextend, + .smgr_prefetch = mdprefetch, + .smgr_maxcombine = mdmaxcombine, + .smgr_readv = mdreadv, + .smgr_writev = fsync_checker_writev, + .smgr_writeback = fsync_checker_writeback, + .smgr_nblocks = mdnblocks, + .smgr_truncate = mdtruncate, + .smgr_immedsync = fsync_checker_immedsync, + .smgr_registersync = mdregistersync, +}; + +static HTAB *volatile_relns; +static LWLock *volatile_relns_lock; +static shmem_request_hook_type prev_shmem_request_hook; +static shmem_startup_hook_type prev_shmem_startup_hook; +static checkpoint_create_hook_type prev_checkpoint_create_hook; + +void +_PG_init(void) +{ + prev_checkpoint_create_hook = checkpoint_create_hook; + checkpoint_create_hook = fsync_checker_checkpoint_create; + + prev_shmem_request_hook = shmem_request_hook; + shmem_request_hook = fsync_checker_shmem_request; + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = fsync_checker_shmem_startup; + + /* + * Relation size of 0 means we can just defer to md, but it would be nice + * to just expose this functionality, so if I needed my own relation, I + * could use MdSmgrRelation as the parent. + */ + fsync_checker_smgr_id = smgr_register(&fsync_checker_smgr, 0); + + storage_manager_id = fsync_checker_smgr_id; +} + +static void +fsync_checker_checkpoint_create(const CheckPoint *checkPoint) +{ + long num_entries; + HASH_SEQ_STATUS status; + VolatileRelnEntry *entry; + + if (prev_checkpoint_create_hook) + prev_checkpoint_create_hook(checkPoint); + + LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE); + + hash_seq_init(&status, volatile_relns); + + num_entries = hash_get_num_entries(volatile_relns); + elog(INFO, "Analyzing %ld volatile relations", num_entries); + while ((entry = hash_seq_search(&status))) + { + if (entry->lsn < checkPoint->redo) + { + RelPathStr path; + + path = relpathperm(entry->key.locator, entry->key.forknum); + + elog(WARNING, "Relation not previously synced: %s", path.str); + } + } + + LWLockRelease(volatile_relns_lock); +} + +static void +fsync_checker_shmem_request(void) +{ + if (prev_shmem_request_hook) + prev_shmem_request_hook(); + + RequestAddinShmemSpace(hash_estimate_size(1024, sizeof(VolatileRelnEntry))); + RequestNamedLWLockTranche("fsync_checker volatile relns lock", 1); +} + +static void +fsync_checker_shmem_startup(void) +{ + HASHCTL ctl; + + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + + ctl.keysize = sizeof(VolatileRelnKey); + ctl.entrysize = sizeof(VolatileRelnEntry); + volatile_relns = NULL; + volatile_relns_lock = NULL; + + /* + * Create or attach to the shared memory state, including hash table + */ + LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); + + volatile_relns = ShmemInitHash("fsync_checker volatile relns", + 1024, 1024, &ctl, HASH_BLOBS | HASH_ELEM); + volatile_relns_lock = &GetNamedLWLockTranche("fsync_checker volatile relns lock")->lock; + + LWLockRelease(AddinShmemInitLock); +} + +static void +add_reln(SMgrRelation reln, ForkNumber forknum) +{ + bool found; + XLogRecPtr lsn; + VolatileRelnKey key; + VolatileRelnEntry *entry; + + key.locator = reln->smgr_rlocator.locator; + key.forknum = forknum; + + lsn = GetXLogWriteRecPtr(); + + LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE); + + entry = hash_search(volatile_relns, &key, HASH_ENTER, &found); + if (!found) + entry->lsn = lsn; + + LWLockRelease(volatile_relns_lock); +} + +static void +remove_reln(SMgrRelation reln, ForkNumber forknum) +{ + VolatileRelnKey key; + + key.locator = reln->smgr_rlocator.locator; + key.forknum = forknum; + + LWLockAcquire(volatile_relns_lock, LW_EXCLUSIVE); + + hash_search(volatile_relns, &key, HASH_REMOVE, NULL); + + LWLockRelease(volatile_relns_lock); +} + +static void +fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync) +{ + if (!SmgrIsTemp(reln) && !skipFsync) + add_reln(reln, forknum); + + mdextend(reln, forknum, blocknum, buffer, skipFsync); +} + +static void +fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum) +{ + if (!SmgrIsTemp(reln)) + remove_reln(reln, forknum); + + mdimmedsync(reln, forknum); +} + +static void +fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void **buffers, + BlockNumber nblocks, bool skipFsync) +{ + if (!SmgrIsTemp(reln) && !skipFsync) + add_reln(reln, forknum); + + mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync); +} + +static void +fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks) +{ + if (!SmgrIsTemp(reln)) + remove_reln(reln, forknum); + + mdwriteback(reln, forknum, blocknum, nblocks); +} + +static void +fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync) +{ + if (!SmgrIsTemp(reln) && !skipFsync) + add_reln(reln, forknum); + + mdzeroextend(reln, forknum, blocknum, nblocks, skipFsync); +} diff --git a/contrib/fsync_checker/meson.build b/contrib/fsync_checker/meson.build new file mode 100644 index 000000000000..ce6ed7fe90bb --- /dev/null +++ b/contrib/fsync_checker/meson.build @@ -0,0 +1,22 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group + +fsync_checker_sources = files( + 'fsync_checker_smgr.c', +) + +if host_system == 'windows' + fsync_checker_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'fsync_checker', + '--FILEDESC', 'fsync_checker - SMGR extension for checking volatile relations',]) +endif + +fsync_checker = shared_module('fsync_checker', + fsync_checker_sources, + kwargs: contrib_mod_args, +) +contrib_targets += fsync_checker + +install_data( + 'fsync_checker.control', + kwargs: contrib_data_args, +) diff --git a/contrib/meson.build b/contrib/meson.build index 1ba73ebd67a3..c48fb1387518 100644 --- a/contrib/meson.build +++ b/contrib/meson.build @@ -28,6 +28,7 @@ subdir('dict_int') subdir('dict_xsyn') subdir('earthdistance') subdir('file_fdw') +subdir('fsync_checker') subdir('fuzzystrmatch') subdir('hstore') subdir('hstore_plperl') diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 0f9283d6c55a..ba06315090f9 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -3150,6 +3150,8 @@ ViewStmt VirtualTransactionId VirtualTupleTableSlot VolatileFunctionStatus +VolatileRelnEntry +VolatileRelnKey Vsrt WAIT_ORDER WALAvailability From 790fe554df59f6a59dca99c2e912fd4fee57eb70 Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Sat, 12 Oct 2024 22:01:28 +0100 Subject: [PATCH 5/6] Refactor smgr API: mdcreate needs the old relfilelocator With this change, mdcreate receives the old relfilelocator along with the new for operations that create a new file for an existing relation. This is required for tde_heap in pg_tde. --- src/backend/access/heap/heapam_handler.c | 10 ++++++---- src/backend/access/transam/xlogutils.c | 2 +- src/backend/catalog/heap.c | 2 +- src/backend/catalog/index.c | 2 +- src/backend/catalog/storage.c | 8 ++++---- src/backend/commands/sequence.c | 2 +- src/backend/commands/tablecmds.c | 4 ++-- src/backend/storage/buffer/bufmgr.c | 7 ++++--- src/backend/storage/smgr/md.c | 2 +- src/backend/storage/smgr/smgr.c | 4 ++-- src/backend/utils/cache/relcache.c | 2 +- src/include/catalog/storage.h | 3 ++- src/include/storage/md.h | 2 +- src/include/storage/smgr.h | 4 ++-- 14 files changed, 29 insertions(+), 25 deletions(-) diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index 4da4dc845803..7563d4d6d597 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -588,6 +588,8 @@ heapam_relation_set_new_filelocator(Relation rel, { SMgrRelation srel; + RelFileLocator oldlocator = rel->rd_locator; + /* * Initialize to the minimum XID that could put tuples in the table. We * know that no xacts older than RecentXmin are still running, so that @@ -605,7 +607,7 @@ heapam_relation_set_new_filelocator(Relation rel, */ *minmulti = GetOldestMultiXactId(); - srel = RelationCreateStorage(*newrlocator, persistence, true); + srel = RelationCreateStorage(oldlocator, *newrlocator, persistence, true); /* * If required, set up an init fork for an unlogged table so that it can @@ -615,7 +617,7 @@ heapam_relation_set_new_filelocator(Relation rel, { Assert(rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_TOASTVALUE); - smgrcreate(srel, INIT_FORKNUM, false); + smgrcreate(oldlocator, srel, INIT_FORKNUM, false); log_smgrcreate(newrlocator, INIT_FORKNUM); } @@ -648,7 +650,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) * NOTE: any conflict in relfilenumber value will be caught in * RelationCreateStorage(). */ - dstrel = RelationCreateStorage(*newrlocator, rel->rd_rel->relpersistence, true); + dstrel = RelationCreateStorage(rel->rd_locator, *newrlocator, rel->rd_rel->relpersistence, true); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, @@ -660,7 +662,7 @@ heapam_relation_copy_data(Relation rel, const RelFileLocator *newrlocator) { if (smgrexists(RelationGetSmgr(rel), forkNum)) { - smgrcreate(dstrel, forkNum, false); + smgrcreate(rel->rd_locator, dstrel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/access/transam/xlogutils.c b/src/backend/access/transam/xlogutils.c index c389b27f77d4..2179d2f73fa9 100644 --- a/src/backend/access/transam/xlogutils.c +++ b/src/backend/access/transam/xlogutils.c @@ -487,7 +487,7 @@ XLogReadBufferExtended(RelFileLocator rlocator, ForkNumber forknum, * filesystem loses an inode during a crash. Better to write the data * until we are actually told to delete the file.) */ - smgrcreate(smgr, forknum, true); + smgrcreate(rlocator, smgr, forknum, true); lastblock = smgrnblocks(smgr, forknum); diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index bd3554c0bfdc..251d22f50b2f 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -386,7 +386,7 @@ heap_create(const char *relname, relpersistence, relfrozenxid, relminmxid); else if (RELKIND_HAS_STORAGE(rel->rd_rel->relkind)) - RelationCreateStorage(rel->rd_locator, relpersistence, true); + RelationCreateStorage(rel->rd_locator, rel->rd_locator, relpersistence, true); else Assert(false); } diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 739a92bdcc1c..8d93bc224829 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -3089,7 +3089,7 @@ index_build(Relation heapRelation, if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED && !smgrexists(RelationGetSmgr(indexRelation), INIT_FORKNUM)) { - smgrcreate(RelationGetSmgr(indexRelation), INIT_FORKNUM, false); + smgrcreate(indexRelation->rd_locator, RelationGetSmgr(indexRelation), INIT_FORKNUM, false); log_smgrcreate(&indexRelation->rd_locator, INIT_FORKNUM); indexRelation->rd_indam->ambuildempty(indexRelation); } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index 624ed41bbf3a..59fa01decc59 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -118,7 +118,7 @@ AddPendingSync(const RelFileLocator *rlocator) * pass register_delete = false. */ SMgrRelation -RelationCreateStorage(RelFileLocator rlocator, char relpersistence, +RelationCreateStorage(RelFileLocator oldlocator, RelFileLocator rlocator, char relpersistence, bool register_delete) { SMgrRelation srel; @@ -147,7 +147,7 @@ RelationCreateStorage(RelFileLocator rlocator, char relpersistence, } srel = smgropen(rlocator, procNumber); - smgrcreate(srel, MAIN_FORKNUM, false); + smgrcreate(oldlocator, srel, MAIN_FORKNUM, false); if (needs_wal) log_smgrcreate(&srel->smgr_rlocator.locator, MAIN_FORKNUM); @@ -976,7 +976,7 @@ smgr_redo(XLogReaderState *record) SMgrRelation reln; reln = smgropen(xlrec->rlocator, INVALID_PROC_NUMBER); - smgrcreate(reln, xlrec->forkNum, true); + smgrcreate(xlrec->rlocator, reln, xlrec->forkNum, true); } else if (info == XLOG_SMGR_TRUNCATE) { @@ -997,7 +997,7 @@ smgr_redo(XLogReaderState *record) * XLogReadBufferForRedo, we prefer to recreate the rel and replay the * log as best we can until the drop is seen. */ - smgrcreate(reln, MAIN_FORKNUM, true); + smgrcreate(xlrec->rlocator, reln, MAIN_FORKNUM, true); /* * Before we perform the truncation, update minimum recovery point to diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c index 4b7c5113aabe..d8c560a11b28 100644 --- a/src/backend/commands/sequence.c +++ b/src/backend/commands/sequence.c @@ -344,7 +344,7 @@ fill_seq_with_data(Relation rel, HeapTuple tuple) SMgrRelation srel; srel = smgropen(rel->rd_locator, INVALID_PROC_NUMBER); - smgrcreate(srel, INIT_FORKNUM, false); + smgrcreate(rel->rd_locator, srel, INIT_FORKNUM, false); log_smgrcreate(&rel->rd_locator, INIT_FORKNUM); fill_seq_fork_with_data(rel, tuple, INIT_FORKNUM); FlushRelationBuffers(rel); diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 129c97fdf28f..152b0d38969b 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -16432,7 +16432,7 @@ index_copy_data(Relation rel, RelFileLocator newrlocator) * NOTE: any conflict in relfilenumber value will be caught in * RelationCreateStorage(). */ - dstrel = RelationCreateStorage(newrlocator, rel->rd_rel->relpersistence, true); + dstrel = RelationCreateStorage(rel->rd_locator, newrlocator, rel->rd_rel->relpersistence, true); /* copy main fork */ RelationCopyStorage(RelationGetSmgr(rel), dstrel, MAIN_FORKNUM, @@ -16444,7 +16444,7 @@ index_copy_data(Relation rel, RelFileLocator newrlocator) { if (smgrexists(RelationGetSmgr(rel), forkNum)) { - smgrcreate(dstrel, forkNum, false); + smgrcreate(rel->rd_locator, dstrel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 79ca9d18d07b..8abe47833c3a 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -945,7 +945,7 @@ ExtendBufferedRelTo(BufferManagerRelation bmr, /* recheck, fork might have been created concurrently */ if (!smgrexists(bmr.smgr, fork)) - smgrcreate(bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY); + smgrcreate(bmr.rel->rd_locator, bmr.smgr, fork, flags & EB_PERFORMING_RECOVERY); UnlockRelationForExtension(bmr.rel, ExclusiveLock); } @@ -4743,7 +4743,7 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator, * directory. Therefore, each individual relation doesn't need to be * registered for cleanup. */ - RelationCreateStorage(dst_rlocator, relpersistence, false); + RelationCreateStorage(src_rlocator, dst_rlocator, relpersistence, false); /* copy main fork. */ RelationCopyStorageUsingBuffer(src_rlocator, dst_rlocator, MAIN_FORKNUM, @@ -4755,7 +4755,8 @@ CreateAndCopyRelationData(RelFileLocator src_rlocator, { if (smgrexists(src_rel, forkNum)) { - smgrcreate(dst_rel, forkNum, false); + /* TODO: for sure? */ + smgrcreate(src_rel->smgr_rlocator.locator, dst_rel, forkNum, false); /* * WAL log creation if the relation is persistent, or this is the diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 5a2072e0816f..1766bbe1e575 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -245,7 +245,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) * If isRedo is true, it's okay for the relation to exist already. */ void -mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) +mdcreate(RelFileLocator /* reln */, SMgrRelation reln, ForkNumber forknum, bool isRedo) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 9b3e63aff557..0498fd6c3177 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -408,9 +408,9 @@ smgrexists(SMgrRelation reln, ForkNumber forknum) * to be created. */ void -smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo) +smgrcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo) { - smgrsw[reln->smgr_which].smgr_create(reln, forknum, isRedo); + smgrsw[reln->smgr_which].smgr_create(relold, reln, forknum, isRedo); } /* diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index 9f54a9e72b73..b565d38b7b67 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -3832,7 +3832,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence) /* handle these directly, at least for now */ SMgrRelation srel; - srel = RelationCreateStorage(newrlocator, persistence, true); + srel = RelationCreateStorage(relation->rd_locator, newrlocator, persistence, true); smgrclose(srel); } else diff --git a/src/include/catalog/storage.h b/src/include/catalog/storage.h index ba99225b0a37..ecc3b792f4f5 100644 --- a/src/include/catalog/storage.h +++ b/src/include/catalog/storage.h @@ -22,7 +22,8 @@ /* GUC variables */ extern PGDLLIMPORT int wal_skip_threshold; -extern SMgrRelation RelationCreateStorage(RelFileLocator rlocator, +extern SMgrRelation RelationCreateStorage(RelFileLocator oldlocator, + RelFileLocator rlocator, char relpersistence, bool register_delete); extern void RelationDropStorage(Relation rel); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index da1d1d339be8..61c0e85dd745 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -27,7 +27,7 @@ extern SMgrId MdSMgrId; extern void mdinit(void); extern void mdopen(SMgrRelation reln); extern void mdclose(SMgrRelation reln, ForkNumber forknum); -extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern void mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo); extern bool mdexists(SMgrRelation reln, ForkNumber forknum); extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); extern void mdextend(SMgrRelation reln, ForkNumber forknum, diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 629c78cfddee..5b2b6de91c4d 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -87,7 +87,7 @@ typedef struct f_smgr void (*smgr_shutdown) (void); /* may be NULL */ void (*smgr_open) (SMgrRelation reln); void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_create) (SMgrRelation reln, ForkNumber forknum, + void (*smgr_create) (RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo); bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, @@ -128,7 +128,7 @@ extern void smgrdestroyall(void); extern void smgrrelease(SMgrRelation reln); extern void smgrreleaseall(void); extern void smgrreleaserellocator(RelFileLocatorBackend rlocator); -extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo); +extern void smgrcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo); extern void smgrdosyncall(SMgrRelation *rels, int nrels); extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo); extern void smgrextend(SMgrRelation reln, ForkNumber forknum, From 2b043f18399aad8b0348f5d732f4e2aac40b163a Mon Sep 17 00:00:00 2001 From: Zsolt Parragi Date: Mon, 2 Dec 2024 07:47:07 +0000 Subject: [PATCH 6/6] SMGR GUC variable and chaining The overall goal of this commit is to introduce a user interface to the previous SMGR patch. The idea is to allow a simple configuration for multiple "modificator" SMGRs similar to the fsync_checker in the original proposal. * Extensions should be able to declare a named lists of SMGR implementations, also specifying if the given SMGR is an "end" implementation for actual storage, or if it is a modifier implementation for some other purpose. * Users should be able to specify a list of SMGRs: possibly multiple modifiers, and one storage implementation at the end to configure how the storage manager is constructed. This commit introduces a new GUC variable, `smgr_chain`, which allows users to configure multiple SMGR implementations: it is a comma separated list, where the last entry most be a storage implementation, the others must be modifiers. The default value of this variable is "md". The internal storage manager API is also refactored to include an easy way for SMGR implementations to support proper chaining. Modifier SMGR implementations also only have to implement the functions they actually change, and can leave everything else as empty (NULL). And with this change we can make the functions of the md smgr static. The fsync example extension is also modified to match the new API. --- contrib/fsync_checker/fsync_checker_smgr.c | 57 ++-- src/backend/postmaster/postmaster.c | 2 + src/backend/storage/smgr/md.c | 102 +++++--- src/backend/storage/smgr/smgr.c | 247 ++++++++++++++---- src/backend/tcop/postgres.c | 2 + src/backend/utils/init/miscinit.c | 63 ++++- src/backend/utils/misc/guc_tables.c | 11 + src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/miscadmin.h | 2 + src/include/storage/md.h | 28 -- src/include/storage/smgr.h | 93 +++++-- src/tools/pgindent/typedefs.list | 1 + 12 files changed, 448 insertions(+), 161 deletions(-) diff --git a/contrib/fsync_checker/fsync_checker_smgr.c b/contrib/fsync_checker/fsync_checker_smgr.c index 97ad0f78da86..626ff0587645 100644 --- a/contrib/fsync_checker/fsync_checker_smgr.c +++ b/contrib/fsync_checker/fsync_checker_smgr.c @@ -27,15 +27,15 @@ typedef struct void _PG_init(void); static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync); -static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum); + const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +static void fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); static void fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, - BlockNumber nblocks, bool skipFsync); + BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); static void fsync_checker_checkpoint_create(const CheckPoint *checkPoint); static void fsync_checker_shmem_request(void); @@ -47,24 +47,25 @@ static void remove_reln(SMgrRelation reln, ForkNumber forknum); static SMgrId fsync_checker_smgr_id; static const struct f_smgr fsync_checker_smgr = { .name = "fsync_checker", - .smgr_init = mdinit, + .chain_position = SMGR_CHAIN_MODIFIER, + .smgr_init = NULL, .smgr_shutdown = NULL, - .smgr_open = mdopen, - .smgr_close = mdclose, - .smgr_create = mdcreate, - .smgr_exists = mdexists, - .smgr_unlink = mdunlink, + .smgr_open = NULL, + .smgr_close = NULL, + .smgr_create = NULL, + .smgr_exists = NULL, + .smgr_unlink = NULL, .smgr_extend = fsync_checker_extend, .smgr_zeroextend = fsync_checker_zeroextend, - .smgr_prefetch = mdprefetch, - .smgr_maxcombine = mdmaxcombine, - .smgr_readv = mdreadv, + .smgr_prefetch = NULL, + .smgr_maxcombine = NULL, + .smgr_readv = NULL, .smgr_writev = fsync_checker_writev, .smgr_writeback = fsync_checker_writeback, - .smgr_nblocks = mdnblocks, - .smgr_truncate = mdtruncate, + .smgr_nblocks = NULL, + .smgr_truncate = NULL, .smgr_immedsync = fsync_checker_immedsync, - .smgr_registersync = mdregistersync, + .smgr_registersync = NULL, }; static HTAB *volatile_relns; @@ -91,8 +92,6 @@ _PG_init(void) * could use MdSmgrRelation as the parent. */ fsync_checker_smgr_id = smgr_register(&fsync_checker_smgr, 0); - - storage_manager_id = fsync_checker_smgr_id; } static void @@ -200,50 +199,50 @@ remove_reln(SMgrRelation reln, ForkNumber forknum) static void fsync_checker_extend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync) + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) && !skipFsync) add_reln(reln, forknum); - mdextend(reln, forknum, blocknum, buffer, skipFsync); + smgr_extend_next(reln, forknum, blocknum, buffer, skipFsync, chain_index + 1); } static void -fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum) +fsync_checker_immedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln)) remove_reln(reln, forknum); - mdimmedsync(reln, forknum); + smgr_immedsync_next(reln, forknum, chain_index + 1); } static void fsync_checker_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, - BlockNumber nblocks, bool skipFsync) + BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) && !skipFsync) add_reln(reln, forknum); - mdwritev(reln, forknum, blocknum, buffers, nblocks, skipFsync); + smgr_writev_next(reln, forknum, blocknum, buffers, nblocks, skipFsync, chain_index + 1); } static void fsync_checker_writeback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks) + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln)) remove_reln(reln, forknum); - mdwriteback(reln, forknum, blocknum, nblocks); + smgr_writeback_next(reln, forknum, blocknum, nblocks, chain_index + 1); } static void fsync_checker_zeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync) + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index) { if (!SmgrIsTemp(reln) && !skipFsync) add_reln(reln, forknum); - mdzeroextend(reln, forknum, blocknum, nblocks, skipFsync); + smgr_zeroextend_next(reln, forknum, blocknum, nblocks, skipFsync, chain_index + 1); } diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 61cbc21bf011..0497dce756d4 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -925,6 +925,8 @@ PostmasterMain(int argc, char *argv[]) */ process_shared_preload_libraries(); + process_smgr_chain(); + /* * Initialize SSL library, if specified. */ diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 1766bbe1e575..963bd0e9cde7 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -124,6 +124,33 @@ typedef MdSMgrRelationData *MdSMgrRelation; /* don't try to open a segment, if not already open */ #define EXTENSION_DONT_OPEN (1 << 5) +/* md storage manager functionality */ +static void mdinit(void); +static void mdopen(SMgrRelation reln, SmgrChainIndex chain_index); +static void mdclose(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +static bool mdexists(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +static void mdextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +static void mdzeroextend(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); +static bool mdprefetch(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, int nblocks, SmgrChainIndex chain_index); +static uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index); +static void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); +static void mdwritev(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); +static void mdwriteback(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); +static BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdtruncate(SMgrRelation reln, ForkNumber forknum, + BlockNumber old_blocks, BlockNumber nblocks, SmgrChainIndex chain_index); +static void mdimmedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +static void mdregistersync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); /* * Fixed-length string to represent paths to files that need to be built by @@ -151,6 +178,7 @@ mdsmgr_register(void) /* magnetic disk */ f_smgr md_smgr = (f_smgr) { .name = "md", + .chain_position = SMGR_CHAIN_TAIL, .smgr_init = mdinit, .smgr_shutdown = NULL, .smgr_open = mdopen, @@ -210,7 +238,7 @@ _mdfd_open_flags(void) /* * mdinit() -- Initialize private state for magnetic disk storage manager. */ -void +static void mdinit(void) { MdCxt = AllocSetContextCreate(TopMemoryContext, @@ -223,8 +251,8 @@ mdinit(void) * * Note: this will return true for lingering files, with pending deletions */ -bool -mdexists(SMgrRelation reln, ForkNumber forknum) +static bool +mdexists(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -234,7 +262,7 @@ mdexists(SMgrRelation reln, ForkNumber forknum) * which already closes relations when dropping them. */ if (!InRecovery) - mdclose(reln, forknum); + mdclose(reln, forknum, 0); return (mdopenfork(mdreln, forknum, EXTENSION_RETURN_NULL) != NULL); } @@ -244,8 +272,8 @@ mdexists(SMgrRelation reln, ForkNumber forknum) * * If isRedo is true, it's okay for the relation to exist already. */ -void -mdcreate(RelFileLocator /* reln */, SMgrRelation reln, ForkNumber forknum, bool isRedo) +static void +mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *mdfd; @@ -360,8 +388,8 @@ mdcreate(RelFileLocator /* reln */, SMgrRelation reln, ForkNumber forknum, bool * Note: any failure should be reported as WARNING not ERROR, because * we are usually not in a transaction anymore when this is called. */ -void -mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo) +static void +mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) { /* Now do the per-fork work */ if (forknum == InvalidForkNumber) @@ -510,9 +538,9 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo) * EOF). Note that we assume writing a block beyond current EOF * causes intervening file space to become filled with zeroes. */ -void +static void mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void *buffer, bool skipFsync) + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; off_t seekpos; @@ -576,9 +604,9 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * Similar to mdextend(), except the relation can be extended by multiple * blocks at once and the added blocks will be filled with zeroes. */ -void +static void mdzeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync) + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; @@ -727,8 +755,8 @@ mdopenfork(MdSMgrRelation reln, ForkNumber forknum, int behavior) /* * mdopen() -- Initialize newly-opened relation. */ -void -mdopen(SMgrRelation reln) +static void +mdopen(SMgrRelation reln, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -740,8 +768,8 @@ mdopen(SMgrRelation reln) /* * mdclose() -- Close the specified relation, if it isn't closed already. */ -void -mdclose(SMgrRelation reln, ForkNumber forknum) +static void +mdclose(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int nopensegs = mdreln->md_num_open_segs[forknum]; @@ -764,9 +792,9 @@ mdclose(SMgrRelation reln, ForkNumber forknum) /* * mdprefetch() -- Initiate asynchronous read of the specified blocks of a relation */ -bool +static bool mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - int nblocks) + int nblocks, SmgrChainIndex chain_index) { #ifdef USE_PREFETCH MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -862,9 +890,9 @@ buffers_to_iovec(struct iovec *iov, void **buffers, int nblocks) * mdmaxcombine() -- Return the maximum number of total blocks that can be * combined with an IO starting at blocknum. */ -uint32 +static uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum) + BlockNumber blocknum, SmgrChainIndex index) { BlockNumber segoff; @@ -876,9 +904,9 @@ mdmaxcombine(SMgrRelation reln, ForkNumber forknum, /* * mdreadv() -- Read the specified blocks from a relation. */ -void +static void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks) + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -999,9 +1027,9 @@ mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * relation (ie, those before the current EOF). To extend a relation, * use mdextend(). */ -void +static void mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, bool skipFsync) + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -1106,9 +1134,9 @@ mdwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * This accepts a range of blocks because flushing several pages at once is * considerably more efficient than doing so individually. */ -void +static void mdwriteback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks) + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; @@ -1167,8 +1195,8 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, * called, then only segments up to the last one actually touched * are present in the array. */ -BlockNumber -mdnblocks(SMgrRelation reln, ForkNumber forknum) +static BlockNumber +mdnblocks(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; MdfdVec *v; @@ -1232,9 +1260,9 @@ mdnblocks(SMgrRelation reln, ForkNumber forknum) * sure we have opened all active segments, so that truncate loop will get * them all! */ -void +static void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber curnblk, BlockNumber nblocks) + BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; BlockNumber priorblocks; @@ -1322,8 +1350,8 @@ mdtruncate(SMgrRelation reln, ForkNumber forknum, /* * mdregistersync() -- Mark whole relation as needing fsync */ -void -mdregistersync(SMgrRelation reln, ForkNumber forknum) +static void +mdregistersync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; @@ -1333,7 +1361,7 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * NOTE: mdnblocks makes sure we have opened all active segments, so that * the loop below will get them all! */ - mdnblocks(reln, forknum); + mdnblocks(reln, forknum, 0); min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; @@ -1374,8 +1402,8 @@ mdregistersync(SMgrRelation reln, ForkNumber forknum) * crash before the next checkpoint syncs the newly-inactive segment, that * segment may survive recovery, reintroducing unwanted data into the table. */ -void -mdimmedsync(SMgrRelation reln, ForkNumber forknum) +static void +mdimmedsync(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) { MdSMgrRelation mdreln = (MdSMgrRelation) reln; int segno; @@ -1385,7 +1413,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) * NOTE: mdnblocks makes sure we have opened all active segments, so that * the loop below will get them all! */ - mdnblocks(reln, forknum); + mdnblocks(reln, forknum, 0); min_inactive_seg = segno = mdreln->md_num_open_segs[forknum]; @@ -1750,7 +1778,7 @@ _mdfd_getseg(MdSMgrRelation reln, ForkNumber forknum, BlockNumber blkno, mdextend((SMgrRelation) reln, forknum, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, - zerobuf, skipFsync); + zerobuf, skipFsync, 0); pfree(zerobuf); } flags = O_CREAT; diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 0498fd6c3177..088925637682 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -63,13 +63,13 @@ #include "utils/inval.h" #include "utils/memutils.h" -static f_smgr *smgrsw; +f_smgr *smgrsw; static int NSmgr = 0; static Size LargestSMgrRelationSize = 0; -SMgrId storage_manager_id; +SMgrChain storage_manager_chain; /* * Each backend has a hashtable that stores all extant SMgrRelation objects. @@ -98,20 +98,23 @@ smgr_register(const f_smgr *smgr, Size smgrrelation_size) if (smgr->name == NULL || *smgr->name == 0) elog(FATAL, "smgr registered with invalid name"); - Assert(smgr->smgr_open != NULL); - Assert(smgr->smgr_close != NULL); - Assert(smgr->smgr_create != NULL); - Assert(smgr->smgr_exists != NULL); - Assert(smgr->smgr_unlink != NULL); - Assert(smgr->smgr_extend != NULL); - Assert(smgr->smgr_zeroextend != NULL); - Assert(smgr->smgr_prefetch != NULL); - Assert(smgr->smgr_readv != NULL); - Assert(smgr->smgr_writev != NULL); - Assert(smgr->smgr_writeback != NULL); - Assert(smgr->smgr_nblocks != NULL); - Assert(smgr->smgr_truncate != NULL); - Assert(smgr->smgr_immedsync != NULL); + if (smgr->chain_position == SMGR_CHAIN_TAIL) + { + Assert(smgr->smgr_open != NULL); + Assert(smgr->smgr_close != NULL); + Assert(smgr->smgr_create != NULL); + Assert(smgr->smgr_exists != NULL); + Assert(smgr->smgr_unlink != NULL); + Assert(smgr->smgr_extend != NULL); + Assert(smgr->smgr_zeroextend != NULL); + Assert(smgr->smgr_prefetch != NULL); + Assert(smgr->smgr_readv != NULL); + Assert(smgr->smgr_writev != NULL); + Assert(smgr->smgr_writeback != NULL); + Assert(smgr->smgr_nblocks != NULL); + Assert(smgr->smgr_truncate != NULL); + Assert(smgr->smgr_immedsync != NULL); + } old = MemoryContextSwitchTo(TopMemoryContext); @@ -138,6 +141,17 @@ smgr_register(const f_smgr *smgr, Size smgrrelation_size) return my_id; } +SMgrId +smgr_lookup(const char *name) +{ + for (int i = 0; i < NSmgr; i++) + { + if (strcmp(smgrsw[i].name, name) == 0) + return i; + } + elog(FATAL, "Storage manager not found with name: %s", name); +} + /* * smgrinit(), smgrshutdown() -- Initialize or shut down storage * managers. @@ -176,6 +190,22 @@ smgrshutdown(int code, Datum arg) } } +#define SMGR_CHAIN_LOOKUP(SMGR_METHOD) \ + do \ + { \ + while (chain_index < reln->smgr_chain.size && smgrsw[reln->smgr_chain.chain[chain_index]].SMGR_METHOD == NULL) \ + chain_index++; \ + Assert(chain_index < reln->smgr_chain.size); \ + } while (0) + +void +smgr_open_next(SMgrRelation reln, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_open); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_open(reln, chain_index); +} + /* * smgropen() -- Return an SMgrRelation object, creating it if need be. * @@ -229,10 +259,10 @@ smgropen(RelFileLocator rlocator, ProcNumber backend) for (int i = 0; i <= MAX_FORKNUM; ++i) reln->smgr_cached_nblocks[i] = InvalidBlockNumber; - reln->smgr_which = storage_manager_id; + memcpy(&reln->smgr_chain, &storage_manager_chain, sizeof(SMgrChain)); /* implementation-specific initialization */ - smgrsw[reln->smgr_which].smgr_open(reln); + smgr_open_next(reln, 0); /* it is not pinned yet */ reln->pincount = 0; @@ -270,6 +300,14 @@ smgrunpin(SMgrRelation reln) dlist_push_tail(&unpinned_relns, &reln->node); } +void +smgr_close_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_close); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_close(reln, forknum, chain_index); +} + /* * smgrdestroy() -- Delete an SMgrRelation object. */ @@ -281,7 +319,7 @@ smgrdestroy(SMgrRelation reln) Assert(reln->pincount == 0); for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + smgr_close_next(reln, forknum, 0); dlist_delete(&reln->node); @@ -301,7 +339,7 @@ smgrrelease(SMgrRelation reln) { for (ForkNumber forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - smgrsw[reln->smgr_which].smgr_close(reln, forknum); + smgr_close_next(reln, forknum, 0); reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; } reln->smgr_targblock = InvalidBlockNumber; @@ -391,13 +429,29 @@ smgrreleaserellocator(RelFileLocatorBackend rlocator) smgrrelease(reln); } +bool +smgr_exists_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_exists); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_exists(reln, forknum, chain_index); +} + /* * smgrexists() -- Does the underlying file for a fork exist? */ bool smgrexists(SMgrRelation reln, ForkNumber forknum) { - return smgrsw[reln->smgr_which].smgr_exists(reln, forknum); + return smgr_exists_next(reln, forknum, 0); +} + +void +smgr_create_next(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_create); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_create(relold, reln, forknum, isRedo, chain_index); } /* @@ -410,7 +464,15 @@ smgrexists(SMgrRelation reln, ForkNumber forknum) void smgrcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo) { - smgrsw[reln->smgr_which].smgr_create(relold, reln, forknum, isRedo); + smgr_create_next(relold, reln, forknum, isRedo, 0); +} + +void +smgr_immedsync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_immedsync); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_immedsync(reln, forknum, chain_index); } /* @@ -438,16 +500,22 @@ smgrdosyncall(SMgrRelation *rels, int nrels) */ for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) { - if (smgrsw[which].smgr_exists(rels[i], forknum)) - smgrsw[which].smgr_immedsync(rels[i], forknum); + if (smgr_exists_next(rels[i], forknum, 0)) + smgr_immedsync_next(rels[i], forknum, 0); } } } +void +smgr_unlink_next(SMgrRelation reln, RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_unlink); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_unlink(rlocator, forknum, isRedo, chain_index); +} + /* * smgrdounlinkall() -- Immediately unlink all forks of all given relations * @@ -482,13 +550,12 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { RelFileLocatorBackend rlocator = rels[i]->smgr_rlocator; - int which = rels[i]->smgr_which; rlocators[i] = rlocator; /* Close the forks at smgr level */ for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_close(rels[i], forknum); + smgr_close_next(rels[i], forknum, 0); } /* @@ -512,15 +579,22 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo) for (i = 0; i < nrels; i++) { - int which = rels[i]->smgr_which; - for (forknum = 0; forknum <= MAX_FORKNUM; forknum++) - smgrsw[which].smgr_unlink(rlocators[i], forknum, isRedo); + smgr_unlink_next(rels[i], rlocators[i], forknum, isRedo, 0); } pfree(rlocators); } +void +smgr_extend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_extend); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_extend(reln, forknum, blocknum, + buffer, skipFsync, chain_index); +} /* * smgrextend() -- Add a new block to a file. @@ -535,8 +609,7 @@ void smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void *buffer, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum, - buffer, skipFsync); + smgr_extend_next(reln, forknum, blocknum, buffer, skipFsync, 0); /* * Normally we expect this to increase nblocks by one, but if the cached @@ -549,6 +622,16 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; } +void +smgr_zeroextend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_zeroextend); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_zeroextend(reln, forknum, blocknum, + nblocks, skipFsync, chain_index); +} + /* * smgrzeroextend() -- Add new zeroed out blocks to a file. * @@ -560,8 +643,7 @@ void smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_zeroextend(reln, forknum, blocknum, - nblocks, skipFsync); + smgr_zeroextend_next(reln, forknum, blocknum, nblocks, skipFsync, 0); /* * Normally we expect this to increase the fork size by nblocks, but if @@ -574,6 +656,16 @@ smgrzeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber; } +bool +smgr_prefetch_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_prefetch); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_prefetch(reln, forknum, blocknum, + nblocks, chain_index); +} + /* * smgrprefetch() -- Initiate asynchronous read of the specified block of a relation. * @@ -585,7 +677,16 @@ bool smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks) { - return smgrsw[reln->smgr_which].smgr_prefetch(reln, forknum, blocknum, nblocks); + return smgr_prefetch_next(reln, forknum, blocknum, nblocks, 0); +} + +uint32 +smgr_maxcombine_next(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_maxcombine); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_maxcombine(reln, forknum, blocknum, chain_index); } /* @@ -598,7 +699,17 @@ uint32 smgrmaxcombine(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) { - return smgrsw[reln->smgr_which].smgr_maxcombine(reln, forknum, blocknum); + return smgr_maxcombine_next(reln, forknum, blocknum, 0); +} + +void +smgr_readv_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_readv); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_readv(reln, forknum, blocknum, + buffers, nblocks, chain_index); } /* @@ -616,8 +727,17 @@ void smgrreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void **buffers, BlockNumber nblocks) { - smgrsw[reln->smgr_which].smgr_readv(reln, forknum, blocknum, buffers, - nblocks); + smgr_readv_next(reln, forknum, blocknum, buffers, nblocks, 0); +} + +void +smgr_writev_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_writev); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_writev(reln, forknum, blocknum, + buffers, nblocks, skipFsync, chain_index); } /* @@ -650,8 +770,17 @@ void smgrwritev(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, bool skipFsync) { - smgrsw[reln->smgr_which].smgr_writev(reln, forknum, blocknum, - buffers, nblocks, skipFsync); + smgr_writev_next(reln, forknum, blocknum, + buffers, nblocks, skipFsync, 0); +} + +void +smgr_writeback_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_writeback); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_writeback(reln, forknum, blocknum, nblocks, chain_index); } /* @@ -662,8 +791,15 @@ void smgrwriteback(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, BlockNumber nblocks) { - smgrsw[reln->smgr_which].smgr_writeback(reln, forknum, blocknum, - nblocks); + smgr_writeback_next(reln, forknum, blocknum, nblocks, 0); +} + +extern BlockNumber +smgr_nblocks_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_nblocks); + + return smgrsw[reln->smgr_chain.chain[chain_index]].smgr_nblocks(reln, forknum, chain_index); } /* @@ -680,7 +816,7 @@ smgrnblocks(SMgrRelation reln, ForkNumber forknum) if (result != InvalidBlockNumber) return result; - result = smgrsw[reln->smgr_which].smgr_nblocks(reln, forknum); + result = smgr_nblocks_next(reln, forknum, 0); reln->smgr_cached_nblocks[forknum] = result; @@ -708,6 +844,14 @@ smgrnblocks_cached(SMgrRelation reln, ForkNumber forknum) return InvalidBlockNumber; } +void +smgr_truncate_next(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_truncate); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_truncate(reln, forknum, curnblk, nblocks, chain_index); +} + /* * smgrtruncate() -- Truncate the given forks of supplied relation to * each specified numbers of blocks @@ -752,8 +896,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, /* Make the cached size is invalid if we encounter an error. */ reln->smgr_cached_nblocks[forknum[i]] = InvalidBlockNumber; - smgrsw[reln->smgr_which].smgr_truncate(reln, forknum[i], - old_nblocks[i], nblocks[i]); + smgr_truncate_next(reln, forknum[i], old_nblocks[i], nblocks[i], 0); /* * We might as well update the local smgr_cached_nblocks values. The @@ -766,6 +909,14 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, } } +void +smgr_registersync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index) +{ + SMGR_CHAIN_LOOKUP(smgr_registersync); + + smgrsw[reln->smgr_chain.chain[chain_index]].smgr_registersync(reln, forknum, chain_index); +} + /* * smgrregistersync() -- Request a relation to be sync'd at next checkpoint * @@ -781,7 +932,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, void smgrregistersync(SMgrRelation reln, ForkNumber forknum) { - smgrsw[reln->smgr_which].smgr_registersync(reln, forknum); + smgr_registersync_next(reln, forknum, 0); } /* @@ -813,7 +964,7 @@ smgrregistersync(SMgrRelation reln, ForkNumber forknum) void smgrimmedsync(SMgrRelation reln, ForkNumber forknum) { - smgrsw[reln->smgr_which].smgr_immedsync(reln, forknum); + smgr_immedsync_next(reln, forknum, 0); } /* diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 55ab2da299b9..e806a4528aee 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4072,6 +4072,8 @@ PostgresSingleUserMain(int argc, char *argv[], */ process_shared_preload_libraries(); + process_smgr_chain(); + /* Initialize MaxBackends */ InitializeMaxBackends(); diff --git a/src/backend/utils/init/miscinit.c b/src/backend/utils/init/miscinit.c index 1b3ce51cfce4..32d99e1244a9 100644 --- a/src/backend/utils/init/miscinit.c +++ b/src/backend/utils/init/miscinit.c @@ -56,6 +56,7 @@ #include "utils/pidfile.h" #include "utils/syscache.h" #include "utils/varlena.h" +#include "storage/smgr.h" #define DIRECTORY_LOCK_FILE "postmaster.pid" @@ -1834,6 +1835,8 @@ char *session_preload_libraries_string = NULL; char *shared_preload_libraries_string = NULL; char *local_preload_libraries_string = NULL; +char *smgr_chain_string = NULL; + /* Flag telling that we are loading shared_preload_libraries */ bool process_shared_preload_libraries_in_progress = false; bool process_shared_preload_libraries_done = false; @@ -1910,6 +1913,62 @@ process_shared_preload_libraries(void) process_shared_preload_libraries_done = true; } +void +process_smgr_chain(void) +{ + char *rawstring; + List *elemlist; + ListCell *l; + uint8 idx = 0; + + if (smgr_chain_string == NULL || smgr_chain_string[0] == '\0') + return; /* nothing to do */ + + /* Need a modifiable copy of string */ + rawstring = pstrdup(smgr_chain_string); + + /* Parse string into list of filename paths */ + if (!SplitIdentifierString(rawstring, ',', &elemlist)) + { + /* syntax error in list */ + pfree(rawstring); + ereport(LOG, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid list syntax in parameter \"%s\"", + "smgr_chain"))); + return; + } + + foreach(l, elemlist) + { + char *smgrname = (char *) lfirst(l); + SMgrId id = smgr_lookup(smgrname); + + storage_manager_chain.chain[idx++] = id; + + ereport(DEBUG1, + (errmsg_internal("using storage manager in chain \"%s\"", smgrname))); + } + + for (int i = 0; i < idx; ++i) + { + int chain_position = smgrsw[storage_manager_chain.chain[i]].chain_position; + + if (i == idx - 1 && chain_position != SMGR_CHAIN_TAIL) + ereport(FATAL, + (errmsg_internal("smgr_chain: the last element should be a `tail` implementation, not a modifier."))); + + if (i != idx - 1 && chain_position != SMGR_CHAIN_MODIFIER) + ereport(FATAL, + (errmsg_internal("smgr_chain: element %i/%i %s is not a modifier.", i, idx, smgrsw[storage_manager_chain.chain[i]].name))); + } + + storage_manager_chain.size = idx; + + list_free(elemlist); + pfree(rawstring); +} + /* * process any libraries that should be preloaded at backend start */ @@ -1932,7 +1991,9 @@ register_builtin_dynamic_managers(void) { mdsmgr_register(); - storage_manager_id = MdSMgrId; + /* setup a dummy chain with md, for tools */ + storage_manager_chain.chain[0] = MdSMgrId; + storage_manager_chain.size = 1; } /* diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 9c0b10ad4dc2..ad578de73a44 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -4408,6 +4408,17 @@ struct config_string ConfigureNamesString[] = NULL, NULL, NULL }, + { + {"smgr_chain", PGC_POSTMASTER, CLIENT_CONN_PRELOAD, + gettext_noop("Lists storage managers used by the server, in order."), + NULL, + GUC_LIST_INPUT | GUC_LIST_QUOTE | GUC_SUPERUSER_ONLY + }, + &smgr_chain_string, + "md", + NULL, NULL, NULL + }, + { {"search_path", PGC_USERSET, CLIENT_CONN_STATEMENT, gettext_noop("Sets the schema search order for names that are not schema-qualified."), diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 8de86e0c9454..e8d56f5426e0 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -790,6 +790,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate #session_preload_libraries = '' #shared_preload_libraries = '' # (change requires restart) #jit_provider = 'llvmjit' # JIT library to use +#smgr_chain = 'md' # SMGR implementations to use # - Other Defaults - diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h index 7be0983772bb..7bdb760f5c1c 100644 --- a/src/include/miscadmin.h +++ b/src/include/miscadmin.h @@ -513,6 +513,7 @@ extern PGDLLIMPORT bool process_shmem_requests_in_progress; extern PGDLLIMPORT char *session_preload_libraries_string; extern PGDLLIMPORT char *shared_preload_libraries_string; extern PGDLLIMPORT char *local_preload_libraries_string; +extern PGDLLIMPORT char *smgr_chain_string; extern void CreateDataDirLockFile(bool amPostmaster); extern void CreateSocketLockFile(const char *socketfile, bool amPostmaster, @@ -523,6 +524,7 @@ extern bool RecheckDataDirLockFile(void); extern void ValidatePgVersion(const char *path); extern void register_builtin_dynamic_managers(void); extern void process_shared_preload_libraries(void); +extern void process_smgr_chain(void); extern void process_session_preload_libraries(void); extern void process_shmem_requests(void); extern void pg_bindtextdomain(const char *domain); diff --git a/src/include/storage/md.h b/src/include/storage/md.h index 61c0e85dd745..5b4992c0855b 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -23,34 +23,6 @@ extern void mdsmgr_register(void); extern SMgrId MdSMgrId; -/* md storage manager functionality */ -extern void mdinit(void); -extern void mdopen(SMgrRelation reln); -extern void mdclose(SMgrRelation reln, ForkNumber forknum); -extern void mdcreate(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo); -extern bool mdexists(SMgrRelation reln, ForkNumber forknum); -extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo); -extern void mdextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); -extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); -extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); -extern uint32 mdmaxcombine(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); -extern void mdreadv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); -extern void mdwritev(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, - const void **buffers, BlockNumber nblocks, bool skipFsync); -extern void mdwriteback(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); -extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum); -extern void mdtruncate(SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); -extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum); -extern void mdregistersync(SMgrRelation reln, ForkNumber forknum); - extern void ForgetDatabaseSyncRequests(Oid dbid); extern void DropRelationFiles(RelFileLocator *delrels, int ndelrels, bool isRedo); diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 5b2b6de91c4d..8f789cb7f801 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -20,7 +20,17 @@ typedef uint8 SMgrId; -extern PGDLLIMPORT SMgrId storage_manager_id; +typedef uint8 SmgrChainIndex; + +#define MAX_SMGR_CHAIN 15 + +typedef struct +{ + SMgrId chain[MAX_SMGR_CHAIN]; /* storage manager selector */ + uint8 size; +} SMgrChain; + +extern PGDLLIMPORT SMgrChain storage_manager_chain; /* * smgr.c maintains a table of SMgrRelation objects, which are essentially @@ -55,7 +65,7 @@ typedef struct SMgrRelationData * Fields below here are intended to be private to smgr.c and its * submodules. Do not touch them from elsewhere. */ - SMgrId smgr_which; /* storage manager selector */ + SMgrChain smgr_chain; /* selected storage manager chain */ /* * Pinning support. If unpinned (ie. pincount == 0), 'node' is a list @@ -70,6 +80,9 @@ typedef SMgrRelationData *SMgrRelation; #define SmgrIsTemp(smgr) \ RelFileLocatorBackendIsTemp((smgr)->smgr_rlocator) +#define SMGR_CHAIN_TAIL 1 +#define SMGR_CHAIN_MODIFIER 2 + /* * This struct of function pointers defines the API between smgr.c and * any individual storage manager module. Note that smgr subfunctions are @@ -83,40 +96,44 @@ typedef SMgrRelationData *SMgrRelation; typedef struct f_smgr { const char *name; + int chain_position; void (*smgr_init) (void); /* may be NULL */ void (*smgr_shutdown) (void); /* may be NULL */ - void (*smgr_open) (SMgrRelation reln); - void (*smgr_close) (SMgrRelation reln, ForkNumber forknum); + void (*smgr_open) (SMgrRelation reln, SmgrChainIndex chain_index); + void (*smgr_close) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); void (*smgr_create) (RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, - bool isRedo); - bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum); + bool isRedo, SmgrChainIndex chain_index); + bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); void (*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum, - bool isRedo); + bool isRedo, SmgrChainIndex chain_index); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, const void *buffer, bool skipFsync); + BlockNumber blocknum, const void *buffer, bool skipFsync, SmgrChainIndex chain_index); void (*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks, bool skipFsync); + BlockNumber blocknum, int nblocks, bool skipFsync, SmgrChainIndex chain_index); bool (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, int nblocks); + BlockNumber blocknum, int nblocks, SmgrChainIndex chain_index); uint32 (*smgr_maxcombine) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum); + BlockNumber blocknum, SmgrChainIndex chain_index); void (*smgr_readv) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - void **buffers, BlockNumber nblocks); + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); void (*smgr_writev) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const void **buffers, BlockNumber nblocks, - bool skipFsync); + bool skipFsync, SmgrChainIndex chain_index); void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, BlockNumber nblocks); - BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum); + BlockNumber blocknum, BlockNumber nblocks, SmgrChainIndex chain_index); + BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum, - BlockNumber old_blocks, BlockNumber nblocks); - void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum); - void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum); + BlockNumber old_blocks, BlockNumber nblocks, SmgrChainIndex chain_index); + void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); + void (*smgr_registersync) (SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); } f_smgr; extern SMgrId smgr_register(const f_smgr *smgr, Size smgrrelation_size); +extern SMgrId smgr_lookup(const char *name); + +extern f_smgr *smgrsw; extern void smgrinit(void); extern SMgrRelation smgropen(RelFileLocator rlocator, ProcNumber backend); @@ -158,6 +175,46 @@ extern void smgrregistersync(SMgrRelation reln, ForkNumber forknum); extern void AtEOXact_SMgr(void); extern bool ProcessBarrierSmgrRelease(void); +extern void + smgr_open_next(SMgrRelation reln, SmgrChainIndex chain_index); +extern void + smgr_close_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern bool + smgr_exists_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_create_next(RelFileLocator relold, SMgrRelation reln, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +extern void + smgr_immedsync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_unlink_next(SMgrRelation reln, RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo, SmgrChainIndex chain_index); +extern void + smgr_extend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void *buffer, bool skipFsync, SmgrChainIndex chain_index); +extern void + smgr_zeroextend_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, bool skipFsync, SmgrChainIndex chain_index); +extern bool + smgr_prefetch_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + int nblocks, SmgrChainIndex chain_index); +extern uint32 + smgr_maxcombine_next(SMgrRelation reln, ForkNumber forknum, + BlockNumber blocknum, SmgrChainIndex chain_index); +extern void + smgr_readv_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + void **buffers, BlockNumber nblocks, SmgrChainIndex chain_index); +extern void + smgr_writev_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + const void **buffers, BlockNumber nblocks, bool skipFsync, SmgrChainIndex chain_index); +extern void + smgr_writeback_next(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, + BlockNumber nblocks, SmgrChainIndex chain_index); +extern BlockNumber + smgr_nblocks_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); +extern void + smgr_truncate_next(SMgrRelation reln, ForkNumber forknum, BlockNumber curnblk, BlockNumber nblocks, SmgrChainIndex chain_index); +extern void + smgr_registersync_next(SMgrRelation reln, ForkNumber forknum, SmgrChainIndex chain_index); + static inline void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, void *buffer) diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index ba06315090f9..3b04dd39a801 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2557,6 +2557,7 @@ SID_IDENTIFIER_AUTHORITY SID_NAME_USE SISeg SIZE_T +SMgrChain SMgrRelation SMgrRelationData SMgrSortArray