Skip to content

Commit 917d70c

Browse files
committed
Fix handling new segments created in snapshot
1 parent e2eba00 commit 917d70c

File tree

6 files changed

+104
-48
lines changed

6 files changed

+104
-48
lines changed

src/backend/access/transam/xact.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1783,6 +1783,8 @@ StartTransaction(void)
17831783
TransactionState s;
17841784
VirtualTransactionId vxid;
17851785

1786+
ProcArrayTestDatabaseLock();
1787+
17861788
/*
17871789
* Let's just make sure the state stack is empty
17881790
*/

src/backend/storage/file/fd.c

Lines changed: 48 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1501,6 +1501,12 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
15011501
return -1;
15021502
}
15031503
++nfile;
1504+
1505+
if (SFS_KEEPING_SNAPSHOT() && (fileFlags & O_EXCL))
1506+
{
1507+
OpenSnapshotFiles(vfdP, ControlFile->recent_snapshot, true);
1508+
pg_atomic_write_u32(&vfdP->snap_map->size, SFS_NEW_FILE_MARKER);
1509+
}
15041510
DO_DB(elog(LOG, "PathNameOpenFile: success %d",
15051511
vfdP->fd));
15061512

@@ -2039,6 +2045,10 @@ FileRead(File file, char *buffer, int amount, uint32 wait_event_info)
20392045
if (!OpenSnapshotFiles(vfdP, snap_id, false))
20402046
continue;
20412047

2048+
if (pg_atomic_read_u32(&vfdP->snap_map->size) == SFS_NEW_FILE_MARKER)
2049+
{
2050+
return 0; /* empty file */
2051+
}
20422052
offs = vfdP->snap_map->offs[vfdP->seekPos/BLCKSZ];
20432053
if (offs)
20442054
{
@@ -2183,7 +2193,8 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
21832193

21842194
OpenSnapshotFiles(vfdP, ControlFile->recent_snapshot, true);
21852195

2186-
if (!vfdP->snap_map->offs[block_no]) /* This block was not saved yet in this snapshot */
2196+
if (pg_atomic_read_u32(&vfdP->snap_map->size) != SFS_NEW_FILE_MARKER
2197+
&& vfdP->snap_map->offs[block_no] == 0) /* This block was not saved yet in this snapshot */
21872198
{
21882199
sfs_segment_offs_t snap_offs = pg_atomic_fetch_add_u32(&vfdP->snap_map->size, 1)*BLCKSZ;
21892200
char orig_block[BLCKSZ];
@@ -2373,7 +2384,7 @@ FileSeek(File file, off_t offset, int whence)
23732384
if (whence == SEEK_END)
23742385
{
23752386
SnapshotId snap_id;
2376-
SnapshotId current_snapshot = sfs_backend_snapshot != SFS_INVALID_SNAPSHOT ? sfs_backend_snapshot : ControlFile->active_snapshot;
2387+
SnapshotId current_snapshot = sfs_backend_snapshot != SFS_INVALID_SNAPSHOT ? sfs_backend_snapshot : ControlFile->active_snapshot;
23772388
if (current_snapshot != SFS_INVALID_SNAPSHOT)
23782389
{
23792390
if (current_snapshot < ControlFile->oldest_snapshot || current_snapshot > ControlFile->recent_snapshot)
@@ -2389,11 +2400,16 @@ FileSeek(File file, off_t offset, int whence)
23892400
if (!OpenSnapshotFiles(vfdP, snap_id, false))
23902401
continue;
23912402

2392-
for (i = RELSEG_SIZE; --i != 0;)
2403+
for (i = RELSEG_SIZE; --i >= 0;)
23932404
{
2394-
sfs_segment_offs_t offs = vfdP->snap_map->offs[i];
2395-
if (offs >= vfdP->seekPos)
2396-
vfdP->seekPos = offs + BLCKSZ - 1;
2405+
if (vfdP->snap_map->offs[i] != 0)
2406+
{
2407+
if ((i+1)*BLCKSZ - offset >= vfdP->seekPos)
2408+
{
2409+
vfdP->seekPos = (i+1)*BLCKSZ - offset;
2410+
}
2411+
break;
2412+
}
23972413
}
23982414
}
23992415
}
@@ -2453,7 +2469,8 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
24532469
char orig_block[BLCKSZ];
24542470
int rc;
24552471

2456-
if (!vfdP->snap_map->offs[block_no]) /* This block was not saved yet in this snapshot */
2472+
if (pg_atomic_read_u32(&vfdP->snap_map->size) != SFS_NEW_FILE_MARKER
2473+
&& !vfdP->snap_map->offs[block_no]) /* This block was not saved yet in this snapshot */
24572474
{
24582475
sfs_segment_offs_t snap_offs = pg_atomic_fetch_add_u32(&vfdP->snap_map->size, 1)*BLCKSZ;
24592476
vfdP->snap_map->offs[block_no] = snap_offs + 1;
@@ -3887,7 +3904,7 @@ sfs_remove_snapshot_file(const char *fname, bool isdir, int elevel)
38873904
&& snap_id <= sfs_current_snapshot)
38883905
{
38893906
if (unlink(fname) != 0)
3890-
elog(elevel, "Failed to remove file %s: %m", fname);
3907+
elog(elevel, "Failed to remove snapshot file %s: %m", fname);
38913908
}
38923909
}
38933910
}
@@ -3907,7 +3924,7 @@ sfs_remove_applied_snapshot_file(const char *fname, bool isdir, int elevel)
39073924
&& snap_id >= sfs_current_snapshot)
39083925
{
39093926
if (unlink(fname) != 0)
3910-
elog(elevel, "Failed to remove file %s: %m", fname);
3927+
elog(elevel, "Failed to remove applied snapshot file %s: %m", fname);
39113928
}
39123929
}
39133930
}
@@ -3935,25 +3952,34 @@ sfs_recover_snapshot_file(const char *fname, bool isdir, int elevel)
39353952
if (!OpenSnapshotFiles(vfdP, snap_id, false))
39363953
elog(ERROR, "[SFS] Failed to open snapshot files");
39373954

3938-
for (i = 0; i < RELSEG_SIZE; i++)
3955+
if (pg_atomic_read_u32(&vfdP->snap_map->size) == SFS_NEW_FILE_MARKER)
3956+
{
3957+
/* This file was created in this snapshot, so just remove it */
3958+
if (unlink(fname) != 0)
3959+
elog(elevel, "Failed to remove file %s: %m", fname);
3960+
}
3961+
else
39393962
{
3940-
sfs_segment_offs_t offs = vfdP->snap_map->offs[i];
3941-
if (offs)
3963+
for (i = 0; i < RELSEG_SIZE; i++)
39423964
{
3943-
char orig_block[BLCKSZ];
3944-
offs -= 1;
3965+
sfs_segment_offs_t offs = vfdP->snap_map->offs[i];
3966+
if (offs)
3967+
{
3968+
char orig_block[BLCKSZ];
3969+
offs -= 1;
39453970

3946-
if (lseek(vfdP->snap_fd, offs, SEEK_SET) != offs)
3947-
elog(ERROR, "[SFS] Could not seek file: %m");
3971+
if (lseek(vfdP->snap_fd, offs, SEEK_SET) != offs)
3972+
elog(ERROR, "[SFS] Could not seek file: %m");
39483973

3949-
if (sfs_read_file(vfdP->snap_fd, orig_block, BLCKSZ) != BLCKSZ)
3950-
elog(ERROR, "[SFS] Could not read file: %m");
3974+
if (sfs_read_file(vfdP->snap_fd, orig_block, BLCKSZ) != BLCKSZ)
3975+
elog(ERROR, "[SFS] Could not read file: %m");
39513976

3952-
if (lseek(vfdP->fd, i*BLCKSZ, SEEK_SET) != i*BLCKSZ)
3953-
elog(ERROR, "[SFS] Could not seek file: %m");
3977+
if (lseek(vfdP->fd, i*BLCKSZ, SEEK_SET) != i*BLCKSZ)
3978+
elog(ERROR, "[SFS] Could not seek file: %m");
39543979

3955-
if (!sfs_write_file(vfdP->fd, orig_block, BLCKSZ))
3956-
elog(ERROR, "[SFS] Could not write file: %m");
3980+
if (!sfs_write_file(vfdP->fd, orig_block, BLCKSZ))
3981+
elog(ERROR, "[SFS] Could not write file: %m");
3982+
}
39573983
}
39583984
}
39593985

src/backend/storage/file/snapfs.c

Lines changed: 2 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -217,38 +217,14 @@ void
217217
sfs_lock_database(void)
218218
{
219219
if (!InRecovery)
220-
{
221-
bool standalone = false;
222-
TransactionId myXid = GetCurrentTransactionIdIfAny();
223-
224-
/* Prevent assignment Xids to transaction and
225-
* so delay start of any new update transactions
226-
*/
227-
LWLockAcquire(XidGenLock, LW_SHARED);
228-
229-
/* Wait completion of all active tranasction except own */
230-
do
231-
{
232-
RunningTransactions running = GetRunningTransactionData();
233-
standalone = (TransactionIdIsValid(myXid) && running->xcnt == 1) || (!TransactionIdIsValid(myXid) && running->xcnt == 0);
234-
235-
/* Release locks set by GetRunningTransactionData */
236-
LWLockRelease(ProcArrayLock);
237-
LWLockRelease(XidGenLock);
238-
239-
/* Wait for one second */
240-
if (!standalone)
241-
pg_usleep(USECS_PER_SEC);
242-
243-
} while (!standalone);
244-
}
220+
ProcArrayLockDatabase();
245221
}
246222

247223
void
248224
sfs_unlock_database(void)
249225
{
250226
if (!InRecovery)
251-
LWLockRelease(XidGenLock);
227+
ProcArrayUnlockDatabase();
252228
}
253229

254230
void

src/backend/storage/ipc/procarray.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ typedef struct ProcArrayStruct
6767
{
6868
int numProcs; /* number of valid procs entries */
6969
int maxProcs; /* allocated size of procs array */
70+
int dbLock; /* flag preventing start of new transactions */
7071

7172
/*
7273
* Known assigned XIDs handling
@@ -3966,3 +3967,48 @@ KnownAssignedXidsReset(void)
39663967

39673968
LWLockRelease(ProcArrayLock);
39683969
}
3970+
3971+
#define DBLOCK_WAIT_TIMEOUT USECS_PER_SEC
3972+
3973+
void
3974+
ProcArrayLockDatabase(void)
3975+
{
3976+
bool standalone = false;
3977+
TransactionId myXid = GetCurrentTransactionIdIfAny();
3978+
volatile ProcArrayStruct* pas = procArray;
3979+
3980+
pas->dbLock = true;
3981+
/* Wait completion of all active tranasction except own */
3982+
do
3983+
{
3984+
RunningTransactions running = GetRunningTransactionData();
3985+
standalone = (TransactionIdIsValid(myXid) && running->xcnt == 1) || (!TransactionIdIsValid(myXid) && running->xcnt == 0);
3986+
3987+
/* Release locks set by GetRunningTransactionData */
3988+
LWLockRelease(ProcArrayLock);
3989+
LWLockRelease(XidGenLock);
3990+
3991+
/* Wait for one second */
3992+
if (!standalone)
3993+
pg_usleep(DBLOCK_WAIT_TIMEOUT);
3994+
3995+
} while (!standalone);
3996+
}
3997+
3998+
3999+
void
4000+
ProcArrayUnlockDatabase(void)
4001+
{
4002+
volatile ProcArrayStruct* pas = procArray;
4003+
pas->dbLock = false;
4004+
}
4005+
4006+
void
4007+
ProcArrayTestDatabaseLock(void)
4008+
{
4009+
volatile ProcArrayStruct* pas = procArray;
4010+
while (pas->dbLock)
4011+
{
4012+
pg_usleep(DBLOCK_WAIT_TIMEOUT);
4013+
}
4014+
}

src/include/storage/procarray.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,4 +124,8 @@ extern void ProcArraySetReplicationSlotXmin(TransactionId xmin,
124124
extern void ProcArrayGetReplicationSlotXmin(TransactionId *xmin,
125125
TransactionId *catalog_xmin);
126126

127+
extern void ProcArrayLockDatabase(void);
128+
extern void ProcArrayUnlockDatabase(void);
129+
extern void ProcArrayTestDatabaseLock(void);
130+
127131
#endif /* PROCARRAY_H */

src/include/storage/snapfs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ extern time_t sfs_get_snapshot_timestamp(SnapshotId sid);
6363
#define SFS_KEEPING_SNAPSHOT() (ControlFile->recent_snapshot >= ControlFile->oldest_snapshot)
6464
#define SFS_IN_SNAPSHOT() (sfs_backend_snapshot != SFS_INVALID_SNAPSHOT || ControlFile->active_snapshot != SFS_INVALID_SNAPSHOT)
6565

66+
#define SFS_NEW_FILE_MARKER (~0)
67+
6668
typedef uint32 sfs_segment_offs_t; /* segment size can not be greateer than 1Gb, so 4 bytes is enough */
6769

6870

0 commit comments

Comments
 (0)