Skip to content

Commit fd30ae2

Browse files
committed
[Issue #90] Remove stale .partial WAL files
1 parent f6ec367 commit fd30ae2

File tree

1 file changed

+67
-2
lines changed

1 file changed

+67
-2
lines changed

src/archive.c

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
142142
const char *to_path_p;
143143
char to_path_temp[MAXPGPATH];
144144
int errno_temp;
145+
/* partial handling */
146+
int partial_timeout = 0;
147+
int partial_size = 0;
148+
struct stat st;
149+
bool partial_exists = false;
145150

146151
#ifdef HAVE_LIBZ
147152
char gz_to_path[MAXPGPATH];
@@ -180,8 +185,11 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
180185

181186
gz_out = fio_gzopen(to_path_temp, PG_BINARY_W, instance_config.compress_level, FIO_BACKUP_HOST);
182187
if (gz_out == NULL)
183-
elog(ERROR, "Cannot open destination temporary WAL file \"%s\": %s",
188+
{
189+
partial_exists = true;
190+
elog(WARNING, "Cannot open destination temporary WAL file \"%s\": %s",
184191
to_path_temp, strerror(errno));
192+
}
185193
}
186194
else
187195
#endif
@@ -190,8 +198,65 @@ push_wal_file(const char *from_path, const char *to_path, bool is_compress,
190198

191199
out = fio_open(to_path_temp, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FIO_BACKUP_HOST);
192200
if (out < 0)
193-
elog(ERROR, "Cannot open destination temporary WAL file \"%s\": %s",
201+
{
202+
partial_exists = true;
203+
elog(WARNING, "Cannot open destination temporary WAL file \"%s\": %s",
194204
to_path_temp, strerror(errno));
205+
}
206+
}
207+
208+
/* sleep a second, check if .partial file size is changing, if not, then goto p1
209+
* Algorithm is not pretty, however we do not expect conflict for '.partial' file
210+
* to be a frequent occurrence.
211+
* The main goal is to protect against failed archive-push which left behind
212+
* orphan '.partial' file.
213+
*/
214+
if (partial_exists)
215+
{
216+
while (1)
217+
{
218+
/* exit from loop */
219+
if (partial_timeout > 10)
220+
{
221+
/* For 10 seconds the file didn't change its size, so consider it stale and reuse it */
222+
elog(WARNING, "Reusing stale destination temporary WAL file \"%s\"", to_path_temp);
223+
fio_unlink(to_path_temp, FIO_BACKUP_HOST);
224+
225+
#ifdef HAVE_LIBZ
226+
if (is_compress)
227+
{
228+
gz_out = fio_gzopen(to_path_temp, PG_BINARY_W, instance_config.compress_level, FIO_BACKUP_HOST);
229+
if (gz_out == NULL)
230+
elog(ERROR, "Cannot open destination temporary WAL file \"%s\": %s",
231+
to_path_temp, strerror(errno));
232+
}
233+
else
234+
#endif
235+
{
236+
out = fio_open(to_path_temp, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, FIO_BACKUP_HOST);
237+
if (out < 0)
238+
elog(ERROR, "Cannot open destination temporary WAL file \"%s\": %s",
239+
to_path_temp, strerror(errno));
240+
}
241+
break;
242+
}
243+
244+
if (fio_stat(to_path_temp, &st, false, FIO_BACKUP_HOST) < 0)
245+
/* It is ok if partial is gone, we can safely error out */
246+
elog(ERROR, "Cannot stat destination temporary WAL file \"%s\": %s", to_path_temp,
247+
strerror(errno));
248+
249+
/* first round */
250+
if (!partial_timeout)
251+
partial_size = st.st_size;
252+
253+
/* file size is changing */
254+
if (st.st_size > partial_size)
255+
elog(ERROR, "Destination temporary WAL file \"%s\" is not stale", to_path_temp);
256+
257+
sleep(1);
258+
partial_timeout++;
259+
}
195260
}
196261

197262
/* copy content */

0 commit comments

Comments
 (0)