Skip to content

Commit f8c183a

Browse files
committed
Speed up CREATE DATABASE by deferring the fsyncs until after copying
all the data and using posix_fadvise to nudge the OS into flushing it earlier. This also hopefully makes CREATE DATABASE avoid spamming the cache. Tests show a big speedup on Linux, at least on some filesystems. Idea and patch from Andres Freund.
1 parent e26c539 commit f8c183a

File tree

3 files changed

+76
-29
lines changed

3 files changed

+76
-29
lines changed

src/backend/storage/file/fd.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.153 2010/01/12 02:42:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.154 2010/02/15 00:50:57 stark Exp $
1111
*
1212
* NOTES:
1313
*
@@ -319,6 +319,22 @@ pg_fdatasync(int fd)
319319
return 0;
320320
}
321321

322+
/*
323+
* pg_flush_data --- advise OS that the data described won't be needed soon
324+
*
325+
* Not all platforms have posix_fadvise; treat as noop if not available.
326+
*/
327+
int
328+
pg_flush_data(int fd, off_t offset, off_t amount)
329+
{
330+
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
331+
return posix_fadvise(fd, offset, amount, POSIX_FADV_DONTNEED);
332+
#else
333+
return 0;
334+
#endif
335+
}
336+
337+
322338
/*
323339
* InitFileAccess --- initialize this module during backend startup
324340
*

src/include/storage/fd.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.66 2010/01/02 16:58:08 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.67 2010/02/15 00:50:57 stark Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -98,6 +98,7 @@ extern int pg_fsync(int fd);
9898
extern int pg_fsync_no_writethrough(int fd);
9999
extern int pg_fsync_writethrough(int fd);
100100
extern int pg_fdatasync(int fd);
101+
extern int pg_flush_data(int fd, off_t offset, off_t amount);
101102

102103
/* Filename components for OpenTemporaryFile */
103104
#define PG_TEMP_FILES_DIR "pgsql_tmp"

src/port/copydir.c

Lines changed: 57 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* as a service.
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.25 2010/02/14 17:50:52 stark Exp $
14+
* $PostgreSQL: pgsql/src/port/copydir.c,v 1.26 2010/02/15 00:50:57 stark Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -37,6 +37,7 @@
3737

3838

3939
static void copy_file(char *fromfile, char *tofile);
40+
static void fsync_fname(char *fname);
4041

4142

4243
/*
@@ -91,27 +92,32 @@ copydir(char *fromdir, char *todir, bool recurse)
9192
copy_file(fromfile, tofile);
9293
}
9394

94-
FreeDir(xldir);
95-
9695
/*
97-
* fsync the directory to make sure not just the data but also the
98-
* new directory file entries have reached the disk. While needed
99-
* by most filesystems, the window got bigger with newer ones like
100-
* ext4.
96+
* Be paranoid here and fsync all files to ensure we catch problems.
10197
*/
102-
dirfd = BasicOpenFile(todir,
103-
O_RDONLY | PG_BINARY,
104-
S_IRUSR | S_IWUSR);
105-
if(dirfd == -1)
106-
ereport(ERROR,
107-
(errcode_for_file_access(),
108-
errmsg("could not open directory for fsync \"%s\": %m", todir)));
109-
110-
if(pg_fsync(dirfd) == -1)
98+
if (xldir == NULL)
11199
ereport(ERROR,
112100
(errcode_for_file_access(),
113-
errmsg("could not fsync directory \"%s\": %m", todir)));
114-
close(dirfd);
101+
errmsg("could not open directory \"%s\": %m", fromdir)));
102+
103+
while ((xlde = ReadDir(xldir, fromdir)) != NULL)
104+
{
105+
if (strcmp(xlde->d_name, ".") == 0 ||
106+
strcmp(xlde->d_name, "..") == 0)
107+
continue;
108+
109+
snprintf(tofile, MAXPGPATH, "%s/%s", todir, xlde->d_name);
110+
fsync_fname(tofile);
111+
}
112+
FreeDir(xldir);
113+
114+
/* It's important to fsync the destination directory itself as
115+
* individual file fsyncs don't guarantee that the directory entry
116+
* for the file is synced. Recent versions of ext4 have made the
117+
* window much wider but it's been true for ext3 and other
118+
* filesystems in the past.
119+
*/
120+
fsync_fname(todir);
115121
}
116122

117123
/*
@@ -124,6 +130,7 @@ copy_file(char *fromfile, char *tofile)
124130
int srcfd;
125131
int dstfd;
126132
int nbytes;
133+
off_t offset;
127134

128135
/* Use palloc to ensure we get a maxaligned buffer */
129136
#define COPY_BUF_SIZE (8 * BLCKSZ)
@@ -149,7 +156,7 @@ copy_file(char *fromfile, char *tofile)
149156
/*
150157
* Do the data copying.
151158
*/
152-
for (;;)
159+
for (offset=0; ; offset+=nbytes)
153160
{
154161
nbytes = read(srcfd, buffer, COPY_BUF_SIZE);
155162
if (nbytes < 0)
@@ -168,15 +175,14 @@ copy_file(char *fromfile, char *tofile)
168175
(errcode_for_file_access(),
169176
errmsg("could not write to file \"%s\": %m", tofile)));
170177
}
171-
}
172178

173-
/*
174-
* Be paranoid here to ensure we catch problems.
175-
*/
176-
if (pg_fsync(dstfd) != 0)
177-
ereport(ERROR,
178-
(errcode_for_file_access(),
179-
errmsg("could not fsync file \"%s\": %m", tofile)));
179+
/*
180+
* We fsync the files later but first flush them to avoid spamming
181+
* the cache and hopefully get the kernel to start writing them
182+
* out before the fsync comes.
183+
*/
184+
pg_flush_data(dstfd, offset, nbytes);
185+
}
180186

181187
if (close(dstfd))
182188
ereport(ERROR,
@@ -187,3 +193,27 @@ copy_file(char *fromfile, char *tofile)
187193

188194
pfree(buffer);
189195
}
196+
197+
198+
199+
/*
200+
* fsync a file
201+
*/
202+
static void
203+
fsync_fname(char *fname)
204+
{
205+
int fd = BasicOpenFile(fname,
206+
O_RDONLY | PG_BINARY,
207+
S_IRUSR | S_IWUSR);
208+
209+
if (fd < 0)
210+
ereport(ERROR,
211+
(errcode_for_file_access(),
212+
errmsg("could not open file \"%s\": %m", fname)));
213+
214+
if (pg_fsync(fd) != 0)
215+
ereport(ERROR,
216+
(errcode_for_file_access(),
217+
errmsg("could not fsync file \"%s\": %m", fname)));
218+
close(fd);
219+
}

0 commit comments

Comments
 (0)