Arrange to fsync the contents of lockfiles (both postmaster.pid and the
author Tom Lane <[email protected]>
Mon, 16 Aug 2010 17:33:01 +0000 (17:33 +0000)
committer Tom Lane <[email protected]>
Mon, 16 Aug 2010 17:33:01 +0000 (17:33 +0000)
socket lockfile) when writing them.  The lack of an fsync here may well
explain two different reports we've seen of corrupted lockfile contents,
which doesn't particularly bother the running server but can prevent a
new server from starting if the old one crashes.  Per suggestion from
Alvaro.

Back-patch to all supported versions.

src/backend/utils/init/miscinit.c

index 55b395f9a2b0fbc79952d7864265d50ed86b93f1..35ef0de709a7ac16a906e6668e2912cf30bf5469 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.175.2.1 2009/12/09 21:58:04 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/init/miscinit.c,v 1.175.2.2 2010/08/16 17:33:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -879,6 +879,9 @@ CreateLockFile(const char *filename, bool amPostmaster,
                 * admin) but has left orphan backends behind.  Check for this by
                 * looking to see if there is an associated shmem segment that is
                 * still in use.
+                *
+                * Note: because postmaster.pid is written in two steps, we might not
+                * find the shmem ID values in it; we can't treat that as an error.
                 */
                if (isDDLock)
                {
@@ -942,7 +945,18 @@ CreateLockFile(const char *filename, bool amPostmaster,
                                (errcode_for_file_access(),
                                 errmsg("could not write lock file \"%s\": %m", filename)));
        }
-       if (close(fd))
+       if (pg_fsync(fd) != 0)
+       {
+               int                     save_errno = errno;
+
+               close(fd);
+               unlink(filename);
+               errno = save_errno;
+               ereport(FATAL,
+                               (errcode_for_file_access(),
+                                errmsg("could not write lock file \"%s\": %m", filename)));
+       }
+       if (close(fd) != 0)
        {
                int                     save_errno = errno;
 
@@ -1103,7 +1117,14 @@ RecordSharedMemoryInLockFile(unsigned long id1, unsigned long id2)
                close(fd);
                return;
        }
-       if (close(fd))
+       if (pg_fsync(fd) != 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not write to file \"%s\": %m",
+                                               DIRECTORY_LOCK_FILE)));
+       }
+       if (close(fd) != 0)
        {
                ereport(LOG,
                                (errcode_for_file_access(),