fd_set readmask;
int nSockets;
time_t now,
+ last_lockfile_recheck_time,
last_touch_time;
- last_touch_time = time(NULL);
+ last_lockfile_recheck_time = last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
if (StartWorkerNeeded || HaveCrashedWorker)
maybe_start_bgworker();
- /*
- * Touch Unix socket and lock files every 58 minutes, to ensure that
- * they are not removed by overzealous /tmp-cleaning tasks. We assume
- * no one runs cleaners with cutoff times of less than an hour ...
- */
- now = time(NULL);
- if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
- {
- TouchSocketFiles();
- TouchSocketLockFiles();
- last_touch_time = now;
- }
-
#ifdef HAVE_PTHREAD_IS_THREADED_NP
/*
/* reset flag so we don't SIGKILL again */
AbortStartTime = 0;
}
+
+ /*
+ * Lastly, check to see if it's time to do some things that we don't
+ * want to do every single time through the loop, because they're a
+ * bit expensive. Note that there's up to a minute of slop in when
+ * these tasks will be performed, since DetermineSleepTime() will let
+ * us sleep at most that long.
+ */
+ now = time(NULL);
+
+ /*
+ * Once a minute, verify that postmaster.pid hasn't been removed or
+ * overwritten. If it has, we force a shutdown. This avoids having
+ * postmasters and child processes hanging around after their database
+ * is gone, and maybe causing problems if a new database cluster is
+ * created in the same place. It also provides some protection
+ * against a DBA foolishly removing postmaster.pid and manually
+ * starting a new postmaster. Data corruption is likely to ensue from
+ * that anyway, but we can minimize the damage by aborting ASAP.
+ */
+ if (now - last_lockfile_recheck_time >= 1 * SECS_PER_MINUTE)
+ {
+ if (!RecheckDataDirLockFile())
+ {
+ ereport(LOG,
+ (errmsg("performing immediate shutdown because data directory lock file is invalid")));
+ kill(MyProcPid, SIGQUIT);
+ }
+ last_lockfile_recheck_time = now;
+ }
+
+ /*
+ * Touch Unix socket and lock files every 58 minutes, to ensure that
+ * they are not removed by overzealous /tmp-cleaning tasks. We assume
+ * no one runs cleaners with cutoff times of less than an hour ...
+ */
+ if (now - last_touch_time >= 58 * SECS_PER_MINUTE)
+ {
+ TouchSocketFiles();
+ TouchSocketLockFiles();
+ last_touch_time = now;
+ }
}
}
}
+/*
+ * Recheck that the data directory lock file still exists with expected
+ * content. Return TRUE if the lock file appears OK, FALSE if it isn't.
+ *
+ * We call this periodically in the postmaster. The idea is that if the
+ * lock file has been removed or replaced by another postmaster, we should
+ * do a panic database shutdown. Therefore, we should return TRUE if there
+ * is any doubt: we do not want to cause a panic shutdown unnecessarily.
+ * Transient failures like EINTR or ENFILE should not cause us to fail.
+ * (If there really is something wrong, we'll detect it on a future recheck.)
+ */
+bool
+RecheckDataDirLockFile(void)
+{
+ int fd;
+ int len;
+ long file_pid;
+ char buffer[BLCKSZ];
+
+ fd = open(DIRECTORY_LOCK_FILE, O_RDWR | PG_BINARY, 0);
+ if (fd < 0)
+ {
+ /*
+ * There are many foreseeable false-positive error conditions. For
+ * safety, fail only on enumerated clearly-something-is-wrong
+ * conditions.
+ */
+ switch (errno)
+ {
+ case ENOENT:
+ case ENOTDIR:
+ /* disaster */
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m",
+ DIRECTORY_LOCK_FILE)));
+ return false;
+ default:
+ /* non-fatal, at least for now */
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m; continuing anyway",
+ DIRECTORY_LOCK_FILE)));
+ return true;
+ }
+ }
+ len = read(fd, buffer, sizeof(buffer) - 1);
+ if (len < 0)
+ {
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not read from file \"%s\": %m",
+ DIRECTORY_LOCK_FILE)));
+ close(fd);
+ return true; /* treat read failure as nonfatal */
+ }
+ buffer[len] = '\0';
+ close(fd);
+ file_pid = atol(buffer);
+ if (file_pid == getpid())
+ return true; /* all is well */
+
+ /* Trouble: someone's overwritten the lock file */
+ ereport(LOG,
+ (errmsg("lock file \"%s\" contains wrong PID: %ld instead of %ld",
+ DIRECTORY_LOCK_FILE, file_pid, (long) getpid())));
+ return false;
+}
+
+
/*-------------------------------------------------------------------------
* Version checking support
*-------------------------------------------------------------------------
const char *socketDir);
extern void TouchSocketLockFiles(void);
extern void AddToDataDirLockFile(int target_line, const char *str);
+extern bool RecheckDataDirLockFile(void);
extern void ValidatePgVersion(const char *path);
extern void process_shared_preload_libraries(void);
extern void process_session_preload_libraries(void);