Code review for transaction commit timestamps
author Alvaro Herrera <[email protected]>
Tue, 29 Sep 2015 17:40:56 +0000 (14:40 -0300)
committer Alvaro Herrera <[email protected]>
Tue, 29 Sep 2015 17:40:56 +0000 (14:40 -0300)
There are three main changes here:

1. No longer cause a start failure in a standby if the feature is
disabled in postgresql.conf but enabled in the master.  This reverts one
part of commit 4f3924d9cd43; what we keep is the ability of the standby
to activate/deactivate the module (which includes creating and removing
segments as appropriate) during replay of such actions in the master.

2. Replay WAL records affecting commitTS even if the feature is
disabled.  This means the standby will always have the same state as the
master after replay.

3. Have COMMIT PREPARE record the transaction commit time as well.  We
were previously only applying it in the normal transaction commit path.

Author: Petr Jelínek
Discussion: http://www.postgresql.org/message-id/CAHGQGwHereDzzzmfxEBYcVQu3oZv6vZcgu1TPeERWbDc+gQ06g@mail.gmail.com
Discussion: http://www.postgresql.org/message-id/CAHGQGwFuzfO4JscM9LCAmCDCxp_MfLvN4QdB+xWsS-FijbjTYQ@mail.gmail.com

Additionally, I cleaned up nearby code related to replication origins,
which I found a bit hard to follow, and fixed a couple of typos.

Backpatch to 9.5, where this code was introduced.

Per bug reports from Fujii Masao and subsequent discussion.

src/backend/access/rmgrdesc/xlogdesc.c
src/backend/access/transam/commit_ts.c
src/backend/access/transam/twophase.c
src/backend/access/transam/xact.c
src/backend/access/transam/xlog.c
src/include/access/commit_ts.h

index 4f2913683a1a228b355c194ed45652e646c9ed88..83cc9e896ebcb9f08fc354e05d28d3edeb844261 100644 (file)
@@ -111,7 +111,7 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
                appendStringInfo(buf, "max_connections=%d max_worker_processes=%d "
                                                 "max_prepared_xacts=%d max_locks_per_xact=%d "
                                                 "wal_level=%s wal_log_hints=%s "
-                                                "track_commit_timestamps=%s",
+                                                "track_commit_timestamp=%s",
                                                 xlrec.MaxConnections,
                                                 xlrec.max_worker_processes,
                                                 xlrec.max_prepared_xacts,
index 33136e3c1d964cae58fd43f943e56951108f9fce..78090c5f09c798866735b005fe77607b9545f883 100644 (file)
@@ -122,29 +122,39 @@ static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids,
  * subtrans implementation changes in the future, we might want to revisit the
  * decision of storing timestamp info for each subxid.
  *
- * The do_xlog parameter tells us whether to include an XLog record of this
- * or not.  Normal path through RecordTransactionCommit() will be related
- * to a transaction commit XLog record, and so should pass "false" here.
- * Other callers probably want to pass true, so that the given values persist
- * in case of crashes.
+ * The replaying_xlog parameter indicates whether the module should execute
+ * its write even if the feature is nominally disabled, because we're replaying
+ * a record generated from a master where the feature is enabled.
+ *
+ * The write_xlog parameter tells us whether to include an XLog record of this
+ * or not.  Normally, this is called from transaction commit routines (both
+ * normal and prepared) and the information will be stored in the transaction
+ * commit XLog record, and so they should pass "false" for this.  The XLog redo
+ * code should use "false" here as well.  Other callers probably want to pass
+ * true, so that the given values persist in case of crashes.
  */
 void
 TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
                                                           TransactionId *subxids, TimestampTz timestamp,
-                                                          RepOriginId nodeid, bool do_xlog)
+                                                          RepOriginId nodeid,
+                                                          bool replaying_xlog, bool write_xlog)
 {
        int                     i;
        TransactionId headxid;
        TransactionId newestXact;
 
-       if (!track_commit_timestamp)
+       /* We'd better not try to write xlog during replay */
+       Assert(!(write_xlog && replaying_xlog));
+
+       /* No-op if feature not enabled, unless replaying WAL */
+       if (!track_commit_timestamp && !replaying_xlog)
                return;
 
        /*
         * Comply with the WAL-before-data rule: if caller specified it wants this
         * value to be recorded in WAL, do so before touching the data.
         */
-       if (do_xlog)
+       if (write_xlog)
                WriteSetTimestampXlogRec(xid, nsubxids, subxids, timestamp, nodeid);
 
        /*
@@ -906,7 +916,8 @@ commit_ts_redo(XLogReaderState *record)
                        subxids = NULL;
 
                TransactionTreeSetCommitTsData(setts->mainxid, nsubxids, subxids,
-                                                                        setts->timestamp, setts->nodeid, false);
+                                                                          setts->timestamp, setts->nodeid, false,
+                                                                          true);
                if (subxids)
                        pfree(subxids);
        }
index d48d101340ffa48fa23901905420ce131aa73eb1..e005cc558abdc719b6a1be18c807e7bdb7187e2d 100644 (file)
@@ -41,6 +41,7 @@
 #include <time.h>
 #include <unistd.h>
 
+#include "access/commit_ts.h"
 #include "access/htup_details.h"
 #include "access/subtrans.h"
 #include "access/transam.h"
@@ -56,8 +57,9 @@
 #include "miscadmin.h"
 #include "pg_trace.h"
 #include "pgstat.h"
-#include "replication/walsender.h"
+#include "replication/origin.h"
 #include "replication/syncrep.h"
+#include "replication/walsender.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 #include "storage/predicate.h"
@@ -2070,8 +2072,9 @@ RecoverPreparedTransactions(void)
 /*
  *     RecordTransactionCommitPrepared
  *
- * This is basically the same as RecordTransactionCommit: in particular,
- * we must set the delayChkpt flag to avoid a race condition.
+ * This is basically the same as RecordTransactionCommit (q.v. if you change
+ * this function): in particular, we must set the delayChkpt flag to avoid a
+ * race condition.
  *
  * We know the transaction made at least one XLOG entry (its PREPARE),
  * so it is never possible to optimize out the commit record.
@@ -2087,6 +2090,15 @@ RecordTransactionCommitPrepared(TransactionId xid,
                                                                bool initfileinval)
 {
        XLogRecPtr      recptr;
+       TimestampTz committs = GetCurrentTimestamp();
+       bool            replorigin;
+
+       /*
+        * Are we using the replication origins feature?  Or, in other words, are
+        * we replaying remote actions?
+        */
+       replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+                                 replorigin_session_origin != DoNotReplicateId);
 
        START_CRIT_SECTION();
 
@@ -2094,12 +2106,33 @@ RecordTransactionCommitPrepared(TransactionId xid,
        MyPgXact->delayChkpt = true;
 
        /* Emit the XLOG commit record */
-       recptr = XactLogCommitRecord(GetCurrentTimestamp(),
+       recptr = XactLogCommitRecord(committs,
                                                                 nchildren, children, nrels, rels,
                                                                 ninvalmsgs, invalmsgs,
                                                                 initfileinval, false,
                                                                 xid);
 
+
+       if (replorigin)
+               /* Move LSNs forward for this replication origin */
+               replorigin_session_advance(replorigin_session_origin_lsn,
+                                                                  XactLastRecEnd);
+
+       /*
+        * Record commit timestamp.  The value comes from plain commit timestamp
+        * if replorigin is not enabled, or replorigin already set a value for us
+        * in replorigin_session_origin_timestamp otherwise.
+        *
+        * We don't need to WAL-log anything here, as the commit record written
+        * above already contains the data.
+        */
+       if (!replorigin || replorigin_session_origin_timestamp == 0)
+               replorigin_session_origin_timestamp = committs;
+
+       TransactionTreeSetCommitTsData(xid, nchildren, children,
+                                                                  replorigin_session_origin_timestamp,
+                                                                  replorigin_session_origin, false, false);
+
        /*
         * We don't currently try to sleep before flush here ... nor is there any
         * support for async commit of a prepared xact (the very idea is probably
index 068214d83e041a37605a162f95043c3ed0d73632..8f56a44d06e2c709f7180dcabcaa1a037f977cb3 100644 (file)
@@ -42,9 +42,9 @@
 #include "miscadmin.h"
 #include "pgstat.h"
 #include "replication/logical.h"
-#include "replication/walsender.h"
-#include "replication/syncrep.h"
 #include "replication/origin.h"
+#include "replication/syncrep.h"
+#include "replication/walsender.h"
 #include "storage/fd.h"
 #include "storage/lmgr.h"
 #include "storage/predicate.h"
@@ -1119,6 +1119,8 @@ AtSubStart_ResourceOwner(void)
  *
  * Returns latest XID among xact and its children, or InvalidTransactionId
  * if the xact has no XID.  (We compute that here just because it's easier.)
+ *
+ * If you change this function, see RecordTransactionCommitPrepared also.
  */
 static TransactionId
 RecordTransactionCommit(void)
@@ -1172,6 +1174,15 @@ RecordTransactionCommit(void)
        }
        else
        {
+               bool            replorigin;
+
+               /*
+                * Are we using the replication origins feature?  Or, in other words,
+                * are we replaying remote actions?
+                */
+               replorigin = (replorigin_session_origin != InvalidRepOriginId &&
+                                         replorigin_session_origin != DoNotReplicateId);
+
                /*
                 * Begin commit critical section and insert the commit XLOG record.
                 */
@@ -1206,26 +1217,28 @@ RecordTransactionCommit(void)
                                                        RelcacheInitFileInval, forceSyncCommit,
                                                        InvalidTransactionId /* plain commit */ );
 
-               /*
-                * Record plain commit ts if not replaying remote actions, or if no
-                * timestamp is configured.
-                */
-               if (replorigin_session_origin == InvalidRepOriginId ||
-                       replorigin_session_origin == DoNotReplicateId ||
-                       replorigin_session_origin_timestamp == 0)
-                       replorigin_session_origin_timestamp = xactStopTimestamp;
-               else
+               if (replorigin)
+                       /* Move LSNs forward for this replication origin */
                        replorigin_session_advance(replorigin_session_origin_lsn,
                                                                           XactLastRecEnd);
 
                /*
-                * We don't need to WAL log origin or timestamp here, the commit
-                * record contains all the necessary information and will redo the SET
-                * action during replay.
+                * Record commit timestamp.  The value comes from plain commit
+                * timestamp if there's no replication origin; otherwise, the
+                * timestamp was already set in replorigin_session_origin_timestamp by
+                * replication.
+                *
+                * We don't need to WAL-log anything here, as the commit record
+                * written above already contains the data.
                 */
+
+               if (!replorigin || replorigin_session_origin_timestamp == 0)
+                       replorigin_session_origin_timestamp = xactStopTimestamp;
+
                TransactionTreeSetCommitTsData(xid, nchildren, children,
                                                                           replorigin_session_origin_timestamp,
-                                                                          replorigin_session_origin, false);
+                                                                          replorigin_session_origin,
+                                                                          false, false);
        }
 
        /*
@@ -5321,7 +5334,7 @@ xact_redo_commit(xl_xact_parsed_commit *parsed,
        /* Set the transaction commit timestamp and metadata */
        TransactionTreeSetCommitTsData(xid, parsed->nsubxacts, parsed->subxacts,
                                                                   commit_time, origin_id,
-                                                                  false);
+                                                                  true, false);
 
        if (standbyState == STANDBY_DISABLED)
        {
index 22e6a21414423c0bb05333585d41ec5f201f83e8..0266d61bbdb7b74eb4274d72cb8bf13d403f2fbe 100644 (file)
@@ -5826,19 +5826,6 @@ do { \
                                                minValue))); \
 } while(0)
 
-#define RecoveryRequiresBoolParameter(param_name, currValue, masterValue) \
-do { \
-       bool _currValue = (currValue); \
-       bool _masterValue = (masterValue); \
-       if (_currValue != _masterValue) \
-               ereport(ERROR, \
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
-                                errmsg("hot standby is not possible because it requires \"%s\" to be same on master and standby (master has \"%s\", standby has \"%s\")", \
-                                               param_name, \
-                                               _masterValue ? "true" : "false", \
-                                               _currValue ? "true" : "false"))); \
-} while(0)
-
 /*
  * Check to see if required parameters are set high enough on this server
  * for various aspects of recovery operation.
@@ -5885,9 +5872,6 @@ CheckRequiredParameterValues(void)
                RecoveryRequiresIntParameter("max_locks_per_transaction",
                                                                         max_locks_per_xact,
                                                                         ControlFile->max_locks_per_xact);
-               RecoveryRequiresBoolParameter("track_commit_timestamp",
-                                                                         track_commit_timestamp,
-                                                                         ControlFile->track_commit_timestamp);
        }
 }
 
index bd05ab4d5cec351e1c0b4f71d1aa7c2ab7a64951..dc865d1bc3d389acd095b9163eb8e27a62ef459a 100644 (file)
@@ -24,7 +24,8 @@ extern bool check_track_commit_timestamp(bool *newval, void **extra,
 
 extern void TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
                                                           TransactionId *subxids, TimestampTz timestamp,
-                                                          RepOriginId nodeid, bool do_xlog);
+                                                          RepOriginId nodeid,
+                                                          bool replaying_xlog, bool write_xlog);
 extern bool TransactionIdGetCommitTsData(TransactionId xid,
                                                         TimestampTz *ts, RepOriginId *nodeid);
 extern TransactionId GetLatestCommitTsData(TimestampTz *ts,
@@ -67,4 +68,4 @@ extern void commit_ts_redo(XLogReaderState *record);
 extern void commit_ts_desc(StringInfo buf, XLogReaderState *record);
 extern const char *commit_ts_identify(uint8 info);
 
-#endif   /* COMMITTS_H */
+#endif   /* COMMIT_TS_H */