From: Robert Haas Date: Wed, 9 Jun 2021 20:21:14 +0000 (-0400) Subject: Fix corner case failure of new standby to follow new primary. X-Git-Tag: REL9_6_23~83 X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=6eb5b9ae39176a7d40003b4c2e9ca22e6b205def;p=postgresql.git Fix corner case failure of new standby to follow new primary. This only happens if (1) the new standby has no WAL available locally, (2) the new standby is starting from the old timeline, (3) the promotion happened in the WAL segment from which the new standby is starting, (4) the timeline history file for the new timeline is available from the archive but the WAL files for it are not (i.e. this is a race), (5) the WAL files for the new timeline are available via streaming, and (6) recovery_target_timeline='latest'. Commit ee994272ca50f70b53074f0febaec97e28f83c4e introduced this logic and was an improvement over the previous code, but it mishandled this case. If recovery_target_timeline='latest' and restore_command is set, validateRecoveryParameters() can change recoveryTargetTLI to be different from receiveTLI. If streaming is then tried afterward, expectedTLEs gets initialized with the history of the wrong timeline. It's supposed to be a list of entries explaining how to get to the target timeline, but in this case it ends up with a list of entries explaining how to get to the new standby's original timeline, which isn't right. Dilip Kumar and Robert Haas, reviewed by Kyotaro Horiguchi. Discussion: http://postgr.es/m/CAFiTN-sE-jr=LB8jQuxeqikd-Ux+jHiXyh4YDiZMPedgQKup0g@mail.gmail.com --- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 9ad6da922c9..7bcb3087e27 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -11835,11 +11835,19 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * pg_xlog by now. 
Use XLOG_FROM_STREAM so that * source info is set correctly and XLogReceiptTime * isn't changed. + * + * NB: We must set readTimeLineHistory based on + * recoveryTargetTLI, not receiveTLI. Normally they'll + * be the same, but if recovery_target_timeline is + * 'latest' and archiving is configured, then it's + * possible that we managed to retrieve one or more + * new timeline history files from the archive, + * updating recoveryTargetTLI. */ if (readFile < 0) { if (!expectedTLEs) - expectedTLEs = readTimeLineHistory(receiveTLI); + expectedTLEs = readTimeLineHistory(recoveryTargetTLI); readFile = XLogFileRead(readSegNo, PANIC, receiveTLI, XLOG_FROM_STREAM, false); diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl new file mode 100644 index 00000000000..9c8e9fd57d8 --- /dev/null +++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl @@ -0,0 +1,107 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Testing streaming replication where standby is promoted and a new cascading +# standby (without WAL) is connected to the promoted standby. Both archiving +# and streaming are enabled, but only the history file is available from the +# archive, so the WAL files all have to be streamed. Test that the cascading +# standby can follow the new primary (promoted standby). +use strict; +use warnings; +use PostgresNode; +use TestLib; +use FindBin; +use Test::More tests => 1; + +# Initialize primary node +my $node_primary = get_new_node('primary'); + +# Set up an archive command that will copy the history file but not the WAL +# files. No real archive command should behave this way; the point is to +# simulate a race condition where the new cascading standby starts up after +# the timeline history file reaches the archive but before any of the WAL files +# get there. 
+$node_primary->init(allows_streaming => 1, has_archiving => 1); +my $perlbin = $^X; +$perlbin =~ s{\\}{\\\\}g if ($TestLib::windows_os); +my $archivedir_primary = $node_primary->archive_dir; +$node_primary->append_conf('postgresql.conf', qq( +archive_command = '$perlbin "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"' +)); +$node_primary->start; + +# Take backup from primary +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby linking to primary +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + allows_streaming => 1, has_streaming => 1, has_archiving => 1); +$node_standby->start; + +# Take backup of standby. +$node_standby->backup($backup_name); + +# Clear out WAL files from pg_xlog so that when we create the cascading +# standby it will start up with no WAL available. +my $pgxlogdir = $node_standby->backup_dir . "/" . $backup_name . "/pg_xlog"; +opendir(my $dh, $pgxlogdir) or die "failed to open $pgxlogdir: $!"; +while (my $f = readdir($dh)) +{ + next if -d "$pgxlogdir/$f"; + unlink("$pgxlogdir/$f") or die "failed to unlink $pgxlogdir/$f: $!"; +} +closedir($dh); + +# Create cascading standby but don't start it yet. +# Must set up both streaming and archiving. +my $node_cascade = get_new_node('cascade'); +$node_cascade->init_from_backup($node_standby, $backup_name, + has_streaming => 1); +$node_cascade->enable_restoring($node_primary); +$node_cascade->append_conf('recovery.conf', qq( +recovery_target_timeline='latest' +)); + +# Promote the standby. 
+$node_standby->promote; + +# Wait for promotion to complete +$node_standby->poll_query_until('postgres', + "SELECT NOT pg_is_in_recovery();") + or die "Timed out while waiting for promotion"; + +# Find next WAL segment to be archived +my $walfile_to_be_archived = $node_standby->safe_psql('postgres', + "SELECT pg_xlogfile_name(pg_current_xlog_location());"); + +# Make WAL segment eligible for archival +$node_standby->safe_psql('postgres', 'SELECT pg_switch_xlog()'); + +# Wait until the WAL segment has been archived. +# Since the history file gets created on promotion and is archived before any +# WAL segment, this is enough to guarantee that the history file was +# archived. +my $archive_wait_query = + "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;"; +$node_standby->poll_query_until('postgres', $archive_wait_query) + or die "Timed out while waiting for WAL segment to be archived"; +my $last_archived_wal_file = $walfile_to_be_archived; + +# Start cascade node +$node_cascade->start; + +# Create some content on promoted standby and check its presence on the +# cascading standby. 
+$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a"); + +# Wait for the replication to catch up +$node_standby->wait_for_catchup($node_cascade, 'replay', + $node_standby->lsn('insert')); + +# Check that cascading standby has the new content +my $result = + $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "cascade: $result\n"; +is($result, 1, 'check streamed content on cascade standby'); diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files new file mode 100644 index 00000000000..cfeea41e5b9 --- /dev/null +++ b/src/test/recovery/t/cp_history_files @@ -0,0 +1,10 @@ +#!/usr/bin/perl + +use File::Copy; +use strict; +use warnings; + +die "wrong number of arguments" if @ARGV != 2; +my ($source, $target) = @ARGV; +exit if $source !~ /history/; +copy($source, $target) or die "couldn't copy $source to $target: $!";