static void
ReplicationSlotShmemExit(int code, Datum arg)
{
- /* temp debugging aid to analyze 019_replslot_limit failures */
- elog(DEBUG3, "replication slot exit hook, %s active slot",
- MyReplicationSlot != NULL ? "with" : "without");
-
/* Make sure active replication slots are released */
if (MyReplicationSlot != NULL)
ReplicationSlotRelease();
Assert(MyReplicationSlot == NULL);
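/*
 * Scan the slot array and drop any temporary slots still owned by this
 * backend.  Each drop releases the shared lock, so the scan restarts
 * from the top afterwards.
 */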
restart:
- /* temp debugging aid to analyze 019_replslot_limit failures */
- elog(DEBUG3, "temporary replication slot cleanup: begin");
-
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (i = 0; i < max_replication_slots; i++)
{
if (!s->in_use)
continue;
- /* unlocked read of active_pid is ok for debugging purposes */
- elog(DEBUG3, "temporary replication slot cleanup: %d in use, active_pid: %d",
- i, (int) s->active_pid);
-
SpinLockAcquire(&s->mutex);
if (s->active_pid == MyProcPid)
{
}
LWLockRelease(ReplicationSlotControlLock);
-
- elog(DEBUG3, "temporary replication slot cleanup: done");
}
/*
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
- /* temp debugging aid to analyze 019_replslot_limit failures */
- elog(DEBUG3, "replication slot drop: %s: begin", NameStr(slot->data.name));
-
/*
* If some other backend ran this code concurrently with us, we might try
* to delete a slot with a certain name while someone else was trying to
path, tmppath)));
}
- elog(DEBUG3, "replication slot drop: %s: removed on-disk",
- NameStr(slot->data.name));
-
/*
* The slot is definitely gone. Lock out concurrent scans of the array
* long enough to kill it. It's OK to clear the active PID here without
slot->active_pid = 0;
slot->in_use = false;
LWLockRelease(ReplicationSlotControlLock);
-
- elog(DEBUG3, "replication slot drop: %s: marked as not in use", NameStr(slot->data.name));
-
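/* Wake up anyone waiting for this slot to be released. */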
ConditionVariableBroadcast(&slot->active_cv);
- elog(DEBUG3, "replication slot drop: %s: notified others", NameStr(slot->data.name));
-
/*
* Slot is dead and doesn't prevent resource removal anymore, recompute
* limits.
ReplicationSlotsComputeRequiredXmin(false);
ReplicationSlotsComputeRequiredLSN();
- elog(DEBUG3, "replication slot drop: %s: computed required", NameStr(slot->data.name));
-
/*
* If removing the directory fails, the worst thing that will happen is
* that the user won't be able to create a new slot with the same name
ereport(WARNING,
(errmsg("could not remove directory \"%s\"", tmppath)));
- elog(DEBUG3, "replication slot drop: %s: removed directory", NameStr(slot->data.name));
-
/*
* Drop the statistics entry for the replication slot. Do this while
* holding ReplicationSlotAllocationLock so that we don't drop the
* statistics entry of a newly created slot with the same name while
* we're still cleaning up the detritus of the old one.
*/
LWLockRelease(ReplicationSlotAllocationLock);
-
- elog(DEBUG3, "replication slot drop: %s: done",
- NameStr(slot->data.name));
}
/*
(void) kill(active_pid, SIGTERM);
last_signaled_pid = active_pid;
}
- else
- {
- /* temp debugging aid to analyze 019_replslot_limit failures */
- elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"",
- active_pid, NameStr(slotname));
- }
/* Wait until the slot is released. */
ConditionVariableSleep(&s->active_cv,
XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN);
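/* Any slot whose restart_lsn has fallen behind oldestLSN is obsolete and gets invalidated below. */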
restart:
- /* temp debugging aid to analyze 019_replslot_limit failures */
- elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X",
- LSN_FORMAT_ARGS(oldestLSN));
-
LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
for (int i = 0; i < max_replication_slots; i++)
{
ReplicationSlotsComputeRequiredLSN();
}
- elog(DEBUG3, "done invalidating obsolete replication slots");
-
return invalidated;
}
max_wal_size = 2MB
log_checkpoints = yes
max_slot_wal_keep_size = 1MB
-
- # temp debugging aid to analyze 019_replslot_limit failures
- log_min_messages=debug3
));
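# max_slot_wal_keep_size is kept tiny so that slot rep3 can be invalidated
# with very little WAL activity.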
$node_primary3->start;
$node_primary3->safe_psql('postgres',
"SELECT pg_create_physical_replication_slot('rep3')");
# Take backup
$backup_name = 'my_backup';
-$node_primary3->backup($backup_name, backup_options => ['--verbose']);
+$node_primary3->backup($backup_name);
# Create standby
my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
$node_standby3->init_from_backup($node_primary3, $backup_name,
my $senderpid;
-# We've seen occasional cases where multiple walsender pids are active. It
-# could be that we're just observing process shutdown being slow. To collect
-# more information, retry a couple times, print a bit of debugging information
-# each iteration. Don't fail the test if retries find just one pid, the
-# buildfarm failures are too noisy.
+# We've seen occasional cases where multiple walsender pids are still active
+# at this point, apparently just due to process shutdown being slow. To avoid
+# spurious failures, retry a couple times.
my $i = 0;
while (1)
{