From 1d15d46a9ea461e9f4da21dd8a4f907b73df1f07 Mon Sep 17 00:00:00 2001
From: Nisha Moond <nisha.moond412@gmail.com>
Date: Tue, 28 Jan 2025 16:26:20 +0530
Subject: [PATCH v64 2/2] Add TAP test for slot invalidation based on inactive
 timeout.

Since the minimum value for GUC 'idle_replication_slot_timeout' is one minute,
the test takes 2-3 minutes to complete and is disabled by default.
Use PG_TEST_EXTRA=idle_replication_slot_timeout with "make" to run the test.
---
 doc/src/sgml/regress.sgml                     |  10 +
 src/test/recovery/README                      |   5 +
 src/test/recovery/meson.build                 |   1 +
 .../t/044_invalidate_inactive_slots.pl        | 190 ++++++++++++++++++
 4 files changed, 206 insertions(+)
 create mode 100644 src/test/recovery/t/044_invalidate_inactive_slots.pl

diff --git a/doc/src/sgml/regress.sgml b/doc/src/sgml/regress.sgml
index 7c474559bd..193622dbf5 100644
--- a/doc/src/sgml/regress.sgml
+++ b/doc/src/sgml/regress.sgml
@@ -347,6 +347,16 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance libpq_encryption'
       </para>
      </listitem>
     </varlistentry>
+
+    <varlistentry>
+     <term><literal>idle_replication_slot_timeout</literal></term>
+     <listitem>
+      <para>
+       Runs the test <filename>src/test/recovery/t/044_invalidate_inactive_slots.pl</filename>.
+       Not enabled by default because it is time consuming.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
 
    Tests for features that are not supported by the current build
diff --git a/src/test/recovery/README b/src/test/recovery/README
index 896df0ad05..2941776780 100644
--- a/src/test/recovery/README
+++ b/src/test/recovery/README
@@ -30,4 +30,9 @@ PG_TEST_EXTRA=wal_consistency_checking
 to the "make" command.  This is resource-intensive, so it's not done
 by default.
 
+If you want to test idle_replication_slot_timeout, add
+PG_TEST_EXTRA=idle_replication_slot_timeout
+to the "make" command. This test takes over 2 minutes, so it's not done
+by default.
+
 See src/test/perl/README for more info about running these tests.
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 0428704dbf..057bcde143 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -52,6 +52,7 @@ tests += {
       't/041_checkpoint_at_promote.pl',
       't/042_low_level_backup.pl',
       't/043_no_contrecord_switch.pl',
+      't/044_invalidate_inactive_slots.pl',
     ],
   },
 }
diff --git a/src/test/recovery/t/044_invalidate_inactive_slots.pl b/src/test/recovery/t/044_invalidate_inactive_slots.pl
new file mode 100644
index 0000000000..efd2b050f0
--- /dev/null
+++ b/src/test/recovery/t/044_invalidate_inactive_slots.pl
@@ -0,0 +1,190 @@
+# Copyright (c) 2025, PostgreSQL Global Development Group
+
+# Test for replication slots invalidation
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+use Test::More;
+
+# The test takes over two minutes to complete. Run it only if
+# idle_replication_slot_timeout is specified in PG_TEST_EXTRA.
+if (  !$ENV{PG_TEST_EXTRA}
+	|| $ENV{PG_TEST_EXTRA} !~ /\bidle_replication_slot_timeout\b/)
+{
+	plan skip_all =>
+	  'test idle_replication_slot_timeout not enabled in PG_TEST_EXTRA';
+}
+
+# =============================================================================
+# Testcase start
+#
+# Test invalidation of streaming standby slot and logical failover slot on the
+# primary due to idle timeout. Also, test logical failover slot synced to
+# the standby from the primary doesn't get invalidated on its own, but gets the
+# invalidated state from the primary.
+
+# Initialize primary
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(allows_streaming => 'logical');
+
+# Avoid unpredictability
+$primary->append_conf(
+	'postgresql.conf', qq{
+checkpoint_timeout = 1h
+});
+$primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$primary->backup($backup_name);
+
+# Create sync slot on the primary
+$primary->psql('postgres',
+	q{SELECT pg_create_logical_replication_slot('sync_slot1', 'test_decoding', false, false, true);}
+);
+
+# Create standby slot on the primary
+$primary->safe_psql(
+	'postgres', qq[
+    SELECT pg_create_physical_replication_slot(slot_name := 'sb_slot1', immediately_reserve := true);
+]);
+
+# Create standby
+my $standby1 = PostgreSQL::Test::Cluster->new('standby1');
+$standby1->init_from_backup($primary, $backup_name, has_streaming => 1);
+
+my $connstr = $primary->connstr;
+$standby1->append_conf(
+	'postgresql.conf', qq(
+hot_standby_feedback = on
+primary_slot_name = 'sb_slot1'
+idle_replication_slot_timeout = 1
+primary_conninfo = '$connstr dbname=postgres'
+));
+$standby1->start;
+
+# Wait until the standby has replayed enough data
+$primary->wait_for_catchup($standby1);
+
+# Set timeout GUC on the standby to verify that the next checkpoint will not
+# invalidate synced slots.
+my $idle_timeout_1min = 1;
+
+# Sync the primary slots to the standby
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+# Confirm that the logical failover slot is created on the standby
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1' AND synced
+			AND NOT temporary
+			AND invalidation_reason IS NULL;}
+	),
+	't',
+	'logical slot sync_slot1 is synced to standby');
+
+# Give enough time for inactive_since to exceed the timeout
+sleep($idle_timeout_1min * 60 + 10);
+
+# On standby, synced slots are not invalidated by the idle timeout
+# until the invalidation state is propagated from the primary.
+$standby1->safe_psql('postgres', "CHECKPOINT");
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1'
+			AND invalidation_reason IS NULL;}
+	),
+	't',
+	'check that synced slot sync_slot1 has not been invalidated on standby');
+
+my $logstart = -s $primary->logfile;
+
+# Set timeout GUC so that the next checkpoint will invalidate inactive slots
+$primary->safe_psql(
+	'postgres', qq[
+    ALTER SYSTEM SET idle_replication_slot_timeout TO '${idle_timeout_1min}min';
+]);
+$primary->reload;
+
+# Wait for logical failover slot to become inactive on the primary. Note that
+# nobody has acquired the slot yet, so it must get invalidated due to
+# idle timeout.
+wait_for_slot_invalidation($primary, 'sync_slot1', $logstart,
+	$idle_timeout_1min);
+
+# Re-sync the primary slots to the standby. Note that the primary slot was
+# already invalidated (above) due to idle timeout. The standby must just
+# sync the invalidated state.
+$standby1->safe_psql('postgres', "SELECT pg_sync_replication_slots();");
+
+is( $standby1->safe_psql(
+		'postgres',
+		q{SELECT count(*) = 1 FROM pg_replication_slots
+		  WHERE slot_name = 'sync_slot1'
+			AND invalidation_reason = 'idle_timeout';}
+	),
+	"t",
+	'check that invalidation of synced slot sync_slot1 is synced on standby');
+
+# Make the standby slot on the primary inactive and check for invalidation
+$standby1->stop;
+wait_for_slot_invalidation($primary, 'sb_slot1', $logstart,
+	$idle_timeout_1min);
+
+# Testcase end
+# =============================================================================
+
+# Wait for slot to first become idle and then get invalidated
+sub wait_for_slot_invalidation
+{
+	my ($node, $slot, $offset, $idle_timeout_mins) = @_;
+	my $node_name = $node->name;
+
+	trigger_slot_invalidation($node, $slot, $offset, $idle_timeout_mins);
+
+	# Check that an invalidated slot cannot be acquired
+	my ($result, $stdout, $stderr);
+	($result, $stdout, $stderr) = $node->psql(
+		'postgres', qq[
+			SELECT pg_replication_slot_advance('$slot', '0/1');
+	]);
+	ok( $stderr =~ /can no longer get changes from replication slot "$slot"/,
+		"detected error upon trying to acquire invalidated slot $slot on node $node_name"
+	  )
+	  or die
+	  "could not detect error upon trying to acquire invalidated slot $slot on node $node_name";
+}
+
+# Trigger slot invalidation and confirm it in the server log
+sub trigger_slot_invalidation
+{
+	my ($node, $slot, $offset, $idle_timeout_mins) = @_;
+	my $node_name = $node->name;
+	my $invalidated = 0;
+
+	# Give enough time for inactive_since to exceed the timeout
+	sleep($idle_timeout_mins * 60 + 10);
+
+	# Run a checkpoint
+	$node->safe_psql('postgres', "CHECKPOINT");
+
+	# The slot's invalidation should be logged
+	$node->wait_for_log(qr/invalidating obsolete replication slot \"$slot\"/,
+		$offset);
+
+	# Check that the invalidation reason is 'idle_timeout'
+	$node->poll_query_until(
+		'postgres', qq[
+		SELECT COUNT(slot_name) = 1 FROM pg_replication_slots
+			WHERE slot_name = '$slot' AND
+			invalidation_reason = 'idle_timeout';
+	])
+	  or die
+	  "Timed out while waiting for invalidation reason of slot $slot to be set on node $node_name";
+}
+
+done_testing();
-- 
2.34.1

