From 07a34d2541687779fb07ce1a294454659279b4e2 Mon Sep 17 00:00:00 2001
From: Khanna <Shubham.Khanna@fujitsu.com>
Date: Tue, 24 Dec 2024 12:27:27 +0530
Subject: [PATCH v10] Validate-max_slot_wal_keep_size-in-pg_createsubscriber

This patch introduces validation for the 'max_slot_wal_keep_size' GUC in the
'pg_createsubscriber' utility. The utility now checks that the publisher's
'max_slot_wal_keep_size' is set to '-1' during subscription creation.
This ensures proper functioning of logical replication slots.

The 'pg_createsubscriber' utility is updated to fetch and validate the
'max_slot_wal_keep_size' setting from the publisher. A warning is raised during
the '--dry-run' mode if the configuration is set to a non-default value.

This patch logs a warning if 'max_slot_wal_keep_size' is not set to '-1',
highlighting a potential risk of required WAL files being deleted on the
publisher, which could disrupt logical replication.
A test case has been added to validate correct warning reporting
when 'max_slot_wal_keep_size' is misconfigured.
---
 doc/src/sgml/ref/pg_createsubscriber.sgml     |  7 ++
 src/bin/pg_basebackup/pg_createsubscriber.c   | 20 +++++-
 .../t/040_pg_createsubscriber.pl              | 64 +++++++++++++++----
 3 files changed, 76 insertions(+), 15 deletions(-)

diff --git a/doc/src/sgml/ref/pg_createsubscriber.sgml b/doc/src/sgml/ref/pg_createsubscriber.sgml
index 26b8e64a4e..40b8fac50e 100644
--- a/doc/src/sgml/ref/pg_createsubscriber.sgml
+++ b/doc/src/sgml/ref/pg_createsubscriber.sgml
@@ -313,6 +313,13 @@ PostgreSQL documentation
     linkend="guc-max-wal-senders"/> configured to a value greater than or equal
     to the number of specified databases and existing WAL sender processes.
    </para>
+
+   <para>
+   Replication failures can occur if required WAL files are missing. To prevent
+   this, the source server must set <xref linkend="guc-max-slot-wal-keep-size"/>
+   to <literal>-1</literal> to ensure that required WAL files are not
+   prematurely removed.
+   </para>
   </refsect2>
 
   <refsect2>
diff --git a/src/bin/pg_basebackup/pg_createsubscriber.c b/src/bin/pg_basebackup/pg_createsubscriber.c
index faf18ccf13..7433c7ae2a 100644
--- a/src/bin/pg_basebackup/pg_createsubscriber.c
+++ b/src/bin/pg_basebackup/pg_createsubscriber.c
@@ -849,6 +849,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	int			max_walsenders;
 	int			cur_walsenders;
 	int			max_prepared_transactions;
+	char	   *max_slot_wal_keep_size;
 
 	pg_log_info("checking settings on publisher");
 
@@ -872,6 +873,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	 * - wal_level = logical
 	 * - max_replication_slots >= current + number of dbs to be converted
 	 * - max_wal_senders >= current + number of dbs to be converted
+	 * - max_slot_wal_keep_size = -1 (to prevent deletion of required WAL files)
 	 * -----------------------------------------------------------------------
 	 */
 	res = PQexec(conn,
@@ -880,7 +882,8 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 				 " (SELECT count(*) FROM pg_catalog.pg_replication_slots),"
 				 " pg_catalog.current_setting('max_wal_senders'),"
 				 " (SELECT count(*) FROM pg_catalog.pg_stat_activity WHERE backend_type = 'walsender'),"
-				 " pg_catalog.current_setting('max_prepared_transactions')");
+				 " pg_catalog.current_setting('max_prepared_transactions'),"
+				 " pg_catalog.current_setting('max_slot_wal_keep_size')");
 
 	if (PQresultStatus(res) != PGRES_TUPLES_OK)
 	{
@@ -895,6 +898,7 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	max_walsenders = atoi(PQgetvalue(res, 0, 3));
 	cur_walsenders = atoi(PQgetvalue(res, 0, 4));
 	max_prepared_transactions = atoi(PQgetvalue(res, 0, 5));
+	max_slot_wal_keep_size = pg_strdup(PQgetvalue(res, 0, 6));
 
 	PQclear(res);
 
@@ -905,6 +909,8 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 	pg_log_debug("publisher: current wal senders: %d", cur_walsenders);
 	pg_log_debug("publisher: max_prepared_transactions: %d",
 				 max_prepared_transactions);
+	pg_log_debug("publisher: max_slot_wal_keep_size: %s",
+				 max_slot_wal_keep_size);
 
 	disconnect_database(conn, false);
 
@@ -939,6 +945,18 @@ check_publisher(const struct LogicalRepInfo *dbinfo)
 							  "Prepared transactions will be replicated at COMMIT PREPARED.");
 	}
 
+	/*
+	 * Validate 'max_slot_wal_keep_size'. If this parameter is set to a
+	 * non-default value, it may cause replication failures due to required
+	 * WAL files being prematurely removed.
+	 */
+	if (dry_run && (strcmp(max_slot_wal_keep_size, "-1") != 0))
+	{
+		pg_log_warning("publisher requires WAL size must not be restricted");
+		pg_log_warning_hint("Set the configuration parameter \"%s\" to -1 to ensure that required WAL files are not prematurely removed.",
+							"max_slot_wal_keep_size");
+	}
+
 	pg_free(wal_level);
 
 	if (failed)
diff --git a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
index 5426159fa5..d55fd78009 100644
--- a/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
+++ b/src/bin/pg_basebackup/t/040_pg_createsubscriber.pl
@@ -318,24 +318,60 @@ $node_p->safe_psql($db1,
 $node_p->wait_for_replay_catchup($node_s);
 $node_s->stop;
 
-# dry run mode on node S
+# Configure 'max_slot_wal_keep_size = 10MB' on the publisher and
+# reload configuration
+$node_p->append_conf('postgresql.conf', 'max_slot_wal_keep_size = 10MB');
+$node_p->reload;
+
+# dry run mode on node S and verify the warning message for misconfigured
+# 'max_slot_wal_keep_size'
+command_checks_all(
+	[
+		'pg_createsubscriber', '--verbose',
+		'--verbose', '--recovery-timeout',
+		"$PostgreSQL::Test::Utils::timeout_default", '--dry-run',
+		'--pgdata', $node_s->data_dir,
+		'--publisher-server', $node_p->connstr($db1),
+		'--socketdir', $node_s->host,
+		'--subscriber-port', $node_s->port,
+		'--publication', 'pub1',
+		'--publication', 'pub2',
+		'--subscription', 'sub1',
+		'--subscription', 'sub2',
+		'--database', $db1,
+		'--database', $db2
+	],
+	0,
+	[qr/./],
+	[
+		qr/pg_createsubscriber: warning: publisher requires WAL size must not be restricted/,
+		qr/Set the configuration parameter "max_slot_wal_keep_size" to -1 to ensure that required WAL files are not prematurely removed./,
+	],
+	'Validate warning for misconfigured max_slot_wal_keep_size on the publisher'
+);
+
+# And, same again to see the output...
 command_ok(
 	[
 		'pg_createsubscriber', '--verbose',
-		'--recovery-timeout', "$PostgreSQL::Test::Utils::timeout_default",
-		'--dry-run', '--pgdata',
-		$node_s->data_dir, '--publisher-server',
-		$node_p->connstr($db1), '--socketdir',
-		$node_s->host, '--subscriber-port',
-		$node_s->port, '--publication',
-		'pub1', '--publication',
-		'pub2', '--subscription',
-		'sub1', '--subscription',
-		'sub2', '--database',
-		$db1, '--database',
-		$db2
+		'--verbose', '--recovery-timeout',
+		"$PostgreSQL::Test::Utils::timeout_default", '--dry-run',
+		'--pgdata', $node_s->data_dir,
+		'--publisher-server', $node_p->connstr($db1),
+		'--socketdir', $node_s->host,
+		'--subscriber-port', $node_s->port,
+		'--publication', 'pub1',
+		'--publication', 'pub2',
+		'--subscription', 'sub1',
+		'--subscription', 'sub2',
+		'--database', $db1,
+		'--database', $db2
 	],
-	'run pg_createsubscriber --dry-run on node S');
+	'Ditto above, but this time using command_ok to retain the output');
+
+# Reset 'max_slot_wal_keep_size' to default after the test
+$node_p->append_conf('postgresql.conf', 'max_slot_wal_keep_size = -1');
+$node_p->reload;
 
 # Check if node S is still a standby
 $node_s->start;
-- 
2.34.1

