From d6dc779388e1474400b653d9ff9c06a711d4f9f8 Mon Sep 17 00:00:00 2001
From: Zhijie Hou <houzj.fnst@fujitsu.com>
Date: Wed, 4 Mar 2026 10:29:42 +0800
Subject: [PATCH v7 1/2] Extend the retry logic in pg_sync_replication_slots()

Currently, pg_sync_replication_slots() reports an error when the WAL prior to a
replication slot's confirmed_flush_lsn has not been flushed on the standby. This
requires users to repeatedly invoke the function until the standby catches up,
which is not user-friendly.

This commit improves the behavior by making the function keep retrying until
the standby catches up and the slots are successfully persisted, following the
retry logic introduced in commit 0d2d4a0.
---
 doc/src/sgml/func/func-admin.sgml          | 4 +++-
 src/backend/replication/logical/slotsync.c | 6 +++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/doc/src/sgml/func/func-admin.sgml b/doc/src/sgml/func/func-admin.sgml
index 3ac81905d1f..7f20faf99e8 100644
--- a/doc/src/sgml/func/func-admin.sgml
+++ b/doc/src/sgml/func/func-admin.sgml
@@ -1495,7 +1495,9 @@ postgres=# SELECT '0/0'::pg_lsn + pd.segment_number * ps.setting::int + :offset
         Synchronize the logical failover replication slots from the primary
         server to the standby server. This function can only be executed on the
         standby server. Temporary synced slots, if any, cannot be used for
-        logical decoding and must be dropped after promotion. See
+        logical decoding and must be dropped after promotion. This function
+        retries cyclically until all the failover slots that existed on the
+        primary at the start of the function call are synchronized. See
         <xref linkend="logicaldecoding-replication-slots-synchronization"/> for details.
         Note that this function cannot be executed if
         <link linkend="guc-sync-replication-slots"><varname>
diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 062a08ccb88..99f4655ff99 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -34,6 +34,10 @@
  * RS_TEMPORARY. Once the decoding from corresponding LSNs can reach a
  * consistent point, they will be marked as RS_PERSISTENT.
  *
+ * If the WAL prior to the remote slot's confirmed_flush_lsn has not been
+ * flushed on the standby, the slot is marked as RS_TEMPORARY. Once the standby
+ * catches up and flushes that WAL, the slot is marked as RS_PERSISTENT.
+ *
  * The slot sync worker waits for some time before the next synchronization,
  * with the duration varying based on whether any slots were updated during
  * the last cycle. Refer to the comments above wait_for_slot_activity() for
@@ -218,7 +222,7 @@ update_local_synced_slot(RemoteSlot *remote_slot, Oid remote_dbid)
 		 * Can get here only if GUC 'synchronized_standby_slots' on the
 		 * primary server was not configured correctly.
 		 */
-		ereport(AmLogicalSlotSyncWorkerProcess() ? LOG : ERROR,
+		ereport(LOG,
 				errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
 				errmsg("skipping slot synchronization because the received slot sync"
 					   " LSN %X/%08X for slot \"%s\" is ahead of the standby position %X/%08X",
-- 
2.51.1.windows.1

