From 61d17c40775bd6b3fe5f92a89aa896930068b5dd Mon Sep 17 00:00:00 2001
From: Shveta Malik <shveta.malik@gmail.com>
Date: Thu, 4 Apr 2024 15:58:28 +0530
Subject: [PATCH v1] Handle stopSignaled during sync function call.

During promotion, the startup process shuts down the slot sync
worker and sets 'stopSignaled'. Meanwhile, if the slot sync worker
is restarted by the postmaster, the worker exits if 'stopSignaled'
is set.

This handling was missing in the slot sync SQL function pg_sync_replication_slots().
This patch adds the same handling for this function call. The changes are:

1) ShutDownSlotSync() now checks the 'SlotSyncCtx->syncing' flag as well and
waits for it to become false, i.e. waits until a concurrently running SQL
function call has finished.

2) On the other hand, pg_sync_replication_slots() respects the 'stopSignaled'
flag and becomes a no-op if executed by the user in parallel to promotion.
---
 src/backend/replication/logical/slotsync.c | 37 +++++++++++++++++++---
 1 file changed, 32 insertions(+), 5 deletions(-)

diff --git a/src/backend/replication/logical/slotsync.c b/src/backend/replication/logical/slotsync.c
index 9ac847b780..16b200b895 100644
--- a/src/backend/replication/logical/slotsync.c
+++ b/src/backend/replication/logical/slotsync.c
@@ -1357,6 +1357,10 @@ ReplSlotSyncWorkerMain(char *startup_data, size_t startup_data_len)
 
 /*
  * Shut down the slot sync worker.
+ *
+ * It sends a signal to shut down the slot sync worker. It also waits until
+ * the slot sync worker has exited and pg_sync_replication_slots()
+ * has finished.
  */
 void
 ShutDownSlotSync(void)
@@ -1365,16 +1369,21 @@ ShutDownSlotSync(void)
 
 	SlotSyncCtx->stopSignaled = true;
 
-	if (SlotSyncCtx->pid == InvalidPid)
+	/*
+	 * Return if neither the slot sync worker is running nor the function
+	 * pg_sync_replication_slots().
+	 */
+	if ((SlotSyncCtx->pid == InvalidPid) && !SlotSyncCtx->syncing)
 	{
 		SpinLockRelease(&SlotSyncCtx->mutex);
 		return;
 	}
 	SpinLockRelease(&SlotSyncCtx->mutex);
 
-	kill(SlotSyncCtx->pid, SIGINT);
+	if (SlotSyncCtx->pid != InvalidPid)
+		kill(SlotSyncCtx->pid, SIGINT);
 
-	/* Wait for it to die */
+	/* Wait for slot sync to exit */
 	for (;;)
 	{
 		int			rc;
@@ -1392,8 +1401,11 @@ ShutDownSlotSync(void)
 
 		SpinLockAcquire(&SlotSyncCtx->mutex);
 
-		/* Is it gone? */
-		if (SlotSyncCtx->pid == InvalidPid)
+		/*
+		 * Confirm that both the worker and the function
+		 * pg_sync_replication_slots() are done.
+		 */
+		if ((SlotSyncCtx->pid == InvalidPid) && !SlotSyncCtx->syncing)
 			break;
 
 		SpinLockRelease(&SlotSyncCtx->mutex);
@@ -1500,6 +1512,21 @@ slotsync_failure_callback(int code, Datum arg)
 void
 SyncReplicationSlots(WalReceiverConn *wrconn)
 {
+	/*
+	 * Startup process signaled the slot sync to stop, so if meanwhile user
+	 * has initiated slot sync function call, be no-op.
+	 */
+	SpinLockAcquire(&SlotSyncCtx->mutex);
+	if (SlotSyncCtx->stopSignaled)
+	{
+		ereport(LOG,
+				errmsg("skipping slot synchronization as slot sync shutdown is signaled during promotion"));
+
+		SpinLockRelease(&SlotSyncCtx->mutex);
+		return;
+	}
+	SpinLockRelease(&SlotSyncCtx->mutex);
+
 	PG_ENSURE_ERROR_CLEANUP(slotsync_failure_callback, PointerGetDatum(wrconn));
 	{
 		validate_remote_info(wrconn);
-- 
2.34.1

