From 40ded2d62287aa8fe936f2e1ba4eafa42a11a905 Mon Sep 17 00:00:00 2001
From: Anthony Hsu <erwaman@gmail.com>
Date: Sun, 22 Feb 2026 18:26:01 +0000
Subject: [PATCH v2] Set 1s WaitLatch timeout if standby limit has expired in
 ResolveRecoveryConflictWithBufferPin

When the startup process tries to acquire a cleanup lock and encounters
a buffer pin conflict, it calls ResolveRecoveryConflictWithBufferPin.
The startup process will wait up to the standby limit before
broadcasting a PROCSIG_RECOVERY_CONFLICT_BUFFERPIN signal. Backends
receiving this signal will cancel themselves if they are holding a
conflicting buffer pin. However, a possible race scenario is:

1. Suppose the standby limit has expired, so the startup process broadcasts the
   PROCSIG_RECOVERY_CONFLICT_BUFFERPIN signal.
2. A backend receives the signal, finds that it does NOT hold a
   conflicting buffer pin, and does nothing.
3. This backend (or a new backend) _then_ acquires a conflicting buffer pin.

Previously, the startup process would not resend
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN to the backend even though standby
limit has already expired. The startup process would just keep waiting
until the pin count is reduced to 1 and finally UnpinBuffer() wakes it
up.

This patch introduces a 1s timeout to the WaitLatch that startup does if
the standby limit has already expired. This ensures if new pinners
arrive after startup has already broadcast the
PROCSIG_RECOVERY_CONFLICT_BUFFERPIN signal, startup will rebroadcast the
signal periodically so that the new pinners will be notified to cancel
themselves if they are blocking startup.
---
 src/backend/storage/ipc/standby.c | 22 +++++++++++++++-------
 src/backend/storage/lmgr/proc.c   | 19 ++++++++++++++++---
 src/include/storage/proc.h        |  2 ++
 3 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/src/backend/storage/ipc/standby.c b/src/backend/storage/ipc/standby.c
index d83afbfb9d6..49b57a8aa1d 100644
--- a/src/backend/storage/ipc/standby.c
+++ b/src/backend/storage/ipc/standby.c
@@ -793,12 +793,14 @@ void
 ResolveRecoveryConflictWithBufferPin(void)
 {
 	TimestampTz ltime;
+	bool standby_limit_expired;
 
 	Assert(InHotStandby);
 
 	ltime = GetStandbyLimitTime();
+	standby_limit_expired = GetCurrentTimestamp() >= ltime && ltime != 0;
 
-	if (GetCurrentTimestamp() >= ltime && ltime != 0)
+	if (standby_limit_expired)
 	{
 		/*
 		 * We're already behind, so clear a path as quickly as possible.
@@ -833,14 +835,20 @@ ResolveRecoveryConflictWithBufferPin(void)
 
 	/*
 	 * Wait to be signaled by UnpinBuffer() or for the wait to be interrupted
-	 * by one of the timeouts established above.
+	 * by one of the timeouts established above. If the standby limit has
+	 * already expired, we also set a 1s timeout. This ensures that if backends
+	 * acquire conflicting pins *after* processing the
+	 * PROCSIG_RECOVERY_CONFLICT_BUFFERPIN signal, the startup process will
+	 * resend the signal to notify these backends to cancel their queries.
 	 *
-	 * We assume that only UnpinBuffer() and the timeout requests established
-	 * above can wake us up here. WakeupRecovery() called by walreceiver or
-	 * SIGHUP signal handler, etc cannot do that because it uses the different
-	 * latch from that ProcWaitForSignal() waits on.
+	 * We assume that only UnpinBuffer(), the timeout requests established
+	 * above, and the 1s timeout (if standby limit has expired) can wake us up
+	 * here. WakeupRecovery() called by walreceiver or SIGHUP signal handler,
+	 * etc cannot do that because it uses the different latch from that
+	 * ProcWaitForSignalWithTimeout() waits on.
 	 */
-	ProcWaitForSignal(WAIT_EVENT_BUFFER_CLEANUP);
+	ProcWaitForSignalWithTimeout(WAIT_EVENT_BUFFER_CLEANUP,
+								 standby_limit_expired ? 1000 : 0);
 
 	if (got_standby_delay_timeout)
 		SendRecoveryConflictWithBufferPin(RECOVERY_CONFLICT_BUFFERPIN);
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e2f34075d39..f2112baf710 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1962,16 +1962,29 @@ GetLockHoldersAndWaiters(LOCALLOCK *locallock, StringInfo lock_holders_sbuf,
 
 /*
  * ProcWaitForSignal - wait for a signal from another backend.
+ */
+void
+ProcWaitForSignal(uint32 wait_event_info)
+{
+	ProcWaitForSignalWithTimeout(wait_event_info, 0);
+}
+
+/*
+ * ProcWaitForSignalWithTimeout - wait for a signal from another backend with a
+ * timeout.
  *
  * As this uses the generic process latch the caller has to be robust against
  * unrelated wakeups: Always check that the desired state has occurred, and
  * wait again if not.
  */
 void
-ProcWaitForSignal(uint32 wait_event_info)
+ProcWaitForSignalWithTimeout(uint32 wait_event_info, long timeout_ms)
 {
-	(void) WaitLatch(MyLatch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0,
-					 wait_event_info);
+	int wake_events = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH;
+	if (timeout_ms > 0)
+		wake_events |= WL_TIMEOUT;
+
+	(void) WaitLatch(MyLatch, wake_events, timeout_ms, wait_event_info);
 	ResetLatch(MyLatch);
 	CHECK_FOR_INTERRUPTS();
 }
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index a8d2e7db1a1..c68698817a7 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -576,6 +576,8 @@ extern void GetLockHoldersAndWaiters(LOCALLOCK *locallock,
 									 int *lockHoldersNum);
 
 extern void ProcWaitForSignal(uint32 wait_event_info);
+extern void ProcWaitForSignalWithTimeout(uint32 wait_event_info,
+										 long timeout_ms);
 extern void ProcSendSignal(ProcNumber procNumber);
 
 extern PGPROC *AuxiliaryPidGetProc(int pid);
-- 
2.53.0.345.g96ddfc5eaa-goog

