From 5753debac7bd34aaaaca6a3463b62793d85d15b5 Mon Sep 17 00:00:00 2001
From: Jingxian Li <aqktjcm@qq.com>
Date: Thu, 8 Feb 2024 18:07:27 +0800
Subject: [PATCH] When requesting a lock without nowait, the lock may be
 granted in 4 ways before delaying deadlock detection: 1. locallock exists
 (LockAcquireExtended) 2. fastpath eligible (LockAcquireExtended) 3. no
 conflict with any existing or waiting lock request (LockAcquireExtended) 4.
 before the process joins the lock's wait queue(ProcSleep)

But if we request a lock in nowait mode, only the first 3 ways can be reached. Because the nowait failure logic occurs before ProcSleep is called.

To fix the bug above, ProcSleep function should be split into two parts: the first part (named InsertSelfIntoWaitQueue) is responsible for checking if the lock can be grantedb in the process of determining the insertion point, and adding the lock-requesting process to the lock's wait queue when check fails and  nowait not set, it shoud be called before nowait failure logic in LockAcquireExtended, in this way the nowait case can also have a change to acquire the lock in way 4. The second part (named ProcSleep) is just responsible for handling the deadlock, it is called after nowait logic failure as it was.
---
 src/backend/storage/lmgr/lock.c             | 160 ++++++++++----------
 src/backend/storage/lmgr/proc.c             |  88 +++++++----
 src/include/storage/proc.h                  |   4 +-
 src/test/isolation/expected/lock-nowait.out |   9 ++
 src/test/isolation/isolation_schedule       |   1 +
 src/test/isolation/specs/lock-nowait.spec   |  28 ++++
 6 files changed, 179 insertions(+), 111 deletions(-)
 create mode 100644 src/test/isolation/expected/lock-nowait.out
 create mode 100644 src/test/isolation/specs/lock-nowait.spec

diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index c70a1adb9a..724fe6fa8c 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -362,7 +362,7 @@ static PROCLOCK *SetupLockInTable(LockMethod lockMethodTable, PGPROC *proc,
 static void GrantLockLocal(LOCALLOCK *locallock, ResourceOwner owner);
 static void BeginStrongLockAcquire(LOCALLOCK *locallock, uint32 fasthashcode);
 static void FinishStrongLockAcquire(void);
-static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner);
+static void WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool early_deadlock);
 static void ReleaseLockIfHeld(LOCALLOCK *locallock, bool sessionLock);
 static void LockReassignOwner(LOCALLOCK *locallock, ResourceOwner parent);
 static bool UnGrantLock(LOCK *lock, LOCKMODE lockmode,
@@ -775,6 +775,7 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	LWLock	   *partitionLock;
 	bool		found_conflict;
 	bool		log_lock = false;
+	bool		early_deadlock = false;
 
 	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
 		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
@@ -1027,88 +1028,96 @@ LockAcquireExtended(const LOCKTAG *locktag,
 	else
 	{
 		/*
-		 * We can't acquire the lock immediately.  If caller specified no
-		 * blocking, remove useless table entries and return
-		 * LOCKACQUIRE_NOT_AVAIL without waiting.
+		 * Set bitmask of locks this process already holds on this object.
 		 */
-		if (dontWait)
+		MyProc->heldLocks = proclock->holdMask;
+		
+		if (InsertSelfIntoWaitQueue(locallock, lockMethodTable, dontWait, &early_deadlock) == PROC_WAIT_STATUS_OK)
 		{
-			AbortStrongLockAcquire();
-			if (proclock->holdMask == 0)
+			GrantLock(lock, proclock, lockmode);
+			GrantLockLocal(locallock, owner);
+		}
+		else
+		{
+			/*
+			 * We can't acquire the lock immediately.  If caller specified no
+			 * blocking, remove useless table entries and return
+			 * LOCKACQUIRE_NOT_AVAIL without waiting.
+			 */
+			if (dontWait)
 			{
-				uint32		proclock_hashcode;
-
-				proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
-				dlist_delete(&proclock->lockLink);
-				dlist_delete(&proclock->procLink);
-				if (!hash_search_with_hash_value(LockMethodProcLockHash,
-												 &(proclock->tag),
-												 proclock_hashcode,
-												 HASH_REMOVE,
-												 NULL))
-					elog(PANIC, "proclock table corrupted");
+				AbortStrongLockAcquire();
+				if (proclock->holdMask == 0)
+				{
+					uint32 proclock_hashcode;
+
+					proclock_hashcode = ProcLockHashCode(&proclock->tag, hashcode);
+					dlist_delete(&proclock->lockLink);
+					dlist_delete(&proclock->procLink);
+					if (!hash_search_with_hash_value(LockMethodProcLockHash,
+						&(proclock->tag),
+						proclock_hashcode,
+						HASH_REMOVE,
+						NULL))
+						elog(PANIC, "proclock table corrupted");
+				}
+				else
+					PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
+				lock->nRequested--;
+				lock->requested[lockmode]--;
+				LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
+				Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
+				Assert(lock->nGranted <= lock->nRequested);
+				LWLockRelease(partitionLock);
+				if (locallock->nLocks == 0)
+					RemoveLocalLock(locallock);
+				if (locallockp)
+					*locallockp = NULL;
+				return LOCKACQUIRE_NOT_AVAIL;
 			}
-			else
-				PROCLOCK_PRINT("LockAcquire: NOWAIT", proclock);
-			lock->nRequested--;
-			lock->requested[lockmode]--;
-			LOCK_PRINT("LockAcquire: conditional lock failed", lock, lockmode);
-			Assert((lock->nRequested > 0) && (lock->requested[lockmode] >= 0));
-			Assert(lock->nGranted <= lock->nRequested);
-			LWLockRelease(partitionLock);
-			if (locallock->nLocks == 0)
-				RemoveLocalLock(locallock);
-			if (locallockp)
-				*locallockp = NULL;
-			return LOCKACQUIRE_NOT_AVAIL;
-		}
 
-		/*
-		 * Set bitmask of locks this process already holds on this object.
-		 */
-		MyProc->heldLocks = proclock->holdMask;
-
-		/*
-		 * Sleep till someone wakes me up.
-		 */
+			/*
+			 * Sleep till someone wakes me up.
+			 */
 
-		TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1,
-										 locktag->locktag_field2,
-										 locktag->locktag_field3,
-										 locktag->locktag_field4,
-										 locktag->locktag_type,
-										 lockmode);
+			TRACE_POSTGRESQL_LOCK_WAIT_START(locktag->locktag_field1,
+				locktag->locktag_field2,
+				locktag->locktag_field3,
+				locktag->locktag_field4,
+				locktag->locktag_type,
+				lockmode);
 
-		WaitOnLock(locallock, owner);
+			WaitOnLock(locallock, owner, early_deadlock);
 
-		TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
-										locktag->locktag_field2,
-										locktag->locktag_field3,
-										locktag->locktag_field4,
-										locktag->locktag_type,
-										lockmode);
+			TRACE_POSTGRESQL_LOCK_WAIT_DONE(locktag->locktag_field1,
+				locktag->locktag_field2,
+				locktag->locktag_field3,
+				locktag->locktag_field4,
+				locktag->locktag_type,
+				lockmode);
 
-		/*
-		 * NOTE: do not do any material change of state between here and
-		 * return.  All required changes in locktable state must have been
-		 * done when the lock was granted to us --- see notes in WaitOnLock.
-		 */
+			/*
+			 * NOTE: do not do any material change of state between here and
+			 * return.  All required changes in locktable state must have been
+			 * done when the lock was granted to us --- see notes in WaitOnLock.
+			 */
 
-		/*
-		 * Check the proclock entry status, in case something in the ipc
-		 * communication doesn't work correctly.
-		 */
-		if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
-		{
-			AbortStrongLockAcquire();
-			PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
-			LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
-			/* Should we retry ? */
-			LWLockRelease(partitionLock);
-			elog(ERROR, "LockAcquire failed");
+			/*
+			 * Check the proclock entry status, in case something in the ipc
+			 * communication doesn't work correctly.
+			 */
+			if (!(proclock->holdMask & LOCKBIT_ON(lockmode)))
+			{
+				AbortStrongLockAcquire();
+				PROCLOCK_PRINT("LockAcquire: INCONSISTENT", proclock);
+				LOCK_PRINT("LockAcquire: INCONSISTENT", lock, lockmode);
+				/* Should we retry ? */
+				LWLockRelease(partitionLock);
+				elog(ERROR, "LockAcquire failed");
+			}
+			PROCLOCK_PRINT("LockAcquire: granted", proclock);
+			LOCK_PRINT("LockAcquire: granted", lock, lockmode);
 		}
-		PROCLOCK_PRINT("LockAcquire: granted", proclock);
-		LOCK_PRINT("LockAcquire: granted", lock, lockmode);
 	}
 
 	/*
@@ -1782,11 +1791,8 @@ MarkLockClear(LOCALLOCK *locallock)
  * The appropriate partition lock must be held at entry.
  */
 static void
-WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
+WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner, bool early_deadlock)
 {
-	LOCKMETHODID lockmethodid = LOCALLOCK_LOCKMETHOD(*locallock);
-	LockMethod	lockMethodTable = LockMethods[lockmethodid];
-
 	LOCK_PRINT("WaitOnLock: sleeping on lock",
 			   locallock->lock, locallock->tag.mode);
 
@@ -1815,7 +1821,7 @@ WaitOnLock(LOCALLOCK *locallock, ResourceOwner owner)
 	 */
 	PG_TRY();
 	{
-		if (ProcSleep(locallock, lockMethodTable) != PROC_WAIT_STATUS_OK)
+		if (ProcSleep(locallock, early_deadlock) != PROC_WAIT_STATUS_OK)
 		{
 			/*
 			 * We failed as a result of a deadlock, see CheckDeadLock(). Quit
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index e5977548fe..f59f6e9e55 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1018,39 +1018,29 @@ AuxiliaryPidGetProc(int pid)
 	return result;
 }
 
-
 /*
- * ProcSleep -- put a process to sleep on the specified lock
+ * InsertSelfIntoWaitQueue -- Check if the lock can be grantedb in the 
+ * process of determining the insertion point, and adding myProc to the 
+ * lock's wait queue when check fails and  dontWait was false.
  *
  * Caller must have set MyProc->heldLocks to reflect locks already held
  * on the lockable object by this process (under all XIDs).
  *
- * The lock table's partition lock must be held at entry, and will be held
- * at exit.
+ * The lock table's partition lock must be held at entry.
  *
- * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock).
- *
- * ASSUME: that no one will fiddle with the queue until after
- *		we release the partition lock.
+ * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_WAITING if not.
  *
  * NOTES: The process queue is now a priority queue for locking.
  */
 ProcWaitStatus
-ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
+InsertSelfIntoWaitQueue(LOCALLOCK *locallock, LockMethod lockMethodTable, bool dontWait, bool *early_deadlock)
 {
 	LOCKMODE	lockmode = locallock->tag.mode;
 	LOCK	   *lock = locallock->lock;
 	PROCLOCK   *proclock = locallock->proclock;
-	uint32		hashcode = locallock->hashcode;
-	LWLock	   *partitionLock = LockHashPartitionLock(hashcode);
 	dclist_head *waitQueue = &lock->waitProcs;
 	PGPROC	   *insert_before = NULL;
 	LOCKMASK	myHeldLocks = MyProc->heldLocks;
-	TimestampTz standbyWaitStart = 0;
-	bool		early_deadlock = false;
-	bool		allow_autovacuum_cancel = true;
-	bool		logged_recovery_conflict = false;
-	ProcWaitStatus myWaitStatus;
 	PGPROC	   *leader = MyProc->lockGroupLeader;
 
 	/*
@@ -1125,8 +1115,11 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 					 * a flag to check below, and break out of loop.  Also,
 					 * record deadlock info for later message.
 					 */
-					RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
-					early_deadlock = true;
+					if (!dontWait)
+					{
+						RememberSimpleDeadLock(MyProc, lockmode, lock, proc);
+						*early_deadlock = true;
+					}
 					break;
 				}
 				/* I must go before this waiter.  Check special case. */
@@ -1135,8 +1128,6 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 										proclock))
 				{
 					/* Skip the wait and just grant myself the lock. */
-					GrantLock(lock, proclock, lockmode);
-					GrantAwaitedLock();
 					return PROC_WAIT_STATUS_OK;
 				}
 
@@ -1149,22 +1140,53 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 		}
 	}
 
-	/*
-	 * Insert self into queue, at the position determined above.
-	 */
-	if (insert_before)
-		dclist_insert_before(waitQueue, &insert_before->links, &MyProc->links);
-	else
-		dclist_push_tail(waitQueue, &MyProc->links);
+	if (!dontWait)
+	{
+		/*
+		 * Insert self into queue, at the position determined above.
+		 */
+		if (insert_before)
+			dclist_insert_before(waitQueue, &insert_before->links, &MyProc->links);
+		else
+			dclist_push_tail(waitQueue, &MyProc->links);
+
+		lock->waitMask |= LOCKBIT_ON(lockmode);
+
+		/* Set up wait information in PGPROC object, too */
+		MyProc->waitLock = lock;
+		MyProc->waitProcLock = proclock;
+		MyProc->waitLockMode = lockmode;
 
-	lock->waitMask |= LOCKBIT_ON(lockmode);
+		MyProc->waitStatus = PROC_WAIT_STATUS_WAITING;
+	}
+	return PROC_WAIT_STATUS_WAITING;
+}
 
-	/* Set up wait information in PGPROC object, too */
-	MyProc->waitLock = lock;
-	MyProc->waitProcLock = proclock;
-	MyProc->waitLockMode = lockmode;
 
-	MyProc->waitStatus = PROC_WAIT_STATUS_WAITING;
+/*
+ * ProcSleep -- put a process to sleep on the specified lock
+ *
+ * The lock table's partition lock must be held at entry, and will be held
+ * at exit.
+ *
+ * Result: PROC_WAIT_STATUS_OK if we acquired the lock, PROC_WAIT_STATUS_ERROR if not (deadlock).
+ *
+ * ASSUME: that no one will fiddle with the queue until after
+ *		we release the partition lock.
+ *
+ * NOTES: The process queue is now a priority queue for locking.
+ */
+ProcWaitStatus
+ProcSleep(LOCALLOCK *locallock, bool early_deadlock)
+{
+	LOCKMODE	lockmode = locallock->tag.mode;
+	LOCK	   *lock = locallock->lock;
+	uint32		hashcode = locallock->hashcode;
+	LWLock	   *partitionLock = LockHashPartitionLock(hashcode);
+	TimestampTz standbyWaitStart = 0;
+	bool		allow_autovacuum_cancel = true;
+	bool		logged_recovery_conflict = false;
+	ProcWaitStatus myWaitStatus;
 
 	/*
 	 * If we detected deadlock, give up without waiting.  This must agree with
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 4bc226e36c..5e57e09180 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -449,7 +449,9 @@ extern int	GetStartupBufferPinWaitBufId(void);
 extern bool HaveNFreeProcs(int n, int *nfree);
 extern void ProcReleaseLocks(bool isCommit);
 
-extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable);
+extern ProcWaitStatus InsertSelfIntoWaitQueue(LOCALLOCK *locallock, 
+        LockMethod lockMethodTable, bool dontWait, bool *early_deadlock);
+extern ProcWaitStatus ProcSleep(LOCALLOCK *locallock, bool early_deadlock);
 extern void ProcWakeup(PGPROC *proc, ProcWaitStatus waitStatus);
 extern void ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock);
 extern void CheckDeadLockAlert(void);
diff --git a/src/test/isolation/expected/lock-nowait.out b/src/test/isolation/expected/lock-nowait.out
new file mode 100644
index 0000000000..2dc5aad6f0
--- /dev/null
+++ b/src/test/isolation/expected/lock-nowait.out
@@ -0,0 +1,9 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s1a s2a s1b s1c s2c
+step s1a: LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE;
+step s2a: LOCK TABLE a1 IN EXCLUSIVE MODE; <waiting ...>
+step s1b: LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT;
+step s1c: COMMIT;
+step s2a: <... completed>
+step s2c: COMMIT;
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index b2be88ead1..188fc04f85 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -111,3 +111,4 @@ test: serializable-parallel
 test: serializable-parallel-2
 test: serializable-parallel-3
 test: matview-write-skew
+test: lock-nowait
diff --git a/src/test/isolation/specs/lock-nowait.spec b/src/test/isolation/specs/lock-nowait.spec
new file mode 100644
index 0000000000..aaa0779d8b
--- /dev/null
+++ b/src/test/isolation/specs/lock-nowait.spec
@@ -0,0 +1,28 @@
+# While requesting nowait lock, if the lock requested should
+# be inserted in front of some waiter, check to see if the lock
+# conflicts with already-held locks or the requests before
+# the waiter. If not, then just grant myself the requested 
+# lock immediately.  Test this scenario.
+
+setup
+{
+  CREATE TABLE a1 ();
+}
+
+teardown
+{
+  DROP TABLE a1;
+}
+
+session s1
+setup		{ BEGIN; }
+step s1a	{ LOCK TABLE a1 IN ACCESS EXCLUSIVE MODE; }
+step s1b	{ LOCK TABLE a1 IN SHARE ROW EXCLUSIVE MODE NOWAIT; }
+step s1c	{ COMMIT; }
+
+session s2
+setup		{ BEGIN; }
+step s2a	{ LOCK TABLE a1 IN EXCLUSIVE MODE; }
+step s2c	{ COMMIT; }
+
+permutation s1a s2a s1b s1c s2c
-- 
2.37.1.windows.1

