From 38dc550f363c1c8878fbf06d5616614a36112fac Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Mon, 26 Oct 2020 01:55:31 +0300
Subject: [PATCH v1106 3/4] Add condition variable to wait for next MultiXact
 offset in corner case

GetMultiXactIdMembers() has a corner case in which the offset of the next
multixact is not yet set.  In this case GetMultiXactIdMembers() has to sleep
until that offset is set.  Currently the sleep is implemented in a naive way,
using pg_usleep() and retry.  This commit implements the sleep with a
condition variable, which provides a more efficient way of waiting for the
event.

Discussion: https://postgr.es/m/a7f1c4e1-1015-92a4-2bd4-6736bd13d03e%40postgrespro.ru#c496c4e75fc0605094a0e1f763e6a6ec
Author: Andrey Borodin
Reviewed-by: Kyotaro Horiguchi, Daniel Gustafsson
Reviewed-by: Anastasia Lubennikova, Alexander Korotkov
---
 src/backend/access/transam/multixact.c | 35 ++++++++++++++++++++++++--
 src/backend/postmaster/pgstat.c        |  3 +++
 src/include/pgstat.h                   |  3 ++-
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 6203be0aa3..5d2bbb1ca6 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -82,6 +82,7 @@
 #include "lib/ilist.h"
 #include "miscadmin.h"
 #include "pg_trace.h"
+#include "pgstat.h"
 #include "postmaster/autovacuum.h"
 #include "storage/lmgr.h"
 #include "storage/pmsignal.h"
@@ -233,6 +234,13 @@ typedef struct MultiXactStateData
 	/* support for members anti-wraparound measures */
 	MultiXactOffset offsetStopLimit;	/* known if oldestOffsetKnown */
 
+	/*
+	 * Condition variable for waiting until the offset of the next multixact
+	 * is filled in.  See GetMultiXactIdMembers() and RecordNewMultiXact() for
+	 * details.
+	 */
+	ConditionVariable nextoffCV;
+
 	/*
 	 * Per-backend data starts here.  We have two arrays stored in the area
 	 * immediately following the MultiXactStateData struct. Each is indexed by
@@ -871,6 +879,14 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
 	/* Exchange our lock */
 	LWLockRelease(MultiXactOffsetControlLock);
 
+	/*
+	 * Let everybody know that the offset of this mxid is recorded now.  The
+	 * waiters need the offset of the mxid following their target only to
+	 * compute the number of members of the target mxid, so there is no need
+	 * to delay the wakeup until the members of this mxid are recorded.
+	 */
+	ConditionVariableBroadcast(&MultiXactState->nextoffCV);
+
 	LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
 
 	prev_pageno = -1;
@@ -1368,9 +1384,23 @@ retry:
 		if (nextMXOffset == 0)
 		{
 			/* Corner case 2: next multixact is still being filled in */
+
+			/*
+			 * The backend recording the next mxid has not written its
+			 * offset yet.  Wait for the offset to be written.
+			 */
+			ConditionVariablePrepareToSleep(&MultiXactState->nextoffCV);
+
+			/*
+			 * We don't have to recheck whether the multixact was filled in
+			 * during ConditionVariablePrepareToSleep(), because we were
+			 * holding MultiXactOffsetControlLock.
+			 */
 			LWLockRelease(MultiXactOffsetControlLock);
-			CHECK_FOR_INTERRUPTS();
-			pg_usleep(1000L);
+
+			ConditionVariableSleep(&MultiXactState->nextoffCV,
+								   WAIT_EVENT_WAIT_NEXT_MXMEMBERS);
+			ConditionVariableCancelSleep();
 			goto retry;
 		}
 
@@ -1849,6 +1879,7 @@ MultiXactShmemInit(void)
 
 		/* Make sure we zero out the per-backend state */
 		MemSet(MultiXactState, 0, SHARED_MULTIXACT_STATE_SIZE);
+		ConditionVariableInit(&MultiXactState->nextoffCV);
 	}
 	else
 		Assert(found);
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 4895354a28..8a8b9a4eef 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3730,6 +3730,9 @@ pgstat_get_wait_ipc(WaitEventIPC w)
 		case WAIT_EVENT_SYNC_REP:
 			event_name = "SyncRep";
 			break;
+		case WAIT_EVENT_WAIT_NEXT_MXMEMBERS:
+			event_name = "MultiXactWaitNextMembers";
+			break;
 			/* no default case, so that compiler will warn */
 	}
 
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 58e2e71c6f..e89d946669 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -832,7 +832,8 @@ typedef enum
 	WAIT_EVENT_REPLICATION_ORIGIN_DROP,
 	WAIT_EVENT_REPLICATION_SLOT_DROP,
 	WAIT_EVENT_SAFE_SNAPSHOT,
-	WAIT_EVENT_SYNC_REP
+	WAIT_EVENT_SYNC_REP,
+	WAIT_EVENT_WAIT_NEXT_MXMEMBERS
 } WaitEventIPC;
 
 /* ----------
-- 
2.24.3 (Apple Git-128)

