From 103bb567cf7c0e92a2f352cf2ad19d6d350cb8d7 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Sun, 31 Jul 2022 13:53:19 -0700
Subject: [PATCH v2 4/4] Avoid allocating MultiXacts during VACUUM.

Pass down vacuumlazy.c's OldestXmin cutoff to FreezeMultiXactId(), and
teach it the difference between OldestXmin and FreezeLimit.  Use this
high-level context to intelligently avoid allocating new MultiXactIds
during VACUUM operations.  We should always prefer to avoid allocating
new MultiXacts during VACUUM on general principle.  VACUUM is the only
mechanism that can claw back MultixactId space, so allowing VACUUM to
consume MultiXactId space (for any reason) adds to the risk that the
system will trigger the multiStopLimit wraparound protection mechanism.

FreezeMultiXactId() is now eager when it's cheap to process xmax, and
lazy when it's expensive/risky to process xmax (because an expensive
second pass that might result in allocating a new Multi is required).
We make a similar trade-off to the one made by vacuumlazy.c when a
cleanup lock isn't available on some heap page.  We can usually put off
freezing (for the time being) when it's inconvenient to proceed.  The
only downside to this approach is that it necessitates pushing back the
final relfrozenxid/relminmxid value that can be set in pg_class.
---
 src/backend/access/heap/heapam.c | 49 +++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 699a5acae..e18000d81 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6111,11 +6111,21 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
  * FRM_RETURN_IS_MULTI, since we only leave behind a MultiXactId for these.
  *
  * NB: Creates a _new_ MultiXactId when FRM_RETURN_IS_MULTI is set in "flags".
+ *
+ * Allocating new MultiXactIds during VACUUM is something that we always
+ * prefer to avoid.  Caller passes us down all the context required to do this
+ * on their behalf: cutoff_xid, cutoff_multi, limit_xid, and limit_multi.
+ *
+ * We must never leave behind XIDs/MXIDs from before the "limits" given.
+ * There is lots of leeway around XIDs/MXIDs >= caller's "limits", though.
+ * When it's possible to inexpensively process xmax right away, we're eager
+ * about it.  Otherwise we're lazy about it -- next time it might be cheap.
  */
 static TransactionId
 FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 				  TransactionId relfrozenxid, TransactionId relminmxid,
 				  TransactionId cutoff_xid, MultiXactId cutoff_multi,
+				  TransactionId limit_xid, MultiXactId limit_multi,
 				  uint16 *flags, TransactionId *mxid_oldest_xid_out)
 {
 	TransactionId xid = InvalidTransactionId;
@@ -6208,13 +6218,16 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 	}
 
 	/*
-	 * This multixact might have or might not have members still running, but
-	 * we know it's valid and is newer than the cutoff point for multis.
-	 * However, some member(s) of it may be below the cutoff for Xids, so we
+	 * Some member(s) of this Multi may be below the limit for Xids, so we
 	 * need to walk the whole members array to figure out what to do, if
 	 * anything.
+	 *
+	 * We use limit_xid for this (VACUUM's FreezeLimit), rather than using
+	 * cutoff_xid (VACUUM's OldestXmin).  We greatly prefer to avoid a second
+	 * pass over the Multi that results in allocating a new replacement Multi.
 	 */
-
+	Assert(TransactionIdPrecedesOrEquals(limit_xid, cutoff_xid));
+	Assert(MultiXactIdPrecedesOrEquals(limit_multi, cutoff_multi));
 	nmembers =
 		GetMultiXactIdMembers(multi, &members, false,
 							  HEAP_XMAX_IS_LOCKED_ONLY(t_infomask));
@@ -6225,12 +6238,11 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 		return InvalidTransactionId;
 	}
 
-	/* is there anything older than the cutoff? */
 	need_replace = false;
-	temp_xid_out = *mxid_oldest_xid_out;	/* init for FRM_NOOP */
+	temp_xid_out = *mxid_oldest_xid_out;	/* init for FRM_NOOP optimization */
 	for (i = 0; i < nmembers; i++)
 	{
-		if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
+		if (TransactionIdPrecedes(members[i].xid, limit_xid))
 		{
 			need_replace = true;
 			break;
@@ -6239,11 +6251,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 			temp_xid_out = members[i].xid;
 	}
 
-	/*
-	 * In the simplest case, there is no member older than the cutoff; we can
-	 * keep the existing MultiXactId as-is, avoiding a more expensive second
-	 * pass over the multi
-	 */
+	/* The Multi itself must be >= limit_multi, too */
+	if (!need_replace)
+		need_replace = MultiXactIdPrecedes(multi, limit_multi);
+
 	if (!need_replace)
 	{
 		/*
@@ -6260,6 +6271,9 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 	 * Do a more thorough second pass over the multi to figure out which
 	 * member XIDs actually need to be kept.  Checking the precise status of
 	 * individual members might even show that we don't need to keep anything.
+	 *
+	 * We only reach this far when replacing xmax is absolutely mandatory.
+	 * heap_tuple_would_freeze will indicate that the tuple must be frozen.
 	 */
 	nnewmembers = 0;
 	newmembers = palloc(sizeof(MultiXactMember) * nmembers);
@@ -6359,7 +6373,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 				/*
 				 * Running locker cannot possibly be older than the cutoff.
 				 *
-				 * The cutoff is <= VACUUM's OldestXmin, which is also the
+				 * cutoff_xid is VACUUM's OldestXmin, which is also the
 				 * initial value used for top-level relfrozenxid_out tracking
 				 * state.  A running locker cannot be older than VACUUM's
 				 * OldestXmin, either, so we don't need a temp_xid_out step.
@@ -6529,7 +6543,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 	/*
 	 * Process xmax.  To thoroughly examine the current Xmax value we need to
 	 * resolve a MultiXactId to its member Xids, in case some of them are
-	 * below the given cutoff for Xids.  In that case, those values might need
+	 * below the given limit for Xids.  In that case, those values might need
 	 * freezing, too.  Also, if a multi needs freezing, we cannot simply take
 	 * it out --- if there's a live updater Xid, it needs to be kept.
 	 *
@@ -6547,6 +6561,7 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 		newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
 									relfrozenxid, relminmxid,
 									cutoff_xid, cutoff_multi,
+									limit_xid, limit_multi,
 									&flags, &mxid_oldest_xid_out);
 
 		freeze_xmax = (flags & FRM_INVALIDATE_XMAX);
@@ -6587,12 +6602,18 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 			 * MultiXactId, to carry forward two or more original member XIDs.
 			 * Might have to ratchet back relfrozenxid_out here, though never
 			 * relminmxid_out.
+			 *
+			 * We only do this when we have no choice; heap_tuple_would_freeze
+			 * will definitely force the page to be frozen below.
 			 */
 			Assert(!freeze_xmax);
 			Assert(MultiXactIdIsValid(newxmax));
 			Assert(!MultiXactIdPrecedes(newxmax, xtrack->relminmxid_out));
 			Assert(TransactionIdPrecedesOrEquals(mxid_oldest_xid_out,
 												 xtrack->relfrozenxid_out));
+			Assert(heap_tuple_would_freeze(tuple, limit_xid, limit_multi,
+										   &xtrack->relfrozenxid_nofreeze_out,
+										   &xtrack->relminmxid_nofreeze_out));
 			xtrack->relfrozenxid_out = mxid_oldest_xid_out;
 
 			/*
-- 
2.34.1

