From 092ab90ce6cb60bf8a09c3dd1909b7ed6814d49a Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Fri, 13 Jan 2023 15:23:11 -0800
Subject: [PATCH v4 2/2] Add "table age" trigger concept to autovacuum.

Teach autovacuum.c to launch "table age" autovacuums at the same point
that it previously triggered antiwraparound autovacuums.  Antiwraparound
autovacuums are retained, but are only used as a true option of last
resort, when regular autovacuum has presumably tried and failed to
advance relfrozenxid (likely because the auto-cancel behavior kept
cancelling regular autovacuums triggered based on table age).

The special auto-cancellation behavior applied by antiwraparound
autovacuums is known to cause problems in the field, so it makes sense
to avoid it, at least until the point where it starts to look like a
proportionate response.  Besides, the risk of the system eventually
triggering xidStopLimit because of cancellations is a lot lower than it
was back when the current auto-cancellation behavior was added by commit
acac68b2.  For example, there was no visibility map back then, so
restarting antiwraparound autovacuum meant that the next autovacuum
would get very little benefit from the work performed by earlier
cancelled autovacuums.

Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Jeff Davis <pgsql@j-davis.com>
Discussion: https://postgr.es/m/CAH2-Wz=S-R_2rO49Hm94Nuvhu9_twRGbTm6uwDRmRu-Sqn_t3w@mail.gmail.com
---
 src/include/storage/proc.h              |  2 +-
 src/backend/access/heap/visibilitymap.c |  5 +--
 src/backend/access/transam/multixact.c  |  4 +--
 src/backend/commands/vacuum.c           |  3 +-
 src/backend/postmaster/autovacuum.c     | 46 +++++++++++++++++++++++--
 src/backend/storage/lmgr/proc.c         |  4 +--
 6 files changed, 54 insertions(+), 10 deletions(-)

diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index b5c6f46d0..8a92a9fe5 100644
--- a/src/include/storage/proc.h
+++ b/src/include/storage/proc.h
@@ -59,7 +59,7 @@ struct XidCache
 										 * CONCURRENTLY or REINDEX
 										 * CONCURRENTLY on non-expressional,
 										 * non-partial index */
-#define		PROC_VACUUM_FOR_WRAPAROUND	0x08	/* set by autovac only */
+#define		PROC_VACUUM_FOR_WRAPAROUND	0x08	/* emergency autovac */
 #define		PROC_IN_LOGICAL_DECODING	0x10	/* currently doing logical
 												 * decoding outside xact */
 #define		PROC_AFFECTS_ALL_HORIZONS	0x20	/* this proc's xmin must be
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 1d1ca423a..8a0ed5a06 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -26,8 +26,9 @@
  * per heap page. A set all-visible bit means that all tuples on the page are
  * known visible to all transactions, and therefore the page doesn't need to
  * be vacuumed. A set all-frozen bit means that all tuples on the page are
- * completely frozen, and therefore the page doesn't need to be vacuumed even
- * if whole table scanning vacuum is required (e.g. anti-wraparound vacuum).
+ * completely frozen.  VACUUM doesn't give up the right to advance the rel's
+ * relfrozenxid/relminmxid just by skipping its all-frozen pages; it need only
+ * scan those pages that might have remaining unfrozen XIDs or MultiXactIds.
  * The all-frozen bit must be set only when the page is already all-visible.
  *
  * The map is conservative in the sense that we make sure that whenever a bit
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index e75e1fdf7..c0ee3876f 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -2553,8 +2553,8 @@ GetOldestMultiXactId(void)
  * info in MultiXactState, where it can be used to prevent overrun of old data
  * in the members SLRU area.
  *
- * The return value is true if emergency autovacuum is required and false
- * otherwise.
+ * The return value is true if emergency offset autovacuum (which appears as a
+ * table MXID age autovacuum to users) is required, and false otherwise.
  */
 static bool
 SetOffsetVacuumLimit(bool is_startup)
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d17fbab17..2a4950df5 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1859,7 +1859,8 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 		 *
 		 * We also set the VACUUM_FOR_WRAPAROUND flag, which is passed down by
 		 * autovacuum; it's used to avoid canceling a vacuum that was invoked
-		 * in an emergency.
+		 * because no earlier vacuum (in particular no earlier "table age"
+		 * autovacuum) ran and advanced relfrozenxid/relminmxid.
 		 *
 		 * Note: these flags remain set until CommitTransaction or
 		 * AbortTransaction.  We don't want to clear them until we reset
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index d428ef066..016261f7e 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -135,6 +135,8 @@ int			Log_autovacuum_min_duration = 600000;
 #define MIN_AUTOVAC_SLEEPTIME 100.0 /* milliseconds */
 #define MAX_AUTOVAC_SLEEPTIME 300	/* seconds */
 
+#define ANTIWRAPAROUND_MAX_AGE 1000000000	/* one billion XIDs/MXIDs */
+
 /* Flags to tell if we are in an autovacuum process */
 static bool am_autovacuum_launcher = false;
 static bool am_autovacuum_worker = false;
@@ -3099,8 +3101,48 @@ relation_needs_vacanalyze(Oid relid,
 		return AUTOVACUUM_NONE;
 	}
 
-	/* A table age autovacuum always gets antiwraparound protections */
-	*wraparound = tableagevac;
+	/*
+	 * If we're forcing table age autovacuum, are we at the point where it has
+	 * to be an antiwraparound autovacuum?
+	 *
+	 * Antiwraparound autovacuums are different to other autovacuums in that
+	 * they cannot be automatically canceled, and are described directly in
+	 * pg_stat_activity.  They're used only in emergencies, when no earlier
+	 * standard table age autovacuum could complete and advance the table's
+	 * relfrozenxid/relminmxid, despite an ample table age autovacuum window.
+	 */
+	if (tableagevac)
+	{
+		/*
+		 * Double the max age cutoffs to get the antiwraparound cutoffs.  This
+		 * gives standard autovacuuming plenty of space to succeed, so by the
+		 * time antiwraparound mode finally triggers we can be fairly confident
+		 * that standard autovacuuming hasn't worked out, and won't.
+		 *
+		 * Don't ever put off antiwraparound autovacuuming past the point
+		 * where relfrozenxid has already attained an age >= 1 billion XIDs,
+		 * or where relminmxid has already attained an age >= 1 billion MXIDs.
+		 */
+		if (freeze_max_age < ANTIWRAPAROUND_MAX_AGE)
+			freeze_max_age *= 2;
+		freeze_max_age = Min(freeze_max_age, ANTIWRAPAROUND_MAX_AGE);
+		if (multixact_freeze_max_age < ANTIWRAPAROUND_MAX_AGE)
+			multixact_freeze_max_age *= 2;
+		multixact_freeze_max_age = Min(multixact_freeze_max_age,
+									   ANTIWRAPAROUND_MAX_AGE);
+
+		/* Similar test to before, but with double the max age */
+		xidAgeLimit = recentXid - freeze_max_age;
+		if (xidAgeLimit < FirstNormalTransactionId)
+			xidAgeLimit -= FirstNormalTransactionId;
+		multiAgeLimit = recentMulti - multixact_freeze_max_age;
+		if (multiAgeLimit < FirstMultiXactId)
+			multiAgeLimit -= FirstMultiXactId;
+		*wraparound = ((TransactionIdIsNormal(relfrozenxid) &&
+						TransactionIdPrecedes(relfrozenxid, xidAgeLimit)) ||
+					   (MultiXactIdIsValid(relminmxid) &&
+						MultiXactIdPrecedes(relminmxid, multiAgeLimit)));
+	}
 
 	/*
 	 * If we found stats for the table, and autovacuum is currently enabled,
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 00d26dc0f..dc66b9af0 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -1384,8 +1384,8 @@ ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable)
 			LWLockRelease(ProcArrayLock);
 
 			/*
-			 * Only do it if the worker is not working to protect against Xid
-			 * wraparound.
+			 * Only do it if the worker is not an antiwraparound autovacuum, a
+			 * special type of autovacuum that is only used in emergencies.
 			 */
 			if ((statusFlags & PROC_IS_AUTOVACUUM) &&
 				!(statusFlags & PROC_VACUUM_FOR_WRAPAROUND))
-- 
2.39.0

