From 580a569694a28e72b75c85031ef149e3397b006f Mon Sep 17 00:00:00 2001
From: Hayato Kuroda <kuroda.hayato@fujitsu.com>
Date: Fri, 13 Sep 2024 07:52:40 +0000
Subject: [PATCH] XXX: try to add vacuum_committs_age
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The patch adds a GUC, vacuum_committs_age (in seconds), that prevents dead
rows from being removed until the transaction that deleted them (xmax) is
older than the threshold. That is, a row is retained as long as
now() - commit_timestamp_of_xmax < vacuum_committs_age. The check only takes
effect when track_commit_timestamp is enabled and vacuum_committs_age is
non-zero.

XXX Please note that the patch is still unfinished: a few challenging issues
remain to be addressed. For instance, we need to prevent
relfrozenxid/datfrozenxid from being advanced in both aggressive and
non-aggressive vacuum modes. Otherwise, the commit timestamp data is cleaned
up, and we will not be able to compute the age of a tuple.

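XXX The patch has a noticeable performance impact on vacuum operations when
the rows in a table were deleted by multiple transactions, because the commit
timestamp is looked up for each recently dead tuple and the single-entry cache
only helps when the same xmax is looked up repeatedly.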
---
 src/backend/access/heap/heapam.c       | 12 ++++++++--
 src/backend/access/heap/pruneheap.c    | 32 ++++++++++++++++++++++----
 src/backend/access/transam/commit_ts.c | 25 ++++++++++++++++++++
 src/backend/commands/vacuum.c          |  1 +
 src/backend/utils/misc/guc_tables.c    | 11 +++++++++
 src/include/access/heapam.h            |  1 +
 src/include/commands/vacuum.h          |  1 +
 7 files changed, 76 insertions(+), 7 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index f167107257..804c53b6ca 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6531,6 +6531,7 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
 bool
 heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 						  const struct VacuumCutoffs *cutoffs,
+						  bool xmax_aged,
 						  HeapPageFreeze *pagefrz,
 						  HeapTupleFreeze *frz, bool *totally_frozen)
 {
@@ -6593,7 +6594,14 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 
 	/* Now process xmax */
 	xid = frz->xmax;
-	if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
+	if (!xmax_aged)
+	{
+		/*
+		 * Skip freezing if the commit timestamp of xmax is within the
+		 * vacuum_committs_age threshold.
+		 */
+	}
+	else if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
 	{
 		/* Raw xmax is a MultiXactId */
 		TransactionId newxmax;
@@ -6900,7 +6908,7 @@ heap_freeze_tuple(HeapTupleHeader tuple,
 	pagefrz.NoFreezePageRelfrozenXid = FreezeLimit;
 	pagefrz.NoFreezePageRelminMxid = MultiXactCutoff;
 
-	do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs,
+	do_freeze = heap_prepare_freeze_tuple(tuple, &cutoffs, false,
 										  &pagefrz, &frz, &totally_frozen);
 
 	/*
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index 869d82ad66..b5afe671bf 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -14,6 +14,7 @@
  */
 #include "postgres.h"
 
+#include "access/commit_ts.h"
 #include "access/heapam.h"
 #include "access/heapam_xlog.h"
 #include "access/htup_details.h"
@@ -97,6 +98,8 @@ typedef struct
 	 */
 	int8		htsv[MaxHeapTuplesPerPage + 1];
 
+	bool		commit_ts_aged[MaxHeapTuplesPerPage + 1];
+
 	/*
 	 * Freezing-related state.
 	 */
@@ -155,7 +158,9 @@ typedef struct
 /* Local functions */
 static HTSV_Result heap_prune_satisfies_vacuum(PruneState *prstate,
 											   HeapTuple tup,
-											   Buffer buffer);
+											   Buffer buffer,
+											   TimestampTz now,
+											   bool *commit_ts_aged);
 static inline HTSV_Result htsv_get_valid_status(int status);
 static void heap_prune_chain(Page page, BlockNumber blockno, OffsetNumber maxoff,
 							 OffsetNumber rootoffnum, PruneState *prstate);
@@ -367,6 +372,7 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	bool		do_prune;
 	bool		do_hint;
 	bool		hint_bit_fpi;
+	TimestampTz	now;
 	int64		fpi_before = pgWalUsage.wal_fpi;
 
 	/* Copy parameters to prstate */
@@ -469,6 +475,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 	maxoff = PageGetMaxOffsetNumber(page);
 	tup.t_tableOid = RelationGetRelid(relation);
 
+	now = GetCurrentTimestamp();
+
 	/*
 	 * Determine HTSV for all tuples, and queue them up for processing as HOT
 	 * chain roots or as heap-only items.
@@ -543,7 +551,8 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
 		ItemPointerSet(&tup.t_self, blockno, offnum);
 
 		prstate.htsv[offnum] = heap_prune_satisfies_vacuum(&prstate, &tup,
-														   buffer);
+														   buffer, now,
+														   &prstate.commit_ts_aged[offnum]);
 
 		if (!HeapTupleHeaderIsHeapOnly(htup))
 			prstate.root_items[prstate.nroot_items++] = offnum;
@@ -914,23 +923,35 @@ heap_page_prune_and_freeze(Relation relation, Buffer buffer,
  * Perform visibility checks for heap pruning.
  */
 static HTSV_Result
-heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
+heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer,
+							TimestampTz now, bool *commit_ts_aged)
 {
 	HTSV_Result res;
 	TransactionId dead_after;
+	TimestampTz	commit_ts = (int64) 0;
+
+	*commit_ts_aged = true;
 
 	res = HeapTupleSatisfiesVacuumHorizon(tup, buffer, &dead_after);
 
 	if (res != HEAPTUPLE_RECENTLY_DEAD)
 		return res;
 
+	if (track_commit_timestamp && vacuum_committs_age)
+	{
+		TransactionIdGetCommitTsData(dead_after, &commit_ts, NULL);
+
+		*commit_ts_aged = TimestampDifferenceExceeds(commit_ts, now,
+						  vacuum_committs_age * 1000);
+	}
+
 	/*
 	 * For VACUUM, we must be sure to prune tuples with xmax older than
 	 * OldestXmin -- a visibility cutoff determined at the beginning of
 	 * vacuuming the relation. OldestXmin is used for freezing determination
 	 * and we cannot freeze dead tuples' xmaxes.
 	 */
-	if (prstate->cutoffs &&
+	if (*commit_ts_aged && prstate->cutoffs &&
 		TransactionIdIsValid(prstate->cutoffs->OldestXmin) &&
 		NormalTransactionIdPrecedes(dead_after, prstate->cutoffs->OldestXmin))
 		return HEAPTUPLE_DEAD;
@@ -943,7 +964,7 @@ heap_prune_satisfies_vacuum(PruneState *prstate, HeapTuple tup, Buffer buffer)
 	 * if the GlobalVisState has been updated since the beginning of vacuuming
 	 * the relation.
 	 */
-	if (GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
+	if (*commit_ts_aged && GlobalVisTestIsRemovableXid(prstate->vistest, dead_after))
 		return HEAPTUPLE_DEAD;
 
 	return res;
@@ -1482,6 +1503,7 @@ heap_prune_record_unchanged_lp_normal(Page page, PruneState *prstate, OffsetNumb
 
 		if ((heap_prepare_freeze_tuple(htup,
 									   prstate->cutoffs,
+									   prstate->commit_ts_aged[offnum],
 									   &prstate->pagefrz,
 									   &prstate->frozen[prstate->nfrozen],
 									   &totally_frozen)))
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c
index 77e1899d7a..8b71bc3245 100644
--- a/src/backend/access/transam/commit_ts.c
+++ b/src/backend/access/transam/commit_ts.c
@@ -121,6 +121,10 @@ static void DeactivateCommitTs(void);
 static void WriteZeroPageXlogRec(int64 pageno);
 static void WriteTruncateXlogRec(int64 pageno, TransactionId oldestXid);
 
+static TransactionId cached_xid = InvalidTransactionId;
+static TimestampTz cached_ts = 0;
+static RepOriginId cached_nodeid = InvalidRepOriginId;
+
 /*
  * TransactionTreeSetCommitTsData
  *
@@ -212,6 +216,9 @@ TransactionTreeSetCommitTsData(TransactionId xid, int nsubxids,
 	if (TransactionIdPrecedes(TransamVariables->newestCommitTsXid, newestXact))
 		TransamVariables->newestCommitTsXid = newestXact;
 	LWLockRelease(CommitTsLock);
+
+	cached_xid = xid;
+	cached_ts = timestamp;
 }
 
 /*
@@ -293,6 +300,14 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 			*nodeid = 0;
 		return false;
 	}
+	else if (xid == cached_xid)
+	{
+		*ts = cached_ts;
+		if (nodeid)
+			*nodeid = cached_nodeid;
+
+		return *ts != 0;
+	}
 
 	LWLockAcquire(CommitTsLock, LW_SHARED);
 
@@ -311,6 +326,11 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 			*nodeid = commitTsShared->dataLastCommit.nodeid;
 
 		LWLockRelease(CommitTsLock);
+
+		cached_xid = xid;
+		cached_ts = *ts;
+		cached_nodeid = commitTsShared->dataLastCommit.nodeid;
+
 		return *ts != 0;
 	}
 
@@ -345,6 +365,11 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts,
 		*nodeid = entry.nodeid;
 
 	LWLockRelease(SimpleLruGetBankLock(CommitTsCtl, pageno));
+
+	cached_xid = xid;
+	cached_ts = *ts;
+	cached_nodeid = entry.nodeid;
+
 	return *ts != 0;
 }
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7d8e9d2045..e96808e3dc 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -70,6 +70,7 @@ int			vacuum_multixact_freeze_min_age;
 int			vacuum_multixact_freeze_table_age;
 int			vacuum_failsafe_age;
 int			vacuum_multixact_failsafe_age;
+int			vacuum_committs_age;
 
 /*
  * Variables for cost-based vacuum delay. The defaults differ between
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index 686309db58..239e413a55 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3714,6 +3714,17 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"vacuum_committs_age", PGC_SIGHUP, RESOURCES_DISK,
+			gettext_noop("Age at which VACUUM should *not* cleanup the dead tuple"),
+			NULL,
+			GUC_UNIT_S
+		},
+		&vacuum_committs_age,
+		0, 0, 86400,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index b92eb506ec..846faa1595 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -340,6 +340,7 @@ extern TM_Result heap_lock_tuple(Relation relation, HeapTuple tuple,
 extern void heap_inplace_update(Relation relation, HeapTuple tuple);
 extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 									  const struct VacuumCutoffs *cutoffs,
+									  bool xmax_aged,
 									  HeapPageFreeze *pagefrz,
 									  HeapTupleFreeze *frz, bool *totally_frozen);
 
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 759f9a87d3..fadedeec28 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -296,6 +296,7 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_min_age;
 extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
 extern PGDLLIMPORT int vacuum_failsafe_age;
 extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
+extern PGDLLIMPORT int vacuum_committs_age;
 
 /*
  * Maximum value for default_statistics_target and per-column statistics
-- 
2.30.0.windows.2

