From b09327137062cd88239c687ff2b3f99833a200a0 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Wed, 9 Apr 2025 23:17:25 +0200
Subject: [PATCH v00] WIP: Optimize VACUUM for tables with only summarizing
 indexes

This should reduce their IO requirements by a nice margin. Even though
such tables are probably rare, they are likely more common than large
tables with no indexes at all, so this is probably a net win.
---
 src/include/commands/vacuum.h         | 10 ++++-----
 src/backend/access/heap/vacuumlazy.c  | 31 +++++++++++++++++++--------
 src/backend/commands/analyze.c        |  2 +-
 src/backend/commands/vacuum.c         | 10 ++++++++-
 src/backend/commands/vacuumparallel.c | 12 ++++++++---
 5 files changed, 46 insertions(+), 19 deletions(-)

diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index bc37a80dc74..88e5973ad42 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -340,7 +340,8 @@ extern void vacuum(List *relations, VacuumParams *params,
 				   BufferAccessStrategy bstrategy, MemoryContext vac_context,
 				   bool isTopLevel);
 extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
-							 int *nindexes, Relation **Irel);
+							 int *nindexes, bool *allsumindexes,
+							 Relation **Irel);
 extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
 extern double vac_estimate_reltuples(Relation relation,
 									 BlockNumber total_pages,
@@ -379,10 +380,9 @@ extern void AutoVacuumUpdateCostLimit(void);
 extern void VacuumUpdateCosts(void);
 
 /* in commands/vacuumparallel.c */
-extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels,
-												 int nindexes, int nrequested_workers,
-												 int vac_work_mem, int elevel,
-												 BufferAccessStrategy bstrategy);
+extern ParallelVacuumState *parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
+												 bool indallsummarizing, int nrequested_workers,
+												 int vac_work_mem, int elevel, BufferAccessStrategy bstrategy);
 extern void parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats);
 extern TidStore *parallel_vacuum_get_dead_items(ParallelVacuumState *pvs,
 												VacDeadItemsInfo **dead_items_info_p);
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f28326bad09..9408f89b197 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -263,6 +263,14 @@ typedef struct LVRelState
 	Relation   *indrels;
 	int			nindexes;
 
+	/*
+	 * indallsummarizing is true if nindexes == 0, or if all indexes are
+	 * summarizing (and thus don't need to be informed about tuple deletions).
+	 * This allows us to apply the single-heapscan vacuum optimization when
+	 * all indexes on the table are summarizing.
+	 */
+	bool		indallsummarizing;
+
 	/* Buffer access strategy and parallel vacuum state */
 	BufferAccessStrategy bstrategy;
 	ParallelVacuumState *pvs;
@@ -680,7 +688,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	/* Set up high level stuff about rel and its indexes */
 	vacrel->rel = rel;
 	vac_open_indexes(vacrel->rel, RowExclusiveLock, &vacrel->nindexes,
-					 &vacrel->indrels);
+					 &vacrel->indallsummarizing, &vacrel->indrels);
 	vacrel->bstrategy = bstrategy;
 	if (instrument && vacrel->nindexes > 0)
 	{
@@ -1460,7 +1468,7 @@ lazy_scan_heap(LVRelState *vacrel)
 		 * revisit this page. Since updating the FSM is desirable but not
 		 * absolutely required, that's OK.
 		 */
-		if (vacrel->nindexes == 0
+		if (vacrel->indallsummarizing
 			|| !vacrel->do_index_vacuuming
 			|| !has_lpdead_items)
 		{
@@ -1475,7 +1483,7 @@ lazy_scan_heap(LVRelState *vacrel)
 			 * table has indexes. There will only be newly-freed space if we
 			 * held the cleanup lock and lazy_scan_prune() was called.
 			 */
-			if (got_cleanup_lock && vacrel->nindexes == 0 && has_lpdead_items &&
+			if (got_cleanup_lock && vacrel->indallsummarizing && has_lpdead_items &&
 				blkno - next_fsm_block_to_vacuum >= VACUUM_FSM_EVERY_PAGES)
 			{
 				FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
@@ -1960,8 +1968,8 @@ lazy_scan_prune(LVRelState *vacrel,
 	/*
 	 * Prune all HOT-update chains and potentially freeze tuples on this page.
 	 *
-	 * If the relation has no indexes, we can immediately mark would-be dead
-	 * items LP_UNUSED.
+	 * If the relation has no indexes, or only summarizing indexes, we can
+	 * immediately mark would-be dead items LP_UNUSED.
 	 *
 	 * The number of tuples removed from the page is returned in
 	 * presult.ndeleted.  It should not be confused with presult.lpdead_items;
@@ -1973,7 +1981,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	 * all-visible.
 	 */
 	prune_options = HEAP_PAGE_PRUNE_FREEZE;
-	if (vacrel->nindexes == 0)
+	if (vacrel->indallsummarizing)
 		prune_options |= HEAP_PAGE_PRUNE_MARK_UNUSED_NOW;
 
 	heap_page_prune_and_freeze(rel, buf, vacrel->vistest, prune_options,
@@ -2381,7 +2389,7 @@ lazy_scan_noprune(LVRelState *vacrel,
 	vacrel->NewRelminMxid = NoFreezePageRelminMxid;
 
 	/* Save any LP_DEAD items found on the page in dead_items */
-	if (vacrel->nindexes == 0)
+	if (vacrel->indallsummarizing)
 	{
 		/* Using one-pass strategy (since table has no indexes) */
 		if (lpdead_items > 0)
@@ -2536,8 +2544,12 @@ lazy_vacuum(LVRelState *vacrel)
 		/*
 		 * We successfully completed a round of index vacuuming.  Do related
 		 * heap vacuuming now.
+		 *
+		 * If all valid indexes are summarizing, the dead TIDs were already
+		 * reclaimed during pruning, so we can skip that final heap pass.
 		 */
-		lazy_vacuum_heap_rel(vacrel);
+		if (!vacrel->indallsummarizing)
+			lazy_vacuum_heap_rel(vacrel);
 	}
 	else
 	{
@@ -3511,7 +3523,8 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 		}
 		else
 			vacrel->pvs = parallel_vacuum_init(vacrel->rel, vacrel->indrels,
-											   vacrel->nindexes, nworkers,
+											   vacrel->nindexes,
+											   vacrel->indallsummarizing, nworkers,
 											   vac_work_mem,
 											   vacrel->verbose ? INFO : DEBUG2,
 											   vacrel->bstrategy);
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 4fffb76e557..c846f42a06c 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -435,7 +435,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	}
 	else if (!inh)
 	{
-		vac_open_indexes(onerel, AccessShareLock, &nindexes, &Irel);
+		vac_open_indexes(onerel, AccessShareLock, &nindexes, NULL, &Irel);
 		hasindex = nindexes > 0;
 	}
 	else
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index db5da3ce826..0c876260d05 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -2336,11 +2336,13 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
  */
 void
 vac_open_indexes(Relation relation, LOCKMODE lockmode,
-				 int *nindexes, Relation **Irel)
+				 int *nindexes, bool *indallsummarizing,
+				 Relation **Irel)
 {
 	List	   *indexoidlist;
 	ListCell   *indexoidscan;
 	int			i;
+	bool		allsummarizing = true;
 
 	Assert(lockmode != NoLock);
 
@@ -2363,13 +2365,19 @@ vac_open_indexes(Relation relation, LOCKMODE lockmode,
 
 		indrel = index_open(indexoid, lockmode);
 		if (indrel->rd_index->indisready)
+		{
 			(*Irel)[i++] = indrel;
+			allsummarizing &= indrel->rd_indam->amsummarizing;
+		}
 		else
 			index_close(indrel, lockmode);
 	}
 
 	*nindexes = i;
 
+	if (indallsummarizing)
+		*indallsummarizing = allsummarizing;
+
 	list_free(indexoidlist);
 }
 
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 2b9d548cdeb..7fe879c68e5 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -170,6 +170,7 @@ struct ParallelVacuumState
 	/* Target indexes */
 	Relation   *indrels;
 	int			nindexes;
+	bool		indallsummarizing;
 
 	/* Shared information among parallel vacuum workers */
 	PVShared   *shared;
@@ -241,8 +242,9 @@ static void parallel_vacuum_error_callback(void *arg);
  */
 ParallelVacuumState *
 parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
-					 int nrequested_workers, int vac_work_mem,
-					 int elevel, BufferAccessStrategy bstrategy)
+					 bool indallsummarizing, int nrequested_workers,
+					 int vac_work_mem, int elevel,
+					 BufferAccessStrategy bstrategy)
 {
 	ParallelVacuumState *pvs;
 	ParallelContext *pcxt;
@@ -282,6 +284,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	pvs = (ParallelVacuumState *) palloc0(sizeof(ParallelVacuumState));
 	pvs->indrels = indrels;
 	pvs->nindexes = nindexes;
+	pvs->indallsummarizing = indallsummarizing;
 	pvs->will_parallel_vacuum = will_parallel_vacuum;
 	pvs->bstrategy = bstrategy;
 	pvs->heaprel = rel;
@@ -997,6 +1000,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
 	int			nindexes;
+	bool		indallsummarizing;
 	char	   *sharedquery;
 	ErrorContextCallback errcallback;
 
@@ -1029,7 +1033,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	 * Open all indexes. indrels are sorted in order by OID, which should be
 	 * matched to the leader's one.
 	 */
-	vac_open_indexes(rel, RowExclusiveLock, &nindexes, &indrels);
+	vac_open_indexes(rel, RowExclusiveLock, &nindexes,
+					 &indallsummarizing, &indrels);
 	Assert(nindexes > 0);
 
 	/*
@@ -1061,6 +1066,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	/* Set parallel vacuum state */
 	pvs.indrels = indrels;
 	pvs.nindexes = nindexes;
+	pvs.indallsummarizing = indallsummarizing;
 	pvs.indstats = indstats;
 	pvs.shared = shared;
 	pvs.dead_items = dead_items;
-- 
2.45.2

