From 5eb9e221c7cd025fa2d47e95ab7de78af0866c04 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Mon, 21 Jan 2019 19:07:44 +0900
Subject: [PATCH v7 1/2] Add DISABLE_INDEX_CLEANUP option to VACUUM command

With this option, VACUUM does HOT-pruning for live tuples but doesn't
remove dead tuples completely and disables index vacuum.

vacrelstats->dead_tuples could contain tuples that became dead after
being checked at HOT-pruning time, and which are not marked as dead.
Per discussion on pgsql-hackers, we normally record and remove them,
but with this option we don't process them and leave them for the next
vacuum, to simplify the code. That's okay because it's a very rare
condition and those tuples will be processed by the next vacuum.
---
 doc/src/sgml/ref/vacuum.sgml         | 21 +++++++++-
 src/backend/access/heap/pruneheap.c  | 16 ++++---
 src/backend/access/heap/vacuumlazy.c | 81 +++++++++++++++++++++++++++---------
 src/backend/commands/vacuum.c        |  8 +++-
 src/backend/parser/gram.y            |  2 +
 src/include/access/heapam.h          |  3 +-
 src/include/nodes/parsenodes.h       |  3 +-
 src/test/regress/expected/vacuum.out |  3 ++
 src/test/regress/sql/vacuum.sql      |  3 ++
 9 files changed, 111 insertions(+), 29 deletions(-)

diff --git a/doc/src/sgml/ref/vacuum.sgml b/doc/src/sgml/ref/vacuum.sgml
index fd911f5..f5cde2b 100644
--- a/doc/src/sgml/ref/vacuum.sgml
+++ b/doc/src/sgml/ref/vacuum.sgml
@@ -31,6 +31,7 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     VERBOSE
     ANALYZE
     DISABLE_PAGE_SKIPPING
+    DISABLE_INDEX_CLEANUP
     SKIP_LOCKED
 
 <phrase>and <replaceable class="parameter">table_and_columns</replaceable> is:</phrase>
@@ -161,7 +162,25 @@ VACUUM [ FULL ] [ FREEZE ] [ VERBOSE ] [ ANALYZE ] [ <replaceable class="paramet
     </listitem>
    </varlistentry>
 
-   <varlistentry>
+   <varlistentry>
+    <term><literal>DISABLE_INDEX_CLEANUP</literal></term>
+    <listitem>
+     <para>
+      <command>VACUUM</command> normally removes dead tuples and prunes
+      HOT-update chains in the table. If the table has any dead tuples,
+      it removes them from both the table and its indexes for re-use. With
+      this option, <command>VACUUM</command> doesn't completely remove dead
+      tuples and doesn't remove dead tuples from indexes.  This is suitable
+      for avoiding transaction ID wraparound (see
+      <xref linkend="vacuum-for-wraparound"/>) but is not sufficient for
+      avoiding index bloat. This option is ignored if the table has no
+      indexes. This option cannot be used in conjunction with the
+      <literal>FULL</literal> option.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
     <term><literal>SKIP_LOCKED</literal></term>
     <listitem>
      <para>
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index a3e5192..9db51e8 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -147,11 +147,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
 		 */
 		if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
 		{
-			TransactionId ignore = InvalidTransactionId;	/* return value not
-															 * needed */
+			/* return values not needed */
+			TransactionId ignore1 = InvalidTransactionId;
+			double ignore2 = 0;
 
 			/* OK to prune */
-			(void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
+			(void) heap_page_prune(relation, buffer, OldestXmin, true,
+								   &ignore1, &ignore2);
 		}
 
 		/* And release buffer lock */
@@ -173,12 +175,13 @@ heap_page_prune_opt(Relation relation, Buffer buffer)
  * send its own new total to pgstats, and we don't want this delta applied
  * on top of that.)
  *
- * Returns the number of tuples deleted from the page and sets
- * latestRemovedXid.
+ * Returns the number of tuples deleted from the page, sets latestRemovedXid,
+ * and adds the number of items marked unused by pruning to *nunused.
  */
 int
 heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
-				bool report_stats, TransactionId *latestRemovedXid)
+				bool report_stats, TransactionId *latestRemovedXid,
+				double *nunused)
 {
 	int			ndeleted = 0;
 	Page		page = BufferGetPage(buffer);
@@ -302,6 +305,7 @@ heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
 		pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
 
 	*latestRemovedXid = prstate.latestRemovedXid;
+	*nunused += prstate.nunused;
 
 	/*
 	 * XXX Should we update the FSM information of this page ?
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 9416c31..4a922c1 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -112,7 +112,10 @@
 
 typedef struct LVRelStats
 {
-	/* hasindex = true means two-pass strategy; false means one-pass */
+	/*
+	 * hasindex = true means two-pass strategy; false means one-pass. But we
+	 * always use the one-pass strategy when index vacuum is disabled.
+	 */
 	bool		hasindex;
 	/* Overall statistics about rel */
 	BlockNumber old_rel_pages;	/* previous value of pg_class.relpages */
@@ -167,7 +170,8 @@ static bool should_attempt_truncation(LVRelStats *vacrelstats);
 static void lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats);
 static BlockNumber count_nondeletable_pages(Relation onerel,
 						 LVRelStats *vacrelstats);
-static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks);
+static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks,
+							 bool skip_index_vacuum);
 static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
 					   ItemPointer itemptr);
 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
@@ -485,13 +489,16 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 				live_tuples,	/* live tuples (reltuples estimate) */
 				tups_vacuumed,	/* tuples cleaned up by vacuum */
 				nkeep,			/* dead-but-not-removable tuples */
-				nunused;		/* unused item pointers */
+				nunused,		/* unused item pointers */
+				tups_pruned,	/* tuples marked as unused by HOT-pruning */
+				nleft;			/* item pointers we left */
 	IndexBulkDeleteResult **indstats;
 	int			i;
 	PGRUsage	ru0;
 	Buffer		vmbuffer = InvalidBuffer;
 	BlockNumber next_unskippable_block;
 	bool		skipping_blocks;
+	bool		skip_index_vacuum = (options & VACOPT_DISABLE_INDEX_CLEANUP) != 0;
 	xl_heap_freeze_tuple *frozen;
 	StringInfoData buf;
 	const int	initprog_index[] = {
@@ -517,7 +524,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 
 	empty_pages = vacuumed_pages = 0;
 	next_fsm_block_to_vacuum = (BlockNumber) 0;
-	num_tuples = live_tuples = tups_vacuumed = nkeep = nunused = 0;
+	num_tuples = live_tuples = tups_vacuumed = nkeep = nunused
+		= nleft = tups_pruned = 0;
 
 	indstats = (IndexBulkDeleteResult **)
 		palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
@@ -529,7 +537,7 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 	vacrelstats->nonempty_pages = 0;
 	vacrelstats->latestRemovedXid = InvalidTransactionId;
 
-	lazy_space_alloc(vacrelstats, nblocks);
+	lazy_space_alloc(vacrelstats, nblocks, skip_index_vacuum);
 	frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
 
 	/* Report that we're scanning the heap, advertising total # of blocks */
@@ -722,6 +730,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 			};
 			int64		hvp_val[2];
 
+			/* Index vacuum must be enabled in two-pass vacuum */
+			Assert(!skip_index_vacuum);
+
 			/*
 			 * Before beginning index vacuuming, we release any pin we may
 			 * hold on the visibility map page.  This isn't necessary for
@@ -951,7 +962,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 		 * We count tuples removed by the pruning step as removed by VACUUM.
 		 */
 		tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
-										 &vacrelstats->latestRemovedXid);
+										 &vacrelstats->latestRemovedXid,
+										 &tups_pruned);
 
 		/*
 		 * Now scan the page to collect vacuumable items and check for tuples
@@ -1206,15 +1218,31 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 		}
 
 		/*
-		 * If there are no indexes then we can vacuum the page right now
-		 * instead of doing a second scan.
+		 * If there are no indexes or we skip index vacuum then we can vacuum
+		 * the page right now instead of doing a second scan.
 		 */
-		if (nindexes == 0 &&
+		if ((nindexes == 0 || skip_index_vacuum) &&
 			vacrelstats->num_dead_tuples > 0)
 		{
-			/* Remove tuples from heap */
-			lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
-			has_dead_tuples = false;
+			if (nindexes == 0)
+			{
+				/* Remove tuples from heap if the table has no index */
+				lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer);
+				has_dead_tuples = false;
+			}
+			else
+			{
+				/*
+				 * Here, we have indexes but index vacuum is disabled. We don't
+				 * vacuum dead tuples on the heap; we just forget them since
+				 * index vacuum is skipped. vacrelstats->dead_tuples could hold
+				 * tuples that became dead after being checked at HOT-pruning
+				 * time but aren't marked dead yet. We don't process them: it's
+				 * a very rare condition and the next vacuum will process them.
+				 */
+				Assert(skip_index_vacuum);
+				nleft += vacrelstats->num_dead_tuples;
+			}
 
 			/*
 			 * Forget the now-vacuumed tuples, and press on, but be careful
@@ -1379,6 +1407,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 		};
 		int64		hvp_val[2];
 
+		/* Index vacuum must be enabled in two-pass vacuum */
+		Assert(!skip_index_vacuum);
+
 		/* Log cleanup info before we touch indexes */
 		vacuum_log_cleanup_info(onerel, vacrelstats);
 
@@ -1417,15 +1448,21 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 								 PROGRESS_VACUUM_PHASE_INDEX_CLEANUP);
 
 	/* Do post-vacuum cleanup and statistics update for each index */
-	for (i = 0; i < nindexes; i++)
-		lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
+	if (!skip_index_vacuum)
+		for (i = 0; i < nindexes; i++)
+			lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
 
-	/* If no indexes, make log report that lazy_vacuum_heap would've made */
+	/*
+	 * If there are no indexes or index vacuum is disabled, make the log
+	 * report that lazy_vacuum_heap would've made. If index vacuum is disabled,
+	 * we didn't remove all dead tuples, only those removed by HOT-pruning.
+	 */
 	if (vacuumed_pages)
 		ereport(elevel,
 				(errmsg("\"%s\": removed %.0f row versions in %u pages",
 						RelationGetRelationName(onerel),
-						tups_vacuumed, vacuumed_pages)));
+						skip_index_vacuum ? tups_pruned : tups_vacuumed,
+						vacuumed_pages)));
 
 	/*
 	 * This is pretty messy, but we split it up so that we can skip emitting
@@ -1449,12 +1486,17 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
 									"%u pages are entirely empty.\n",
 									empty_pages),
 					 empty_pages);
+	appendStringInfo(&buf, ngettext("%.0f tuple is left as dead.\n",
+									"%.0f tuples are left as dead.\n",
+									nleft),
+					 nleft);
 	appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
 
 	ereport(elevel,
 			(errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u out of %u pages",
 					RelationGetRelationName(onerel),
-					tups_vacuumed, num_tuples,
+					skip_index_vacuum ? tups_pruned : tups_vacuumed,
+					num_tuples,
 					vacrelstats->scanned_pages, nblocks),
 			 errdetail_internal("%s", buf.data)));
 	pfree(buf.data);
@@ -2085,14 +2127,15 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
  * See the comments at the head of this file for rationale.
  */
 static void
-lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
+lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks,
+				 bool skip_index_vacuum)
 {
 	long		maxtuples;
 	int			vac_work_mem = IsAutoVacuumWorkerProcess() &&
 	autovacuum_work_mem != -1 ?
 	autovacuum_work_mem : maintenance_work_mem;
 
-	if (vacrelstats->hasindex)
+	if (vacrelstats->hasindex && !skip_index_vacuum)
 	{
 		maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData);
 		maxtuples = Min(maxtuples, INT_MAX);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e91df21..00024dd 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -203,7 +203,8 @@ vacuum(int options, List *relations, VacuumParams *params,
 						stmttype)));
 
 	/*
-	 * Sanity check DISABLE_PAGE_SKIPPING option.
+	 * Sanity check DISABLE_PAGE_SKIPPING option and DISABLE_INDEX_CLEANUP
+	 * option.
 	 */
 	if ((options & VACOPT_FULL) != 0 &&
 		(options & VACOPT_DISABLE_PAGE_SKIPPING) != 0)
@@ -211,6 +212,11 @@ vacuum(int options, List *relations, VacuumParams *params,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL")));
 
+	if ((options & VACOPT_FULL) != 0 &&
+		(options & VACOPT_DISABLE_INDEX_CLEANUP) != 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("VACUUM option DISABLE_INDEX_CLEANUP cannot be used with FULL")));
 	/*
 	 * Send info about dead objects to the statistics collector, unless we are
 	 * in autovacuum --- autovacuum.c does this for itself.
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 0279013..0b13ad7 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -10471,6 +10471,8 @@ vacuum_option_elem:
 				{
 					if (strcmp($1, "disable_page_skipping") == 0)
 						$$ = VACOPT_DISABLE_PAGE_SKIPPING;
+					else if (strcmp($1, "disable_index_cleanup") == 0)
+						$$ = VACOPT_DISABLE_INDEX_CLEANUP;
 					else if (strcmp($1, "skip_locked") == 0)
 						$$ = VACOPT_SKIP_LOCKED;
 					else
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index ab08791..e0230d7 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -170,7 +170,8 @@ extern void heap_update_snapshot(HeapScanDesc scan, Snapshot snapshot);
 extern void heap_page_prune_opt(Relation relation, Buffer buffer);
 extern int heap_page_prune(Relation relation, Buffer buffer,
 				TransactionId OldestXmin,
-				bool report_stats, TransactionId *latestRemovedXid);
+				bool report_stats, TransactionId *latestRemovedXid,
+				double *nunused);
 extern void heap_page_prune_execute(Buffer buffer,
 						OffsetNumber *redirected, int nredirected,
 						OffsetNumber *nowdead, int ndead,
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index a7e859d..f6d52ac 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -3163,7 +3163,8 @@ typedef enum VacuumOption
 	VACOPT_FULL = 1 << 4,		/* FULL (non-concurrent) vacuum */
 	VACOPT_SKIP_LOCKED = 1 << 5,	/* skip if cannot get lock */
 	VACOPT_SKIPTOAST = 1 << 6,	/* don't process the TOAST table, if any */
-	VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7	/* don't skip any pages */
+	VACOPT_DISABLE_PAGE_SKIPPING = 1 << 7,	/* don't skip any pages */
+	VACOPT_DISABLE_INDEX_CLEANUP = 1 << 8	/* skip index vacuum and cleanup */
 } VacuumOption;
 
 /*
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index fa9d663..cffce59 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -80,6 +80,9 @@ CONTEXT:  SQL function "do_analyze" statement 1
 SQL function "wrap_do_analyze" statement 1
 VACUUM FULL vactst;
 VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
+VACUUM (DISABLE_INDEX_CLEANUP) vaccluster;
+VACUUM (DISABLE_INDEX_CLEANUP) vactst; -- DISABLE_INDEX_CLEANUP is ignored
+VACUUM (DISABLE_INDEX_CLEANUP, FREEZE) vaccluster;
 -- partitioned table
 CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a);
 CREATE TABLE vacparted1 PARTITION OF vacparted FOR VALUES IN (1);
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index 9defa0d..9c4bdb7 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -61,6 +61,9 @@ VACUUM FULL vaccluster;
 VACUUM FULL vactst;
 
 VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
+VACUUM (DISABLE_INDEX_CLEANUP) vaccluster;
+VACUUM (DISABLE_INDEX_CLEANUP) vactst; -- DISABLE_INDEX_CLEANUP is ignored
+VACUUM (DISABLE_INDEX_CLEANUP, FREEZE) vaccluster;
 
 -- partitioned table
 CREATE TABLE vacparted (a int, b char) PARTITION BY LIST (a);
-- 
2.10.5

