From 975131a29a2a5290147d4c813da1479a6268ac18 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Tue, 30 Mar 2021 19:43:06 -0700
Subject: [PATCH v11 1/2] Truncate line pointer array during VACUUM.

Truncate each heap page's line pointer array when a contiguous group of
LP_UNUSED item pointers appears at the end of the array.  This happens
during VACUUM's second pass over the heap.  In practice most affected
LP_UNUSED line pointers are truncated away at the same point that VACUUM
marks them LP_UNUSED (from LP_DEAD).

Author: Matthias van de Meent <boekewurm+postgres@gmail.com>
Author: Peter Geoghegan <pg@bowt.ie>
Discussion: https://postgr.es/m/CAEze2WjgaQc55Y5f5CQd3L=eS5CZcff2Obxp=O6pto8-f0hC4w@mail.gmail.com
---
 src/include/storage/bufpage.h        |   1 +
 src/backend/access/heap/heapam.c     |  22 ++++--
 src/backend/access/heap/pruneheap.c  |   4 +
 src/backend/access/heap/vacuumlazy.c |  16 +++-
 src/backend/storage/page/bufpage.c   | 112 ++++++++++++++++++++++++++-
 5 files changed, 144 insertions(+), 11 deletions(-)

diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index 359b749f7f..c86ccdaf60 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -441,6 +441,7 @@ extern Page PageGetTempPageCopy(Page page);
 extern Page PageGetTempPageCopySpecial(Page page);
 extern void PageRestoreTempPage(Page tempPage, Page oldPage);
 extern void PageRepairFragmentation(Page page);
+extern void PageTruncateLinePointerArray(Page page);
 extern Size PageGetFreeSpace(Page page);
 extern Size PageGetFreeSpaceForMultipleTuples(Page page, int ntups);
 extern Size PageGetExactFreeSpace(Page page);
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 9cbc161d7a..4d5247fb0b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -635,8 +635,15 @@ heapgettup(HeapScanDesc scan,
 		}
 		else
 		{
+			/*
+			 * The previous returned tuple may have been vacuumed since the
+			 * previous scan when we use a non-MVCC snapshot, so we must
+			 * re-establish the lineoff <= PageGetMaxOffsetNumber(dp)
+			 * invariant
+			 */
 			lineoff =			/* previous offnum */
-				OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self)));
+				Min(lines,
+					OffsetNumberPrev(ItemPointerGetOffsetNumber(&(tuple->t_self))));
 		}
 		/* page and lineoff now reference the physically previous tid */
 
@@ -678,6 +685,13 @@ heapgettup(HeapScanDesc scan,
 	lpp = PageGetItemId(dp, lineoff);
 	for (;;)
 	{
+		/*
+		 * Only continue scanning the page while we have lines left.
+		 *
+		 * Note that this protects us from accessing line pointers past
+		 * PageGetMaxOffsetNumber(); both for forward scans when we resume
+		 * the table scan, and for when we start scanning a new page.
+		 */
 		while (linesleft > 0)
 		{
 			if (ItemIdIsNormal(lpp))
@@ -8556,10 +8570,8 @@ heap_xlog_vacuum(XLogReaderState *record)
 			ItemIdSetUnused(lp);
 		}
 
-		/*
-		 * Update the page's hint bit about whether it has free pointers
-		 */
-		PageSetHasFreeLinePointers(page);
+		/* Attempt to truncate line pointer array now */
+		PageTruncateLinePointerArray(page);
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(buffer);
diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c
index f75502ca2c..0c8e49d3e6 100644
--- a/src/backend/access/heap/pruneheap.c
+++ b/src/backend/access/heap/pruneheap.c
@@ -962,6 +962,10 @@ heap_get_root_tuples(Page page, OffsetNumber *root_offsets)
 		 */
 		for (;;)
 		{
+			/* Sanity check */
+			if (nextoffnum < FirstOffsetNumber || nextoffnum > maxoff)
+				break;
+
 			lp = PageGetItemId(page, nextoffnum);
 
 			/* Check for broken chains */
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 446e3bc452..1d55d0ecf9 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -1444,7 +1444,11 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params, bool aggressive)
 		if (prunestate.has_lpdead_items && vacrel->do_index_vacuuming)
 		{
 			/*
-			 * Wait until lazy_vacuum_heap_rel() to save free space.
+			 * Wait until lazy_vacuum_heap_rel() to save free space.  This
+			 * doesn't just save us some cycles; it also allows us to record
+			 * any additional free space that lazy_vacuum_heap_page() will
+			 * make available in cases where it's possible to truncate the
+			 * page's line pointer array.
 			 *
 			 * Note: The one-pass (no indexes) case is only supposed to make
 			 * it this far when there were no LP_DEAD items during pruning.
@@ -2033,6 +2037,13 @@ lazy_vacuum_all_indexes(LVRelState *vacrel)
  * Pages that never had lazy_scan_prune record LP_DEAD items are not visited
  * at all.
  *
+ * We may also be able to truncate the line pointer array of the heap pages we
+ * visit.  If there is a contiguous group of LP_UNUSED items at the end of the
+ * array, it can be reclaimed as free space.  These LP_UNUSED items usually
+ * start out as LP_DEAD items recorded by lazy_scan_prune (we set items from
+ * each page to LP_UNUSED, and then consider if it's possible to truncate the
+ * page's line pointer array).
+ *
  * Note: the reason for doing this as a second pass is we cannot remove the
  * tuples until we've removed their index entries, and we want to process
  * index entry removal in batches as large as possible.
@@ -2175,7 +2186,8 @@ lazy_vacuum_heap_page(LVRelState *vacrel, BlockNumber blkno, Buffer buffer,
 
 	Assert(uncnt > 0);
 
-	PageSetHasFreeLinePointers(page);
+	/* Attempt to truncate line pointer array now */
+	PageTruncateLinePointerArray(page);
 
 	/*
 	 * Mark buffer dirty before we write WAL.
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 5d5989c2f5..a4eed5cdcd 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -250,8 +250,17 @@ PageAddItemExtended(Page page,
 		/* if no free slot, we'll put it at limit (1st open slot) */
 		if (PageHasFreeLinePointers(phdr))
 		{
-			/* Look for "recyclable" (unused) ItemId */
-			for (offsetNumber = 1; offsetNumber < limit; offsetNumber++)
+			/*
+			 * Scan line pointer array to locate a "recyclable" (unused)
+			 * ItemId.
+			 *
+			 * Always use earlier items first.  PageTruncateLinePointerArray
+			 * can only truncate unused items when they appear as a contiguous
+			 * group at the end of the line pointer array.
+			 */
+			for (offsetNumber = FirstOffsetNumber;
+				 offsetNumber < limit;		/* limit is maxoff+1 */
+				 offsetNumber++)
 			{
 				itemId = PageGetItemId(phdr, offsetNumber);
 
@@ -675,11 +684,23 @@ compactify_tuples(itemIdCompact itemidbase, int nitems, Page page, bool presorte
 /*
  * PageRepairFragmentation
  *
- * Frees fragmented space on a page.
- * It doesn't remove unused line pointers! Please don't change this.
+ * Frees fragmented space on a heap page following pruning.
  *
  * This routine is usable for heap pages only, but see PageIndexMultiDelete.
  *
+ * Never removes unused line pointers.  PageTruncateLinePointerArray can
+ * safely remove some unused line pointers.  It ought to be safe for this
+ * routine to free unused line pointers in roughly the same way, but it's not
+ * clear that that would be beneficial.
+ *
+ * PageTruncateLinePointerArray is only called during VACUUM's second pass
+ * over the heap.  Any unused line pointers that it sees are likely to have
+ * been set to LP_UNUSED (from LP_DEAD) immediately before the time it is
+ * called.  On the other hand, many tables have the vast majority of all
+ * required pruning performed opportunistically (not during VACUUM).  And so
+ * there is, in general, a good chance that all of the unused line pointers
+ * we'll see on the page are ceaselessly recycled, again and again.
+ *
  * Caller had better have a super-exclusive lock on page's buffer.  As a side
  * effect the page's PD_HAS_FREE_LINES hint bit will be set or unset as
  * needed.
@@ -784,6 +805,89 @@ PageRepairFragmentation(Page page)
 		PageClearHasFreeLinePointers(page);
 }
 
+/*
+ * PageTruncateLinePointerArray
+ *
+ * Removes unused line pointers at the end of the line pointer array.
+ *
+ * This routine is usable for heap pages only.  It is called by VACUUM during
+ * its second pass over the heap.  We expect at least one LP_UNUSED line
+ * pointer on the page (if VACUUM didn't have an LP_DEAD item on the page that
+ * it just set to LP_UNUSED then it should not call here).
+ *
+ * We avoid truncating the line pointer array to 0 items, if necessary by
+ * leaving behind a single remaining LP_UNUSED item.  This is a little
+ * arbitrary, but it seems like a good idea to avoid leaving a PageIsEmpty()
+ * page behind.  That is treated as a special case by VACUUM.
+ *
+ * Caller can have either an exclusive lock or a super-exclusive lock on
+ * page's buffer.  The page's PD_HAS_FREE_LINES hint bit will be set or unset
+ * based on whether or not we leave behind any remaining LP_UNUSED items.
+ */
+void
+PageTruncateLinePointerArray(Page page)
+{
+	PageHeader	phdr = (PageHeader) page;
+	bool		countdone = false,
+				sethint = false;
+	int			nunusedend = 0;
+
+	/* Scan line pointer array back-to-front */
+	for (int i = PageGetMaxOffsetNumber(page); i >= FirstOffsetNumber; i--)
+	{
+		ItemId		lp = PageGetItemId(page, i);
+
+		if (!countdone && i > FirstOffsetNumber)
+		{
+			/*
+			 * Still determining which line pointers from the end of the array
+			 * will be truncated away.  Either count another line pointer as
+			 * safe to truncate, or notice that it's not safe to truncate
+			 * additional line pointers (stop counting line pointers).
+			 */
+			if (!ItemIdIsUsed(lp))
+				nunusedend++;
+			else
+				countdone = true;
+		}
+		else
+		{
+			/*
+			 * Once we've stopped counting we still need to figure out if
+			 * there are any remaining LP_UNUSED line pointers somewhere more
+			 * towards the front of the array.
+			 */
+			if (!ItemIdIsUsed(lp))
+			{
+				/*
+				 * This is an unused line pointer that we won't be truncating
+				 * away -- so there is at least one.  Set hint on page.
+				 */
+				sethint = true;
+				break;
+			}
+		}
+	}
+
+	if (nunusedend > 0)
+	{
+		phdr->pd_lower -= sizeof(ItemIdData) * nunusedend;
+
+#ifdef CLOBBER_FREED_MEMORY
+		memset((char *) page + phdr->pd_lower, 0x7F,
+			   sizeof(ItemIdData) * nunusedend);
+#endif
+	}
+	else
+		Assert(sethint);
+
+	/* Set hint bit for PageAddItemExtended */
+	if (sethint)
+		PageSetHasFreeLinePointers(page);
+	else
+		PageClearHasFreeLinePointers(page);
+}
+
 /*
  * PageGetFreeSpace
  *		Returns the size of the free (allocatable) space on a page,
-- 
2.27.0

