From dd5828ac7e53112c1dd2af5ca8fd875e92e59f61 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 1 Jan 2020 12:30:26 -0800
Subject: [PATCH v2] Associate LP_DEAD offsets with WAL record's buffer.

Commit 558a9165e08 taught _bt_delitems_delete() to produce its own XID
horizon on the primary instead of having standbys generate their own
latestRemovedXid.  This brought _bt_delitems_delete() closer to the
behavior of its sibling function, _bt_delitems_vacuum().  A
latestRemovedXid is generated on the primary for both VACUUM and
opportunistic deletion of items with their LP_DEAD bits set.

Bring _bt_delitems_delete() a bit further in the direction of matching
its sibling in how it WAL-logs items that are deleted from the page now.
Associate its array of items to delete with the leaf page buffer, rather
than making it generic WAL data.  This optimization is correct now
because there is no longer any need to generate a latestRemovedXid on a
standby using the array.  Also change xl_btree_delete to use uint32 for
the size of the array of item offsets being deleted; it seems like a
good idea to make this match xl_btree_vacuum.

Bump XLOG_PAGE_MAGIC because xl_btree_delete changed.
---
 src/include/access/nbtree.h           |  3 ++-
 src/include/access/nbtxlog.h          |  6 +++---
 src/backend/access/nbtree/nbtpage.c   | 31 ++++++++++++++-------------
 src/backend/access/nbtree/nbtxlog.c   |  2 +-
 src/backend/access/rmgrdesc/nbtdesc.c |  4 ++--
 5 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 4f84ca83dc..f90ee3a0e0 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -779,7 +779,8 @@ extern bool _bt_page_recyclable(Page page);
 extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
 								OffsetNumber *deletable, int ndeletable);
 extern void _bt_delitems_delete(Relation rel, Buffer buf,
-								OffsetNumber *itemnos, int nitems, Relation heapRel);
+								OffsetNumber *deletable, int ndeletable,
+								Relation heapRel);
 extern int	_bt_pagedel(Relation rel, Buffer buf);
 
 /*
diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h
index 3da5514655..776a9bd723 100644
--- a/src/include/access/nbtxlog.h
+++ b/src/include/access/nbtxlog.h
@@ -126,12 +126,12 @@ typedef struct xl_btree_split
 typedef struct xl_btree_delete
 {
 	TransactionId latestRemovedXid;
-	int			nitems;
+	uint32		ndeleted;
 
-	/* TARGET OFFSET NUMBERS FOLLOW AT THE END */
+	/* DELETED TARGET OFFSET NUMBERS FOLLOW */
 } xl_btree_delete;
 
-#define SizeOfBtreeDelete	(offsetof(xl_btree_delete, nitems) + sizeof(int))
+#define SizeOfBtreeDelete	(offsetof(xl_btree_delete, ndeleted) + sizeof(uint32))
 
 /*
  * This is what we need to know about page reuse within btree.  This record
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 73d28d37a3..6fdff24cac 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -1030,9 +1030,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
 		XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
 
 		/*
-		 * The target-offsets array is not in the buffer, but pretend that it
-		 * is.  When XLogInsert stores the whole buffer, the offsets array
-		 * need not be stored too.
+		 * The deletable array is not in the buffer, but pretend that it is.
+		 * When XLogInsert stores the whole buffer, the offsets array need not
+		 * be stored too.
 		 */
 		XLogRegisterBufData(0, (char *) deletable,
 							ndeletable * sizeof(OffsetNumber));
@@ -1051,16 +1051,16 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
  * As above, must only be used on leaf pages.
  *
  * This routine assumes that the caller has pinned and write locked the
- * buffer.  Also, the given itemnos *must* appear in increasing order in the
- * array.
+ * buffer.  Also, the given deletable array *must* be sorted in ascending
+ * order.
  *
  * This is nearly the same as _bt_delitems_vacuum as far as what it does to
  * the page, but it needs to generate its own recovery conflicts by accessing
- * the heap.  See comments for _bt_delitems_vacuum.
+ * the heap.
  */
 void
 _bt_delitems_delete(Relation rel, Buffer buf,
-					OffsetNumber *itemnos, int nitems,
+					OffsetNumber *deletable, int ndeletable,
 					Relation heapRel)
 {
 	Page		page = BufferGetPage(buf);
@@ -1068,18 +1068,18 @@ _bt_delitems_delete(Relation rel, Buffer buf,
 	TransactionId latestRemovedXid = InvalidTransactionId;
 
 	/* Shouldn't be called unless there's something to do */
-	Assert(nitems > 0);
+	Assert(ndeletable > 0);
 
 	if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
 		latestRemovedXid =
 			index_compute_xid_horizon_for_tuples(rel, heapRel, buf,
-												 itemnos, nitems);
+												 deletable, ndeletable);
 
 	/* No ereport(ERROR) until changes are logged */
 	START_CRIT_SECTION();
 
 	/* Fix the page */
-	PageIndexMultiDelete(page, itemnos, nitems);
+	PageIndexMultiDelete(page, deletable, ndeletable);
 
 	/*
 	 * Unlike _bt_delitems_vacuum, we *must not* clear the vacuum cycle ID,
@@ -1098,18 +1098,19 @@ _bt_delitems_delete(Relation rel, Buffer buf,
 		xl_btree_delete xlrec_delete;
 
 		xlrec_delete.latestRemovedXid = latestRemovedXid;
-		xlrec_delete.nitems = nitems;
+		xlrec_delete.ndeleted = ndeletable;
 
 		XLogBeginInsert();
 		XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
 		XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
 
 		/*
-		 * We need the target-offsets array whether or not we store the whole
-		 * buffer, to allow us to find the latestRemovedXid on a standby
-		 * server.
+		 * The deletable array is not in the buffer, but pretend that it is.
+		 * When XLogInsert stores the whole buffer, the offsets array need not
+		 * be stored too.
 		 */
-		XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
+		XLogRegisterBufData(0, (char *) deletable,
+							ndeletable * sizeof(OffsetNumber));
 
 		recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
 
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 5eca4ab272..8bdbc167c3 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -453,7 +453,7 @@ btree_xlog_delete(XLogReaderState *record)
 
 			unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeDelete);
 
-			PageIndexMultiDelete(page, unused, xlrec->nitems);
+			PageIndexMultiDelete(page, unused, xlrec->ndeleted);
 		}
 
 		/* Mark the page as not containing any LP_DEAD items */
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index e0ec8a4b0b..7d63a7124e 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -53,8 +53,8 @@ btree_desc(StringInfo buf, XLogReaderState *record)
 			{
 				xl_btree_delete *xlrec = (xl_btree_delete *) rec;
 
-				appendStringInfo(buf, "%d items, latest removed xid %u",
-								 xlrec->nitems, xlrec->latestRemovedXid);
+				appendStringInfo(buf, "latestRemovedXid %u; ndeleted %u",
+								 xlrec->latestRemovedXid, xlrec->ndeleted);
 				break;
 			}
 		case XLOG_BTREE_MARK_PAGE_HALFDEAD:
-- 
2.17.1

