From 6447cd808d421bd9f95a0e5ea64c45d597018149 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Tue, 4 Aug 2020 17:23:04 +0300
Subject: [PATCH] Remove btree page items after page unlink

Currently, page unlink leaves the remaining items "as is", but replay of the
corresponding WAL record re-initializes the page, leaving it with no items.
For the sake of consistency, this commit makes the primary delete all the
items during page unlink as well.

Thanks to this change, we no longer mask the contents of deleted btree pages
for WAL consistency checking.

Discussion: https://postgr.es/m/CAPpHfdt_OTyQpXaPJcWzV2N-LNeNJseNB-K_A66qG%3DL518VTFw%40mail.gmail.com
Author: Alexander Korotkov
Reviewed-by: Peter Geoghegan
---
 contrib/amcheck/verify_nbtree.c     |  7 ++-----
 src/backend/access/nbtree/nbtpage.c | 10 ++++++++++
 src/backend/access/nbtree/nbtxlog.c | 10 +---------
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index e4d501a85d1..c9f9e755dcc 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -2864,11 +2864,8 @@ palloc_btree_page(BtreeCheckState *state, BlockNumber blocknum)
 	 * As noted at the beginning of _bt_binsrch(), an internal page must have
 	 * children, since there must always be a negative infinity downlink
 	 * (there may also be a highkey).  In the case of non-rightmost leaf
-	 * pages, there must be at least a highkey.  Deleted pages on replica
-	 * might contain no items, because page unlink re-initializes
-	 * page-to-be-deleted.  Deleted pages with no items might be on primary
-	 * too due to preceding recovery, but on primary new deletions can't
-	 * happen concurrently to amcheck.
+	 * pages, there must be at least a highkey.  The exceptions are deleted
+	 * pages, which contain no items.
 	 *
 	 * This is correct when pages are half-dead, since internal pages are
 	 * never half-dead, and leaf pages must have a high key when half-dead
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 53dff326808..214c4da3981 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -2058,6 +2058,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	BTMetaPageData *metad = NULL;
 	ItemId		itemid;
 	Page		page;
+	PageHeader	header;
 	BTPageOpaque opaque;
 	bool		rightsib_is_rightmost;
 	int			targetlevel;
@@ -2327,6 +2328,15 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, BlockNumber scanblkno,
 	opaque->btpo_flags |= BTP_DELETED;
 	opaque->btpo.xact = ReadNewTransactionId();
 
+	/*
+	 * Replaying page unlink on replica will re-initialize the target page
+	 * leaving it with no items.  For consistency, we remove all the items on
+	 * primary too.
+	 */
+	header = (PageHeader) page;
+	header->pd_lower = SizeOfPageHeaderData;
+	header->pd_upper = header->pd_special;
+
 	/* And update the metapage, if needed */
 	if (BufferIsValid(metabuf))
 	{
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 09d1b0e3419..be0fa450f31 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -1051,15 +1051,7 @@ btree_mask(char *pagedata, BlockNumber blkno)
 
 	maskopaq = (BTPageOpaque) PageGetSpecialPointer(page);
 
-	if (P_ISDELETED(maskopaq))
-	{
-		/*
-		 * Mask page content on a DELETED page since it will be re-initialized
-		 * during replay. See btree_xlog_unlink_page() for details.
-		 */
-		mask_page_content(page);
-	}
-	else if (P_ISLEAF(maskopaq))
+	if (P_ISLEAF(maskopaq))
 	{
 		/*
 		 * In btree leaf pages, it is possible to modify the LP_FLAGS without
-- 
2.14.3

