From f8733bcb4970db3891bec678bb29099a7e13e13a Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Wed, 23 Dec 2020 12:36:22 +0000
Subject: [PATCH v37 3/3] Optimize DropRelFileNodesAllBuffers() in recovery.

DropRelFileNodesAllBuffers() is optimized to skip the time-consuming
scan of the whole buffer pool during recovery when the relation is
small enough, or when the number of blocks to be invalidated is below
the full scan threshold.  This improves DropRelationFiles()
performance, for example, when the TRUNCATE command truncates off any
of the empty pages at the end of a relation.

We first check if all the relations are candidates for optimization
during recovery, by ensuring that a cached size was returned by
smgrnblocks_cached() for a relation fork.  Similar to
DropRelFileNodeBuffers(), we invalidate buffer blocks by locating them
with BufTableLookup() when we know for certain up to which page of
every fork we could possibly have a buffer.  That can be checked only
during recovery through smgrnblocks_cached(), which returns
InvalidBlockNumber if the fork size is not cached.  Otherwise, we
proceed to a full scan of the whole buffer pool if the size of a
particular relation fork is not cached, which can happen if there are
no updates to that existing fork during recovery.
---
 src/backend/storage/buffer/bufmgr.c | 97 +++++++++++++++++++++++++++++++++----
 src/backend/storage/smgr/smgr.c     | 14 +++---
 src/include/storage/bufmgr.h        |  2 +-
 3 files changed, 96 insertions(+), 17 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f66c641..b9e51a0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3112,28 +3112,33 @@ DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
+DropRelFileNodesAllBuffers(SMgrRelation *smgr_reln, int nnodes)
 {
-	int			i,
-				n = 0;
+	int			i;
+	int			j;
+	int			n = 0;
+	SMgrRelation	*rels;
+	BlockNumber (*block)[MAX_FORKNUM + 1];
+	BlockNumber	nBlocksToInvalidate = 0;
 	RelFileNode *nodes;
+	bool		cached = true;
 	bool		use_bsearch;
 
 	if (nnodes == 0)
 		return;
 
-	nodes = palloc(sizeof(RelFileNode) * nnodes);	/* non-local relations */
+	rels = palloc(sizeof(SMgrRelation) * nnodes);	/* non-local relations */
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	for (i = 0; i < nnodes; i++)
 	{
-		if (RelFileNodeBackendIsTemp(rnodes[i]))
+		if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
 		{
-			if (rnodes[i].backend == MyBackendId)
-				DropRelFileNodeAllLocalBuffers(rnodes[i].node);
+			if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
+				DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
 		}
 		else
-			nodes[n++] = rnodes[i].node;
+			rels[n++] = smgr_reln[i];
 	}
 
 	/*
@@ -3142,10 +3147,83 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	 */
 	if (n == 0)
 	{
-		pfree(nodes);
+		pfree(rels);
 		return;
 	}
 
+	block = (BlockNumber (*)[MAX_FORKNUM + 1])
+			palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
+
+	/*
+	 * Check if all the relations are candidates for buffer drop
+	 * optimization.  Otherwise, we proceed to full scan of the
+	 * whole buffer pool if a size of a particular relation fork
+	 * is not cached, which can happen if there are no updates
+	 * to that fork during recovery.
+	 */
+	for (i = 0; i < n && cached; i++)
+	{
+		for (j = 0; j <= MAX_FORKNUM; j++)
+		{
+			/*
+			 * Get the number of blocks for a relation's fork.
+			 * If we don't have a cached size, then it returns
+			 * InvalidBlockNumber.
+			 */
+			block[i][j] = smgrnblocks_cached(rels[i], j);
+
+			/*
+			 * If the fork size is not cached, check whether or not
+			 * the relation fork exists.  Otherwise, we can skip
+			 * the possibility of costly checks especially if there
+			 * are many relations.  If a fork does not exist, we can
+			 * skip it later when dropping the relation buffers.
+			 */
+			if (block[i][j] == InvalidBlockNumber)
+			{
+				if (!smgrexists(rels[i], j))
+					continue;
+				cached = false;
+				break;
+			}
+
+			nBlocksToInvalidate += block[i][j];
+		}
+	}
+
+	/*
+	 * Enter the optimization if the sizes for all relation forks are
+	 * cached and the total number of blocks to be invalidated for all
+	 * relations is below the full scan threshold.
+	 */
+	if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
+	{
+		for (i = 0; i < n; i++)
+		{
+			/*
+			 * If block to drop is valid, drop the buffers of the fork.
+			 * Zero the firstDelBlock because all buffers will be
+			 * dropped anyway.
+			 */
+			for (j = 0; j <= MAX_FORKNUM; j++)
+			{
+				if (!BlockNumberIsValid(block[i][j]))
+					continue;
+
+				FindAndDropRelFileNodeBuffers(rels[i]->smgr_rnode.node,
+											  j, block[i][j], 0);
+			}
+		}
+		pfree(block);
+		pfree(rels);
+		return;
+	}
+
+	pfree(block);
+	nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
+	for (i = 0; i < n; i++)
+		nodes[i] = rels[i]->smgr_rnode.node;
+
 	/*
 	 * For low number of relations to drop just use a simple walk through, to
 	 * save the bsearch overhead. The threshold to use is rather a guess than
@@ -3201,6 +3279,7 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	}
 
 	pfree(nodes);
+	pfree(rels);
 }
 
 /* ---------------------------------------------------------------------
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index e5d11ab..7162b18 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -391,7 +391,13 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 		return;
 
 	/*
-	 * create an array which contains all relations to be dropped, and close
+	 * Get rid of any remaining buffers for the relations.  bufmgr will just
+	 * drop them without bothering to write the contents.
+	 */
+	DropRelFileNodesAllBuffers(rels, nrels);
+
+	/*
+	 * Create an array which contains all relations to be dropped, and close
 	 * each relation's forks at the smgr level while at it
 	 */
 	rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
@@ -408,12 +414,6 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	}
 
 	/*
-	 * Get rid of any remaining buffers for the relations.  bufmgr will just
-	 * drop them without bothering to write the contents.
-	 */
-	DropRelFileNodesAllBuffers(rnodes, nrels);
-
-	/*
 	 * It'd be nice to tell the stats collector to forget them immediately,
 	 * too. But we can't because we don't know the OIDs.
 	 */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 056f65e..2e5189b 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -205,7 +205,7 @@ extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels)
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
-extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
+extern void DropRelFileNodesAllBuffers(struct SMgrRelationData **smgr_reln, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
-- 
1.8.3.1

