From f4014423a7e543792bb12c30029a23411af115ed Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postgres@gmail.com>
Date: Fri, 22 Nov 2024 14:54:01 +0100
Subject: [PATCH v0 1/2] Vectorize smgrextend

smgrextend() was one of the last smgr APIs that could only handle a single
buffer per call.  Replace it with smgrextendv(), which can extend a relation
by multiple blocks at once; smgrextend() remains as an inline wrapper.
---
 src/include/storage/md.h        |  5 +-
 src/include/storage/smgr.h      | 12 ++++-
 src/backend/storage/smgr/md.c   | 92 ++++++++++++++++++++++-----------
 src/backend/storage/smgr/smgr.c | 17 +++---
 4 files changed, 84 insertions(+), 42 deletions(-)

diff --git a/src/include/storage/md.h b/src/include/storage/md.h
index b72293c79a..0e01ca01da 100644
--- a/src/include/storage/md.h
+++ b/src/include/storage/md.h
@@ -26,8 +26,9 @@ extern void mdclose(SMgrRelation reln, ForkNumber forknum);
 extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
 extern void mdunlink(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo);
-extern void mdextend(SMgrRelation reln, ForkNumber forknum,
-					 BlockNumber blocknum, const void *buffer, bool skipFsync);
+extern void mdextendv(SMgrRelation reln, ForkNumber forknum,
+					  BlockNumber blocknum, const void **buffers,
+					  int nblocks, bool skipFsync);
 extern void mdzeroextend(SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, int nblocks, bool skipFsync);
 extern bool mdprefetch(SMgrRelation reln, ForkNumber forknum,
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 5ab992f5bd..bee0f3eb6b 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -86,8 +86,9 @@ extern void smgrreleaserellocator(RelFileLocatorBackend rlocator);
 extern void smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
 extern void smgrdosyncall(SMgrRelation *rels, int nrels);
 extern void smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo);
-extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
-					   BlockNumber blocknum, const void *buffer, bool skipFsync);
+extern void smgrextendv(SMgrRelation reln, ForkNumber forknum,
+						BlockNumber blocknum, const void **buffers,
+						int nblocks, bool skipFsync);
 extern void smgrzeroextend(SMgrRelation reln, ForkNumber forknum,
 						   BlockNumber blocknum, int nblocks, bool skipFsync);
 extern bool smgrprefetch(SMgrRelation reln, ForkNumber forknum,
@@ -126,4 +127,11 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	smgrwritev(reln, forknum, blocknum, &buffer, 1, skipFsync);
 }
 
+static inline void
+smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+		   const void *buffer, bool skipFsync)
+{
+	smgrextendv(reln, forknum, blocknum, &buffer, 1, skipFsync);
+}
+
 #endif							/* SMGR_H */
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index cc8a80ee96..56151aa65a 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -448,17 +448,17 @@ mdunlinkfork(RelFileLocatorBackend rlocator, ForkNumber forknum, bool isRedo)
 }
 
 /*
- * mdextend() -- Add a block to the specified relation.
+ * mdextendv() -- Add blocks to the specified relation.
  *
- * The semantics are nearly the same as mdwrite(): write at the
+ * The semantics are nearly the same as mdwritev(): write at the
  * specified position.  However, this is to be used for the case of
  * extending a relation (i.e., blocknum is at or beyond the current
  * EOF).  Note that we assume writing a block beyond current EOF
  * causes intervening file space to become filled with zeroes.
  */
 void
-mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-		 const void *buffer, bool skipFsync)
+mdextendv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+		  const void **buffers, int nblocks, bool skipFsync)
 {
 	off_t		seekpos;
 	int			nbytes;
@@ -466,7 +466,7 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 
 	/* If this build supports direct I/O, the buffer must be I/O aligned. */
 	if (PG_O_DIRECT != 0 && PG_IO_ALIGN_SIZE <= BLCKSZ)
-		Assert((uintptr_t) buffer == TYPEALIGN(PG_IO_ALIGN_SIZE, buffer));
+		Assert((uintptr_t) *buffers == TYPEALIGN(PG_IO_ALIGN_SIZE, *buffers));
 
 	/* This assert is too expensive to have on normally ... */
 #ifdef CHECK_WRITE_VS_EXTEND
@@ -479,40 +479,72 @@ mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	 * InvalidBlockNumber.  (Note that this failure should be unreachable
 	 * because of upstream checks in bufmgr.c.)
 	 */
-	if (blocknum == InvalidBlockNumber)
+	if ((uint64) blocknum + nblocks > (uint64) InvalidBlockNumber)
 		ereport(ERROR,
 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 				 errmsg("cannot extend file \"%s\" beyond %u blocks",
 						relpath(reln->smgr_rlocator, forknum),
 						InvalidBlockNumber)));
 
-	v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
 
-	seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+	while (nblocks > 0)
+	{
+		/* Stay within this segment and within ios[]; the loop does the rest. */
+		int			seg_remaining = RELSEG_SIZE - (blocknum % RELSEG_SIZE);
+		int			write_this_seg = Min(Min(seg_remaining, nblocks), PG_IOV_MAX);
+		struct iovec ios[PG_IOV_MAX];
+		int			tot_io_len = write_this_seg * BLCKSZ;
+		int			num_ios = 0;
 
-	Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+		v = _mdfd_getseg(reln, forknum, blocknum, skipFsync, EXTENSION_CREATE);
 
-	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ, seekpos, WAIT_EVENT_DATA_FILE_EXTEND)) != BLCKSZ)
-	{
-		if (nbytes < 0)
+		seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+
+		Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
+
+		/* Build the iovec array, merging buffers that are adjacent in memory. */
+		for (int i = 0; i < write_this_seg; i++)
+		{
+			struct iovec *prev = (num_ios > 0) ? &ios[num_ios - 1] : NULL;
+
+			if (prev && (char *) prev->iov_base + prev->iov_len == (char *) buffers[i])
+				prev->iov_len += BLCKSZ;
+			else
+			{
+				ios[num_ios].iov_base = (void *) buffers[i];
+				ios[num_ios].iov_len = BLCKSZ;
+				num_ios++;
+			}
+		}
+
+		nbytes = (int) FileWriteV(v->mdfd_vfd, ios, num_ios, seekpos,
+								  WAIT_EVENT_DATA_FILE_EXTEND);
+		if (nbytes != tot_io_len)
+		{
+			if (nbytes < 0)
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not extend file \"%s\": %m",
+								FilePathName(v->mdfd_vfd)),
+						 errhint("Check free disk space.")));
+			/* short write: complain appropriately */
 			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not extend file \"%s\": %m",
-							FilePathName(v->mdfd_vfd)),
-					 errhint("Check free disk space.")));
-		/* short write: complain appropriately */
-		ereport(ERROR,
-				(errcode(ERRCODE_DISK_FULL),
-				 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
-						FilePathName(v->mdfd_vfd),
-						nbytes, BLCKSZ, blocknum),
-				 errhint("Check free disk space.")));
-	}
+					(errcode(ERRCODE_DISK_FULL),
+					 errmsg("could not extend file \"%s\": wrote only %d of %d bytes at block %u",
+							FilePathName(v->mdfd_vfd),
+							nbytes, tot_io_len, blocknum),
+					 errhint("Check free disk space.")));
+		}
 
-	if (!skipFsync && !SmgrIsTemp(reln))
-		register_dirty_segment(reln, forknum, v);
+		if (!skipFsync && !SmgrIsTemp(reln))
+			register_dirty_segment(reln, forknum, v);
 
-	Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+		Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
+
+		blocknum += write_this_seg;
+		buffers += write_this_seg;
+		nblocks -= write_this_seg;
+	}
 }
 
 /*
@@ -1676,9 +1708,9 @@ _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
 				char	   *zerobuf = palloc_aligned(BLCKSZ, PG_IO_ALIGN_SIZE,
 													 MCXT_ALLOC_ZERO);
 
-				mdextend(reln, forknum,
-						 nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
-						 zerobuf, skipFsync);
+				mdextendv(reln, forknum,
+						  nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+						  (const void **) &zerobuf, 1, skipFsync);
 				pfree(zerobuf);
 			}
 			flags = O_CREAT;
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 925728eb6c..7302fb496d 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -82,8 +82,9 @@ typedef struct f_smgr
 	bool		(*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_unlink) (RelFileLocatorBackend rlocator, ForkNumber forknum,
 								bool isRedo);
-	void		(*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
-								BlockNumber blocknum, const void *buffer, bool skipFsync);
+	void		(*smgr_extendv) (SMgrRelation reln, ForkNumber forknum,
+								 BlockNumber blocknum, const void **buffers,
+								 int nblocks, bool skipFsync);
 	void		(*smgr_zeroextend) (SMgrRelation reln, ForkNumber forknum,
 									BlockNumber blocknum, int nblocks, bool skipFsync);
 	bool		(*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum,
@@ -116,7 +117,7 @@ static const f_smgr smgrsw[] = {
 		.smgr_create = mdcreate,
 		.smgr_exists = mdexists,
 		.smgr_unlink = mdunlink,
-		.smgr_extend = mdextend,
+		.smgr_extendv = mdextendv,
 		.smgr_zeroextend = mdzeroextend,
 		.smgr_prefetch = mdprefetch,
 		.smgr_maxcombine = mdmaxcombine,
@@ -535,11 +536,11 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
  * causes intervening file space to become filled with zeroes.
  */
 void
-smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
-		   const void *buffer, bool skipFsync)
+smgrextendv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
+			const void **buffers, int nblocks, bool skipFsync)
 {
-	smgrsw[reln->smgr_which].smgr_extend(reln, forknum, blocknum,
-										 buffer, skipFsync);
+	smgrsw[reln->smgr_which].smgr_extendv(reln, forknum, blocknum,
+										  buffers, nblocks, skipFsync);
 
 	/*
-	 * Normally we expect this to increase nblocks by one, but if the cached
+	 * Normally we expect this to grow the fork by nblocks, but if the cached
@@ -547,7 +548,7 @@ smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 	 * kernel.
 	 */
 	if (reln->smgr_cached_nblocks[forknum] == blocknum)
-		reln->smgr_cached_nblocks[forknum] = blocknum + 1;
+		reln->smgr_cached_nblocks[forknum] = blocknum + nblocks;
 	else
 		reln->smgr_cached_nblocks[forknum] = InvalidBlockNumber;
 }
-- 
2.45.2

