From 88c6a83543ff0ba1a788217983543163169f7432 Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Thu, 21 Mar 2024 17:11:16 +0900
Subject: [PATCH v77 3/3] PoC: Make shared TidStore create its own DSA area.

---
 src/backend/access/common/tidstore.c          | 85 +++++++++++++++----
 src/backend/access/heap/vacuumlazy.c          |  5 +-
 src/backend/commands/vacuumparallel.c         | 38 +++------
 src/include/access/tidstore.h                 |  7 +-
 .../modules/test_tidstore/test_tidstore.c     | 14 +--
 5 files changed, 90 insertions(+), 59 deletions(-)

diff --git a/src/backend/access/common/tidstore.c b/src/backend/access/common/tidstore.c
index 745393806d..8f5df7d89f 100644
--- a/src/backend/access/common/tidstore.c
+++ b/src/backend/access/common/tidstore.c
@@ -7,9 +7,9 @@
  * Internally it uses a radix tree as the storage for TIDs. The key is the
  * BlockNumber and the value is a bitmap of offsets, BlocktableEntry.
  *
- * TidStore can be shared among parallel worker processes by passing DSA area
- * to TidStoreCreate(). Other backends can attach to the shared TidStore by
- * TidStoreAttach().
+ * TidStore can be shared among parallel worker processes by using
+ * TidStoreCreateShared(). Other backends can attach to the shared TidStore
+ * by TidStoreAttach().
  *
  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -105,9 +105,25 @@ struct TidStoreIter
 	TidStoreIterResult output;
 };
 
+static TidStore * tidstore_create_internal(size_t max_bytes, bool shared,
+										   int tranche_id);
 static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
 									   BlocktableEntry *page);
 
+/* Public APIs to create local or shared TidStore */
+
+TidStore *
+TidStoreCreateLocal(size_t max_bytes)
+{
+	return tidstore_create_internal(max_bytes, false, 0);
+}
+
+TidStore *
+TidStoreCreateShared(size_t max_bytes, int tranche_id)
+{
+	return tidstore_create_internal(max_bytes, true, tranche_id);
+}
+
 /*
  * Create a TidStore. The TidStore will live in the memory context that is
  * CurrentMemoryContext at the time of this call. The TID storage, backed
@@ -118,8 +134,8 @@ static void tidstore_iter_extract_tids(TidStoreIter *iter, BlockNumber blkno,
  *
  * The returned object is allocated in backend-local memory.
  */
-TidStore *
-TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id)
+static TidStore *
+tidstore_create_internal(size_t max_bytes, bool shared, int tranche_id)
 {
 	TidStore   *ts;
 	size_t		initBlockSize = ALLOCSET_DEFAULT_INITSIZE;
@@ -143,8 +159,27 @@ TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id)
 										   initBlockSize,
 										   maxBlockSize);
 
-	if (area != NULL)
+	if (shared)
 	{
+		dsa_area *area;
+		size_t	dsa_init_size = DSA_INITIAL_SEGMENT_SIZE;
+		size_t	dsa_max_size = DSA_MAX_SEGMENT_SIZE;
+
+		/*
+		 * Choose the DSA initial and max segment sizes to be no longer than
+		 * 1/16 and 1/8 of max_bytes, respectively.
+		 */
+		while (16 * dsa_init_size > max_bytes * 1024L)
+			dsa_init_size >>= 1;
+		while (8 * dsa_max_size > max_bytes * 1024L)
+			dsa_max_size >>= 1;
+
+		if (dsa_init_size < DSA_MIN_SEGMENT_SIZE)
+			dsa_init_size = DSA_MIN_SEGMENT_SIZE;
+		if (dsa_max_size < DSA_MIN_SEGMENT_SIZE)
+			dsa_max_size = DSA_MIN_SEGMENT_SIZE;
+
+		area = dsa_create_ext(tranche_id, dsa_init_size, dsa_max_size);
 		ts->tree.shared = shared_ts_create(ts->rt_context, area,
 										   tranche_id);
 		ts->area = area;
@@ -156,20 +191,25 @@ TidStoreCreate(size_t max_bytes, dsa_area *area, int tranche_id)
 }
 
 /*
- * Attach to the shared TidStore using the given  handle. The returned object
- * is allocated in backend-local memory using the CurrentMemoryContext.
+ * Attach to the shared TidStore. 'area_handle' is the DSA handle where
+ * the TidStore is created. 'handle' is the dsa_pointer returned by
+ * TidStoreGetHandle(). The returned object is allocated in backend-local
+ * memory using the CurrentMemoryContext.
  */
 TidStore *
-TidStoreAttach(dsa_area *area, dsa_pointer handle)
+TidStoreAttach(dsa_handle area_handle, dsa_pointer handle)
 {
 	TidStore   *ts;
+	dsa_area *area;
 
-	Assert(area != NULL);
+	Assert(area_handle != DSA_HANDLE_INVALID);
 	Assert(DsaPointerIsValid(handle));
 
 	/* create per-backend state */
 	ts = palloc0(sizeof(TidStore));
 
+	area = dsa_attach(area_handle);
+
 	/* Find the shared the shared radix tree */
 	ts->tree.shared = shared_ts_attach(area, handle);
 	ts->area = area;
@@ -178,10 +218,8 @@ TidStoreAttach(dsa_area *area, dsa_pointer handle)
 }
 
 /*
- * Detach from a TidStore. This detaches from radix tree and frees the
- * backend-local resources. The radix tree will continue to exist until
- * it is either explicitly destroyed, or the area that backs it is returned
- * to the operating system.
+ * Detach from a TidStore. This also detaches from radix tree and frees
+ * the backend-local resources.
  */
 void
 TidStoreDetach(TidStore *ts)
@@ -189,6 +227,8 @@ TidStoreDetach(TidStore *ts)
 	Assert(TidStoreIsShared(ts));
 
 	shared_ts_detach(ts->tree.shared);
+	dsa_detach(ts->area);
+
 	pfree(ts);
 }
 
@@ -232,9 +272,13 @@ TidStoreUnlock(TidStore *ts)
 void
 TidStoreDestroy(TidStore *ts)
 {
-	/* Destroy underlying radix tree */
 	if (TidStoreIsShared(ts))
+	{
+		/* Destroy underlying radix tree */
 		shared_ts_free(ts->tree.shared);
+
+		dsa_detach(ts->area);
+	}
 	else
 		local_ts_free(ts->tree.local);
 
@@ -420,6 +464,17 @@ TidStoreMemoryUsage(TidStore *ts)
 		return local_ts_memory_usage(ts->tree.local);
 }
 
+/*
+ * Return the DSA area where the TidStore lives.
+ */
+dsa_area *
+TidStoreGetDSA(TidStore *ts)
+{
+	Assert(TidStoreIsShared(ts));
+
+	return ts->area;
+}
+
 dsa_pointer
 TidStoreGetHandle(TidStore *ts)
 {
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index f6c09c8da1..7bb2a95a82 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -3155,7 +3155,7 @@ dead_items_alloc(LVRelState *vacrel, int nworkers)
 	}
 
 	/* Serial VACUUM case */
-	vacrel->dead_items = TidStoreCreate(vac_work_mem, NULL, 0);
+	vacrel->dead_items = TidStoreCreateLocal(vac_work_mem);
 
 	dead_items_info = (VacDeadItemsInfo *) palloc(sizeof(VacDeadItemsInfo));
 	dead_items_info->max_bytes = vac_work_mem * 1024L;
@@ -3196,8 +3196,7 @@ dead_items_reset(LVRelState *vacrel)
 
 	/* Recreate the tidstore with the same max_bytes limitation */
 	TidStoreDestroy(dead_items);
-	vacrel->dead_items = TidStoreCreate(vacrel->dead_items_info->max_bytes,
-										NULL, 0);
+	vacrel->dead_items = TidStoreCreateLocal(vacrel->dead_items_info->max_bytes);
 
 	/* Reset the counter */
 	vacrel->dead_items_info->num_items = 0;
diff --git a/src/backend/commands/vacuumparallel.c b/src/backend/commands/vacuumparallel.c
index 4bd0df3b5e..6e45fa4b95 100644
--- a/src/backend/commands/vacuumparallel.c
+++ b/src/backend/commands/vacuumparallel.c
@@ -45,7 +45,7 @@
  * use small integers.
  */
 #define PARALLEL_VACUUM_KEY_SHARED			1
-#define PARALLEL_VACUUM_KEY_DEAD_ITEMS		2
+/* 2 was PARALLEL_VACUUM_KEY_DEAD_ITEMS */
 #define PARALLEL_VACUUM_KEY_QUERY_TEXT		3
 #define PARALLEL_VACUUM_KEY_BUFFER_USAGE	4
 #define PARALLEL_VACUUM_KEY_WAL_USAGE		5
@@ -111,6 +111,9 @@ typedef struct PVShared
 	/* Counter for vacuuming and cleanup */
 	pg_atomic_uint32 idx;
 
+	/* DSA handle where the TidStore lives */
+	dsa_handle	dead_items_dsa_handle;
+
 	/* DSA pointer to the shared TidStore */
 	dsa_pointer dead_items_handle;
 
@@ -183,7 +186,6 @@ struct ParallelVacuumState
 
 	/* Shared dead items space among parallel vacuum workers */
 	TidStore   *dead_items;
-	dsa_area   *dead_items_area;
 
 	/* Points to buffer usage area in DSM */
 	BufferUsage *buffer_usage;
@@ -249,12 +251,9 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	PVIndStats *indstats;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
-	void	   *area_space;
-	dsa_area   *dead_items_dsa;
 	bool	   *will_parallel_vacuum;
 	Size		est_indstats_len;
 	Size		est_shared_len;
-	Size		dsa_minsize = dsa_minimum_size();
 	int			nindexes_mwm = 0;
 	int			parallel_workers = 0;
 	int			querylen;
@@ -303,10 +302,6 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 
-	/* Initial size of DSA for dead tuples -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
-	shm_toc_estimate_chunk(&pcxt->estimator, dsa_minsize);
-	shm_toc_estimate_keys(&pcxt->estimator, 1);
-
 	/*
 	 * Estimate space for BufferUsage and WalUsage --
 	 * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
@@ -371,15 +366,8 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	pvs->indstats = indstats;
 
 	/* Prepare DSA space for dead items */
-	area_space = shm_toc_allocate(pcxt->toc, dsa_minsize);
-	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, area_space);
-	dead_items_dsa = dsa_create_in_place(area_space, dsa_minsize,
-										 LWTRANCHE_PARALLEL_VACUUM_DSA,
-										 pcxt->seg);
-	dead_items = TidStoreCreate(vac_work_mem, dead_items_dsa,
-								LWTRANCHE_PARALLEL_VACUUM_DSA);
+	dead_items = TidStoreCreateShared(vac_work_mem, LWTRANCHE_PARALLEL_VACUUM_DSA);
 	pvs->dead_items = dead_items;
-	pvs->dead_items_area = dead_items_dsa;
 
 	/* Prepare shared information */
 	shared = (PVShared *) shm_toc_allocate(pcxt->toc, est_shared_len);
@@ -390,6 +378,7 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 		(nindexes_mwm > 0) ?
 		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
 		maintenance_work_mem;
+	shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items));
 	shared->dead_items_handle = TidStoreGetHandle(dead_items);
 	shared->dead_items_info.max_bytes = vac_work_mem * 1024L;
 
@@ -461,7 +450,6 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
 	}
 
 	TidStoreDestroy(pvs->dead_items);
-	dsa_detach(pvs->dead_items_area);
 
 	DestroyParallelContext(pvs->pcxt);
 	ExitParallelMode();
@@ -493,11 +481,11 @@ parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
 	 * limitation we just used.
 	 */
 	TidStoreDestroy(dead_items);
-	dsa_trim(pvs->dead_items_area);
-	pvs->dead_items = TidStoreCreate(dead_items_info->max_bytes, pvs->dead_items_area,
-									 LWTRANCHE_PARALLEL_VACUUM_DSA);
+	pvs->dead_items = dead_items = TidStoreCreateShared(dead_items_info->max_bytes,
+														LWTRANCHE_PARALLEL_VACUUM_DSA);
 
 	/* Update the DSA pointer for dead_items to the new one */
+	pvs->shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items));
 	pvs->shared->dead_items_handle = TidStoreGetHandle(dead_items);
 
 	/* Reset the counter */
@@ -1005,8 +993,6 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	PVIndStats *indstats;
 	PVShared   *shared;
 	TidStore   *dead_items;
-	void	   *area_space;
-	dsa_area   *dead_items_area;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
 	int			nindexes;
@@ -1051,9 +1037,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 											 false);
 
 	/* Set dead items */
-	area_space = shm_toc_lookup(toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, false);
-	dead_items_area = dsa_attach_in_place(area_space, seg);
-	dead_items = TidStoreAttach(dead_items_area, shared->dead_items_handle);
+	dead_items = TidStoreAttach(shared->dead_items_dsa_handle,
+								shared->dead_items_handle);
 
 	/* Set cost-based vacuum delay */
 	VacuumUpdateCosts();
@@ -1102,7 +1087,6 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 						  &wal_usage[ParallelWorkerNumber]);
 
 	TidStoreDetach(dead_items);
-	dsa_detach(dead_items_area);
 
 	/* Pop the error context stack */
 	error_context_stack = errcallback.previous;
diff --git a/src/include/access/tidstore.h b/src/include/access/tidstore.h
index 8cf4e94f12..1cc695f90a 100644
--- a/src/include/access/tidstore.h
+++ b/src/include/access/tidstore.h
@@ -29,9 +29,9 @@ typedef struct TidStoreIterResult
 	OffsetNumber *offsets;
 } TidStoreIterResult;
 
-extern TidStore *TidStoreCreate(size_t max_bytes, dsa_area *dsa,
-								int tranche_id);
-extern TidStore *TidStoreAttach(dsa_area *dsa, dsa_pointer rt_dp);
+extern TidStore *TidStoreCreateLocal(size_t max_bytes);
+extern TidStore *TidStoreCreateShared(size_t max_bytes, int tranche_id);
+extern TidStore *TidStoreAttach(dsa_handle dsa_handle, dsa_pointer rt_dp);
 extern void TidStoreDetach(TidStore *ts);
 extern void TidStoreLockExclusive(TidStore *ts);
 extern void TidStoreLockShare(TidStore *ts);
@@ -45,5 +45,6 @@ extern TidStoreIterResult *TidStoreIterateNext(TidStoreIter *iter);
 extern void TidStoreEndIterate(TidStoreIter *iter);
 extern size_t TidStoreMemoryUsage(TidStore *ts);
 extern dsa_pointer TidStoreGetHandle(TidStore *ts);
+extern dsa_area *TidStoreGetDSA(TidStore *ts);
 
 #endif							/* TIDSTORE_H */
diff --git a/src/test/modules/test_tidstore/test_tidstore.c b/src/test/modules/test_tidstore/test_tidstore.c
index c74ad2cf8b..3d4af77dda 100644
--- a/src/test/modules/test_tidstore/test_tidstore.c
+++ b/src/test/modules/test_tidstore/test_tidstore.c
@@ -34,7 +34,6 @@ PG_FUNCTION_INFO_V1(test_is_full);
 PG_FUNCTION_INFO_V1(test_destroy);
 
 static TidStore *tidstore = NULL;
-static dsa_area *dsa = NULL;
 static size_t tidstore_empty_size;
 
 /* array for verification of some tests */
@@ -94,7 +93,6 @@ test_create(PG_FUNCTION_ARGS)
 	size_t		array_init_size = 1024;
 
 	Assert(tidstore == NULL);
-	Assert(dsa == NULL);
 
 	/*
 	 * Create the TidStore on TopMemoryContext so that the same process use it
@@ -109,18 +107,16 @@ test_create(PG_FUNCTION_ARGS)
 		tranche_id = LWLockNewTrancheId();
 		LWLockRegisterTranche(tranche_id, "test_tidstore");
 
-		dsa = dsa_create(tranche_id);
+		tidstore = TidStoreCreateShared(tidstore_max_size, tranche_id);
 
 		/*
 		 * Remain attached until end of backend or explicitly detached so that
 		 * the same process use the tidstore for subsequent tests.
 		 */
-		dsa_pin_mapping(dsa);
-
-		tidstore = TidStoreCreate(tidstore_max_size, dsa, tranche_id);
+		dsa_pin_mapping(TidStoreGetDSA(tidstore));
 	}
 	else
-		tidstore = TidStoreCreate(tidstore_max_size, NULL, 0);
+		tidstore = TidStoreCreateLocal(tidstore_max_size);
 
 	tidstore_empty_size = TidStoreMemoryUsage(tidstore);
 
@@ -309,9 +305,5 @@ test_destroy(PG_FUNCTION_ARGS)
 	pfree(items.lookup_tids);
 	pfree(items.iter_tids);
 
-	if (dsa)
-		dsa_detach(dsa);
-	dsa = NULL;
-
 	PG_RETURN_VOID();
 }
-- 
2.39.3

