From 08df0a11c72677468f38b813dc6b5b25c73a205c Mon Sep 17 00:00:00 2001
From: Greg Nancarrow <gregn4422@gmail.com>
Date: Tue, 24 Aug 2021 14:04:41 +1000
Subject: [PATCH] Fix parallel worker failed assertion and coredump.

Currently, InitializeParallelDSM() calls GetTransactionSnapshot() and serializes
both the active_snapshot and transaction_snapshot, for parallel workers to
restore and use.
This appears to be correct in the IsolationUsesXactSnapshot() case, but in the
!IsolationUsesXactSnapshot() case the transaction_snapshot is potentially a
later snapshot than the active_snapshot, resulting in setting TransactionXmin
greater than the xmin of the snapshot used in the query execution by the
workers.
For the !IsolationUsesXactSnapshot() case, this patch removes the extraneous
call to GetTransactionSnapshot() and modifies the parallel DSM code to just pass
the active_snapshot, and restore it in the parallel workers as the transaction
snapshot and set it as the active snapshot.
This resolves an assertion failure in SubTransGetTopmostTransaction() that can
occur in a parallel scan, after snapshot suboverflow has occurred.

Discussion: https://postgr.es/m/002f01d748ac$eaa781a0$bff684e0$@tju.edu.cn
---
 src/backend/access/transam/parallel.c | 63 +++++++++++++++++++++------
 1 file changed, 49 insertions(+), 14 deletions(-)

diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c
index 5f690393fc..66d6c2f368 100644
--- a/src/backend/access/transam/parallel.c
+++ b/src/backend/access/transam/parallel.c
@@ -254,8 +254,11 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		shm_toc_estimate_chunk(&pcxt->estimator, guc_len);
 		combocidlen = EstimateComboCIDStateSpace();
 		shm_toc_estimate_chunk(&pcxt->estimator, combocidlen);
-		tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
-		shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
+		if (IsolationUsesXactSnapshot())
+		{
+			tsnaplen = EstimateSnapshotSpace(transaction_snapshot);
+			shm_toc_estimate_chunk(&pcxt->estimator, tsnaplen);
+		}
 		asnaplen = EstimateSnapshotSpace(active_snapshot);
 		shm_toc_estimate_chunk(&pcxt->estimator, asnaplen);
 		tstatelen = EstimateTransactionStateSpace();
@@ -366,11 +369,19 @@ InitializeParallelDSM(ParallelContext *pcxt)
 		SerializeComboCIDState(combocidlen, combocidspace);
 		shm_toc_insert(pcxt->toc, PARALLEL_KEY_COMBO_CID, combocidspace);
 
-		/* Serialize transaction snapshot and active snapshot. */
-		tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
-		SerializeSnapshot(transaction_snapshot, tsnapspace);
-		shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT,
-					   tsnapspace);
+		/*
+		 * Serialize the transaction snapshot if the transaction
+		 * isolation-level uses a transaction snapshot.
+		 */
+		if (IsolationUsesXactSnapshot())
+		{
+			tsnapspace = shm_toc_allocate(pcxt->toc, tsnaplen);
+			SerializeSnapshot(transaction_snapshot, tsnapspace);
+			shm_toc_insert(pcxt->toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT,
+						   tsnapspace);
+		}
+
+		/* Serialize the active snapshot. */
 		asnapspace = shm_toc_allocate(pcxt->toc, asnaplen);
 		SerializeSnapshot(active_snapshot, asnapspace);
 		shm_toc_insert(pcxt->toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, asnapspace);
@@ -1408,14 +1419,38 @@ ParallelWorkerMain(Datum main_arg)
 		shm_toc_lookup(toc, PARALLEL_KEY_SESSION_DSM, false);
 	AttachSession(*(dsm_handle *) session_dsm_handle_space);
 
-	/* Restore transaction snapshot. */
-	tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, false);
-	RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
-							   fps->parallel_leader_pgproc);
-
-	/* Restore active snapshot. */
+	/*
+	 * If the transaction isolation level is REPEATABLE READ or SERIALIZABLE,
+	 * the leader has serialized the transaction snapshot and we must restore
+	 * it. At lower isolation levels, there is no transaction-lifetime
+	 * snapshot, but we need TransactionXmin to get set to a value which is
+	 * less than or equal to the xmin of every snapshot that will be used by
+	 * this worker. The easiest way to accomplish that is to install the
+	 * active snapshot as the transaction snapshot. Code running in this
+	 * parallel worker might take new snapshots via GetTransactionSnapshot()
+	 * or GetLatestSnapshot(), but it shouldn't have any way of acquiring a
+	 * snapshot older than the active snapshot.
+	 */
 	asnapspace = shm_toc_lookup(toc, PARALLEL_KEY_ACTIVE_SNAPSHOT, false);
-	PushActiveSnapshot(RestoreSnapshot(asnapspace));
+	tsnapspace = shm_toc_lookup(toc, PARALLEL_KEY_TRANSACTION_SNAPSHOT, true);
+	if (tsnapspace)
+	{
+		RestoreTransactionSnapshot(RestoreSnapshot(tsnapspace),
+								   fps->parallel_leader_pgproc);
+
+		/* Restore active snapshot. */
+		PushActiveSnapshot(RestoreSnapshot(asnapspace));
+	}
+	else
+	{
+		/*
+		 * Restore the active snapshot and install it as the transaction
+		 * snapshot.
+		 */
+		RestoreTransactionSnapshot(RestoreSnapshot(asnapspace),
+								   fps->parallel_leader_pgproc);
+		PushActiveSnapshot(GetTransactionSnapshot());
+	}
 
 	/*
 	 * We've changed which tuples we can see, and must therefore invalidate
-- 
2.27.0

