From b085a49c49726c44b3a6a5f9be09170207afbd72 Mon Sep 17 00:00:00 2001
From: Dilip Kumar <dilip.kumar@enterprisedb.com>
Date: Wed, 22 Nov 2023 16:32:56 +0530
Subject: [PATCH] test-group-update-poc-no-for-commit

---
 src/backend/access/transam/clog.c             | 34 ++++---
 .../modules/test_injection_points/Makefile    |  2 +-
 .../t/003_clog_group_commit.pl                | 97 +++++++++++++++++++
 3 files changed, 121 insertions(+), 12 deletions(-)
 create mode 100644 src/test/modules/test_injection_points/t/003_clog_group_commit.pl

diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 18ec2a47b5..fa985bcd6e 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -44,6 +44,7 @@
 #include "storage/proc.h"
 #include "storage/sync.h"
 #include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
 
 /*
  * Defines for CLOG page sizes.  A page is the same BLCKSZ as is used
@@ -313,6 +314,8 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
 		 */
 		if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
 		{
+			elog(LOG, "procno %d got the lock", MyProc->pgprocno);
+			INJECTION_POINT("ClogGroupCommit");
 			/* Got the lock without waiting!  Do the update. */
 			TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
 											   lsn, pageno);
@@ -321,6 +324,7 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
 		}
 		else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno))
 		{
+			elog(LOG, "procno %d completed group update", MyProc->pgprocno);
 			/* Group update mechanism has done the work. */
 			return;
 		}
@@ -472,7 +476,10 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 		if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst,
 										   &nextidx,
 										   (uint32) proc->pgprocno))
+		{
+			elog(LOG, "procno %d for xid %d added for group update", proc->pgprocno, xid);
 			break;
+		}
 	}
 
 	/*
@@ -485,6 +492,8 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 	{
 		int			extraWaits = 0;
 
+		elog(LOG, "procno %d is follower and wait for group leader to update commit status of xid %d", proc->pgprocno, xid);
+
 		/* Sleep until the leader updates our XID status. */
 		pgstat_report_wait_start(WAIT_EVENT_XACT_GROUP_UPDATE);
 		for (;;)
@@ -502,20 +511,10 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 		/* Fix semaphore count for any absorbed wakeups */
 		while (extraWaits-- > 0)
 			PGSemaphoreUnlock(proc->sem);
+		elog(LOG, "procno %d is follower and commit status of xid %d is updated by leader", proc->pgprocno, xid);
 		return true;
 	}
 
-	/*
-	 * We are leader so clear the list of processes waiting for group XID
-	 * status update, saving a pointer to the head of the list. Trying to pop
-	 * elements one at a time could lead to an ABA problem.
-	 */
-	nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst,
-									 INVALID_PGPROCNO);
-
-	/* Remember head of list so we can perform wakeups after dropping lock. */
-	wakeidx = nextidx;
-
 	/*
 	 * Acquire the SLRU bank lock for the first page in the group.  And if
 	 * there are multiple pages in the group which falls under different banks
@@ -529,6 +528,18 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 	prevpageno = ProcGlobal->allProcs[nextidx].clogGroupMemberPage;
 	prevlock = SimpleLruGetBankLock(XactCtl, prevpageno);
 	LWLockAcquire(prevlock, LW_EXCLUSIVE);
+	elog(LOG, "procno %d is group leader and got the lock", proc->pgprocno);
+
+	/*
+	 * We are leader so clear the list of processes waiting for group XID
+	 * status update, saving a pointer to the head of the list. Trying to pop
+	 * elements one at a time could lead to an ABA problem.
+	 */
+	nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst,
+									 INVALID_PGPROCNO);
+
+	/* Remember head of list so we can perform wakeups after dropping lock. */
+	wakeidx = nextidx;
 
 	/* Walk the list and update the status of all XIDs. */
 	while (nextidx != INVALID_PGPROCNO)
@@ -567,6 +578,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
 										   nextproc->clogGroupMemberXidStatus,
 										   nextproc->clogGroupMemberLsn,
 										   nextproc->clogGroupMemberPage);
+		elog(LOG, "group leader updated status of xid %d", nextproc->clogGroupMemberXid);
 
 		/* Move to next proc in list. */
 		nextidx = pg_atomic_read_u32(&nextproc->clogGroupNext);
diff --git a/src/test/modules/test_injection_points/Makefile b/src/test/modules/test_injection_points/Makefile
index 4696c1b013..8974182b56 100644
--- a/src/test/modules/test_injection_points/Makefile
+++ b/src/test/modules/test_injection_points/Makefile
@@ -8,7 +8,7 @@ PGFILEDESC = "test_injection_points - test injection points"
 
 EXTENSION = test_injection_points
 DATA = test_injection_points--1.0.sql
-REGRESS = test_injection_points
+#REGRESS = test_injection_points
 
 TAP_TESTS = 1
 
diff --git a/src/test/modules/test_injection_points/t/003_clog_group_commit.pl b/src/test/modules/test_injection_points/t/003_clog_group_commit.pl
new file mode 100644
index 0000000000..229c798144
--- /dev/null
+++ b/src/test/modules/test_injection_points/t/003_clog_group_commit.pl
@@ -0,0 +1,97 @@
+# Test consistent of initial snapshot data.
+
+# This requires a node with wal_level=logical combined with an injection
+# point that forces a failure when a snapshot is initially built with a
+# logical slot created.
+#
+# See bug https://postgr.es/m/CAFiTN-s0zA1Kj0ozGHwkYkHwa5U0zUE94RSc_g81WrpcETB5=w@mail.gmail.com.
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init(allows_streaming => 'logical');
+$node->start;
+
+$node->safe_psql('postgres', 'CREATE EXTENSION test_injection_points;');
+$node->safe_psql('postgres', 'CREATE TABLE test(a int);');
+
+# Consume multiple xids so that next xids get generated in new banks
+$node->safe_psql(
+	'postgres', q{
+do $$
+begin
+  for i in 1..128001 loop
+    -- use an exception block so that each iteration eats an XID
+    begin
+      insert into test values (i);
+    exception
+      when division_by_zero then null;
+    end;
+  end loop;
+end$$;
+});
+
+my $result = $node->safe_psql('postgres',
+	"SELECT txid_current();");
+is($result, qq(128740),
+	'check column trigger applied even on update for other column');
+
+$node->safe_psql('postgres',
+  "SELECT test_injection_points_attach('ClogGroupCommit', 'wait');");
+
+
+# First session will get the slru lock and will wait on injection point
+my $session1 = $node->background_psql('postgres');
+
+$session1->query_until(
+	qr/start/, q(
+\echo start
+INSERT INTO test VALUES(1);
+));
+
+#create another 4 session which will not get the lock as first session is holding that lock
+#so these all will go for group update
+my $session2 = $node->background_psql('postgres');
+
+$session2->query_until(
+	qr/start/, q(
+\echo start
+INSERT INTO test VALUES(2);
+));
+
+my $session3 = $node->background_psql('postgres');
+
+$session3->query_until(
+	qr/start/, q(
+\echo start
+INSERT INTO test VALUES(3);
+));
+
+my $session4 = $node->background_psql('postgres');
+
+$session4->query_until(
+	qr/start/, q(
+\echo start
+INSERT INTO test VALUES(4);
+));
+
+my $session5 = $node->background_psql('postgres');
+
+$session5->query_until(
+	qr/start/, q(
+\echo start
+INSERT INTO test VALUES(5);
+));
+
+# Now wake up the first session and let next 4 session perform the group update
+$node->safe_psql('postgres',
+  "SELECT test_injection_points_wake();");
+$node->safe_psql('postgres',
+  "SELECT test_injection_points_detach('ClogGroupCommit');");
+
+done_testing();
-- 
2.39.2 (Apple Git-143)

