From 17c0fa7a61618a2e088b3f88f7e6aa8fa7c47376 Mon Sep 17 00:00:00 2001
From: "duankunren.dkr" <duankunren.dkr@alibaba-inc.com>
Date: Fri, 20 Mar 2026 13:55:03 +0800
Subject: [PATCH] Add multixact backwards compatibility test

Test the crash-restart code path of the compat logic in
RecordNewMultiXact(), verifying that a missing offsets page is detected
via SimpleLruDoesPhysicalPageExist() and initialized after crash-restart.
---
 src/backend/access/transam/multixact.c        |  17 ++
 src/test/recovery/meson.build                 |   3 +-
 .../t/049_multixact_backwards_compat.pl       | 162 ++++++++++++++++++
 3 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 src/test/recovery/t/049_multixact_backwards_compat.pl

diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 8594faa944f..fa01b8294ed 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -89,6 +89,7 @@
 #include "storage/procarray.h"
 #include "utils/fmgrprotos.h"
 #include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 
 
@@ -913,6 +914,22 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
 	 */
 	multi = GetNewMultiXactId(nmembers, &offset);
 
+#ifdef USE_INJECTION_POINTS
+	/*
+	 * Injection point for testing the compat logic race condition.
+	 * At this point, nextMXact has been advanced but CREATE_ID has not
+	 * been written to WAL yet.  A checkpoint running now will capture
+	 * the advanced nextMXact without the corresponding CREATE_ID.
+	 *
+	 * We temporarily leave the critical section to allow the injection
+	 * point callback to allocate memory.  NOTE(review): the offsets SLRU may
+	 * already have been extended (ZERO_OFF_PAGE) by now; confirm this is safe.
+	 */
+	END_CRIT_SECTION();
+	INJECTION_POINT("multixact-create-from-members-after-get-id");
+	START_CRIT_SECTION();
+#endif
+
 	/* Make an XLOG entry describing the new MXID. */
 	xlrec.mid = multi;
 	xlrec.moff = offset;
diff --git a/src/test/recovery/meson.build b/src/test/recovery/meson.build
index 9f01d71dc3b..ffc356dd6c3 100644
--- a/src/test/recovery/meson.build
+++ b/src/test/recovery/meson.build
@@ -55,7 +55,8 @@ tests += {
       't/045_archive_restartpoint.pl',
       't/046_checkpoint_logical_slot.pl',
       't/047_checkpoint_physical_slot.pl',
-      't/048_vacuum_horizon_floor.pl'
+      't/048_vacuum_horizon_floor.pl',
+      't/049_multixact_backwards_compat.pl'
     ],
   },
 }
diff --git a/src/test/recovery/t/049_multixact_backwards_compat.pl b/src/test/recovery/t/049_multixact_backwards_compat.pl
new file mode 100644
index 00000000000..41a85cd4c96
--- /dev/null
+++ b/src/test/recovery/t/049_multixact_backwards_compat.pl
@@ -0,0 +1,162 @@
+# Copyright (c) 2026, PostgreSQL Global Development Group
+
+# Test the backwards-compatibility logic in RecordNewMultiXact() that
+# initializes the next offsets page when replaying WAL from older minor
+# versions.  After a crash-restart, the tracking state is unknown, so the
+# code falls back to SimpleLruDoesPhysicalPageExist() to detect whether
+# the page needs initialization.
+#
+# The test sets nextMulti near a page boundary, uses an injection point to
+# create a window where CHECKPOINT captures the advanced nextMulti before
+# CREATE_ID is written, then crashes the standby, truncates the offset file
+# to simulate the old-version condition, and verifies the compat logic
+# initializes the missing page on restart.
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+if (($ENV{enable_injection_points} // '') ne 'yes')    # unset counts as "no"
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $blcksz = 8192;    # NOTE(review): assumes the default 8kB block size
+my $offsets_per_page = $blcksz / 4;    # 2048 entries of 4 bytes each
+
+# Initialize primary with a long checkpoint_timeout to prevent automatic
+# checkpoints from interfering with manual checkpoint timing.
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 'physical');
+$node_primary->append_conf('postgresql.conf', qq{
+checkpoint_timeout = 600
+log_min_messages = debug1
+});
+$node_primary->start;
+
+# Check injection_points extension availability.
+my $result = $node_primary->safe_psql('postgres',
+	"SELECT count(*) > 0 FROM pg_available_extensions WHERE name = 'injection_points';");
+if ($result eq 'f')
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+$node_primary->safe_psql('postgres', 'CREATE EXTENSION injection_points');
+$node_primary->safe_psql('postgres',
+	"CREATE TABLE t(id int PRIMARY KEY);
+	 INSERT INTO t SELECT generate_series(1,10);");
+
+# Advance nextMulti to the last entry on offset page 0, so the next
+# allocation crosses a page boundary.
+$node_primary->stop;
+my $target_multi = $offsets_per_page - 1;    # 2047
+command_ok(
+	['pg_resetwal', '-m', "$target_multi,1",
+	 $node_primary->data_dir],
+	'pg_resetwal to set nextMulti near page boundary');
+$node_primary->start;
+
+# Verify nextMulti is where we expect.
+my $next_multi = $node_primary->safe_psql('postgres',
+	"SELECT next_multixact_id FROM pg_control_checkpoint()");
+is($next_multi % $offsets_per_page, $offsets_per_page - 1,
+	'nextMulti is at last entry of offset page');
+
+# Take a backup and create the standby.
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', qq{
+log_min_messages = debug1
+});
+$node_standby->start;
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# Pause after GetNewMultiXactId() but before XLogInsert(CREATE_ID).
+$node_primary->safe_psql('postgres',
+	"SELECT injection_points_attach('multixact-create-from-members-after-get-id', 'wait');");
+
+# Session A: hold a FOR SHARE lock on the row.
+my $psql_a = $node_primary->background_psql('postgres', on_error_stop => 0);
+$psql_a->query_safe("BEGIN; SELECT * FROM t WHERE id = 1 FOR SHARE;");
+
+# Flush dirty buffers so the checkpoint below won't block.
+$node_primary->safe_psql('postgres', 'CHECKPOINT');
+
+# Session B: trigger multixact creation by taking a second FOR SHARE
+# lock on the same row.  This hits the injection point.
+my $psql_b = $node_primary->background_psql('postgres', on_error_stop => 0);
+$psql_b->query_until(qr/starting_for_share/,
+	q(\echo starting_for_share
+SELECT * FROM t WHERE id = 1 FOR SHARE;
+));
+
+# Wait for the backend to hit the injection point.
+$node_primary->wait_for_event('client backend',
+	'multixact-create-from-members-after-get-id');
+note "backend paused at injection point";
+
+# CHECKPOINT while paused: captures the advanced nextMulti but not CREATE_ID.
+$node_primary->safe_psql('postgres', 'CHECKPOINT');
+
+# Ensure the standby does a restartpoint so it will replay from this
+# checkpoint's redo point (after ZERO_OFF_PAGE) on restart.
+$node_primary->wait_for_replay_catchup($node_standby);
+$node_standby->safe_psql('postgres', 'CHECKPOINT');
+
+# Crash the standby.
+$node_standby->stop('immediate');
+
+# Remove page 1 from the offset file to simulate the old-version condition
+# where ZERO_OFF_PAGE was never written for this page.
+my $standby_data = $node_standby->data_dir;
+my $offset_file = "$standby_data/pg_multixact/offsets/0000";
+ok(-f $offset_file, 'offset file exists on standby');
+
+my $orig_size = -s $offset_file;
+note "offset file size before truncation: $orig_size";
+
+truncate($offset_file, $blcksz)
+	or die "cannot truncate $offset_file: $!";
+
+my $new_size = -s $offset_file;
+is($new_size, $blcksz, 'offset file truncated to one page');
+
+# Wake up the injection point so CREATE_ID gets written to WAL.
+$node_primary->safe_psql('postgres',
+	"SELECT injection_points_wakeup('multixact-create-from-members-after-get-id');");
+
+# Clean up background sessions.
+$psql_a->query_safe("COMMIT;");
+$psql_a->quit;
+$psql_b->quit;
+
+$node_primary->safe_psql('postgres',
+	"SELECT injection_points_detach('multixact-create-from-members-after-get-id');");
+
+# Restart the standby.  The compat logic should detect the missing page
+# via SimpleLruDoesPhysicalPageExist() and initialize it.
+my $logstart = -s $node_standby->logfile;
+$node_standby->start;
+
+$node_primary->wait_for_replay_catchup($node_standby);
+
+# Verify the compat logic fired.
+ok($node_standby->log_contains(
+	"next offsets page is not initialized, initializing it now", $logstart),
+	'compat logic initialized the missing offsets page after crash-restart');
+
+# Verify the standby is healthy.
+my $check = $node_standby->safe_psql('postgres', 'SELECT 1');
+is($check, '1', 'standby is healthy after crash-restart');
+
+$node_standby->stop;
+$node_primary->stop;
+
+done_testing();
-- 
2.32.0.3.g01195cf9f

