From 26b34d171d2bb185a2d927b88a77a4b0cacb0c88 Mon Sep 17 00:00:00 2001
From: Asim R P <apraveen@pivotal.io>
Date: Thu, 19 Sep 2019 17:14:55 +0530
Subject: [PATCH] Tests for replay of create database operation on standby

A couple of tests to demonstrate that a standby fails to replay a create
database WAL record during crash recovery, if one or more of the underlying
directories are missing from the file system.  This can happen if a drop
tablespace or drop database WAL record has been replayed in archive
recovery, before a crash.  And then the create database record happens
to be replayed again during crash recovery.  The failures indicate bugs
that need to be fixed.

The first test, TEST 4, performs several DDL operations resulting in a
database directory being removed, along with a few create database
operations.  It expects crash recovery to succeed because for each
missing directory encountered during create database replay, a matching
drop tablespace or drop database WAL record is found later.

The second test, TEST 5, validates that a standby rightfully aborts replay
during archive recovery, if a missing directory is encountered when
replaying create database WAL record.

These tests have been proposed and implemented in various ways by
Alexandra, Anastasia, Kyotaro, Paul and me.
---
 src/test/recovery/t/011_crash_recovery.pl | 150 +++++++++++++++++++++++++++++-
 1 file changed, 149 insertions(+), 1 deletion(-)

diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl
index 526a3481fb..1cea17c7d4 100644
--- a/src/test/recovery/t/011_crash_recovery.pl
+++ b/src/test/recovery/t/011_crash_recovery.pl
@@ -6,6 +6,7 @@ use warnings;
 use PostgresNode;
 use TestLib;
 use Test::More;
+use File::Path qw(rmtree);
 use Config;
 if ($Config{osname} eq 'MSWin32')
 {
@@ -15,7 +16,7 @@ if ($Config{osname} eq 'MSWin32')
 }
 else
 {
-	plan tests => 3;
+	plan tests => 5;
 }
 
 my $node = get_new_node('master');
@@ -66,3 +67,150 @@ is($node->safe_psql('postgres', qq[SELECT txid_status('$xid');]),
 	'aborted', 'xid is aborted after crash');
 
 $tx->kill_kill;
+
+# TEST 4
+#
+# Ensure that a missing tablespace directory during crash recovery on
+# a standby is handled correctly.  The standby should finish crash
+# recovery successfully because a matching drop database record is
+# found in the WAL.  The following scenarios are covered:
+#
+# 1. Create a database against a user-defined tablespace then drop the
+#    tablespace.
+#
+# 2. Move a database from source tablespace to target tablespace then
+#    drop the source tablespace.
+#
+# 3. Create a database from another database as template then drop the
+#    template database.
+
+my $node_master = get_new_node('master2');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create tablespace directories for the master
+my $dropme_ts_master = TestLib::tempdir;
+$dropme_ts_master = TestLib::perl2host($dropme_ts_master);
+my $source_ts_master = TestLib::tempdir;
+$source_ts_master = TestLib::perl2host($source_ts_master);
+my $target_ts_master = TestLib::tempdir;
+$target_ts_master = TestLib::perl2host($target_ts_master);
+
+$node_master->safe_psql('postgres',
+						qq[CREATE TABLESPACE dropme_ts location '$dropme_ts_master';
+						   CREATE TABLESPACE source_ts location '$source_ts_master';
+						   CREATE TABLESPACE target_ts location '$target_ts_master';
+						   CREATE DATABASE template_db IS_TEMPLATE = true;]);
+
+my $dropme_ts_standby = TestLib::tempdir;
+$dropme_ts_standby = TestLib::perl2host($dropme_ts_standby);
+my $source_ts_standby = TestLib::tempdir;
+$source_ts_standby = TestLib::perl2host($source_ts_standby);
+my $target_ts_standby = TestLib::tempdir;
+$target_ts_standby = TestLib::perl2host($target_ts_standby);
+
+# Take backup, mapping each master tablespace to a standby directory
+my $backup_name = 'my_backup';
+my $ts_mapping = "$dropme_ts_master=$dropme_ts_standby," .
+  "$source_ts_master=$source_ts_standby," .
+  "$target_ts_master=$target_ts_standby";
+$node_master->backup($backup_name, tablespace_mappings => $ts_mapping);
+
+my $node_standby = get_new_node('standby2');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Make sure connection is made
+$node_master->poll_query_until(
+	'postgres', 'SELECT count(*) = 1 FROM pg_stat_replication');
+
+# Let the standby replay the tablespace creation, then force a restartpoint
+$node_master->wait_for_catchup($node_standby, 'replay',
+							   $node_master->lsn('insert'));
+$node_standby->safe_psql('postgres', 'CHECKPOINT');
+
+# Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
+# DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records
+# to be applied to already-removed directories.
+$node_master->safe_psql('postgres',
+						q[CREATE DATABASE dropme_db WITH TABLESPACE dropme_ts;
+						  CREATE DATABASE moveme_db TABLESPACE source_ts;
+						  ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+						  DROP DATABASE dropme_db;
+						  CREATE DATABASE newdb TEMPLATE template_db;
+						  ALTER DATABASE template_db IS_TEMPLATE = false;
+						  DROP TABLESPACE source_ts;
+						  DROP TABLESPACE dropme_ts;
+						  DROP DATABASE template_db;]);
+$node_master->wait_for_catchup($node_standby, 'replay',
+							   $node_master->lsn('insert'));
+$node_standby->stop('immediate');
+
+# Should restart ignoring directory creation error.
+is($node_standby->start(fail_ok => 1), 1, 'standby crash recovery succeeds');
+
+# TEST 5
+#
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress).
+
+$node_master = get_new_node('master4');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Create tablespace
+my $tspDir_master = TestLib::tempdir;
+my $realTSDir_master = TestLib::perl2host($tspDir_master);
+$node_master->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$realTSDir_master'");
+$node_master->safe_psql('postgres', "CREATE DATABASE db1 TABLESPACE ts1");
+
+my $tspDir_standby = TestLib::tempdir;
+my $realTSDir_standby = TestLib::perl2host($tspDir_standby);
+
+# Take backup
+$backup_name = 'my_backup';
+$node_master->backup($backup_name,
+					 tablespace_mappings =>
+					   "$realTSDir_master=$realTSDir_standby");
+$node_standby = get_new_node('standby4');
+$node_standby->init_from_backup($node_master, $backup_name, has_streaming => 1);
+$node_standby->start;
+
+# Make sure standby reached consistency and starts accepting connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove standby tablespace directory so it will be missing when
+# replay resumes.
+#
+# XXX For some reason, the tablespace mapping is not honored and the
+# standby ends up getting a different temp dir than what was specified
+# in the tablespace mapping.  So get the tablespace directory by
+# querying standby.
+$realTSDir_standby = $node_standby->safe_psql(
+	'postgres',
+	"select pg_tablespace_location(oid) from pg_tablespace where spcname = 'ts1'");
+rmtree($realTSDir_standby);
+
+# Create a database in the tablespace and a table in default tablespace
+$node_master->safe_psql('postgres',
+						q[CREATE TABLE should_not_replay_insertion(a int);
+						  CREATE DATABASE db2 WITH TABLESPACE ts1;
+						  INSERT INTO should_not_replay_insertion VALUES (1);]);
+
+# Standby should fail and should not silently skip replaying the WAL
+if ($node_master->poll_query_until_params(
+		'postgres',
+		'SELECT count(*) = 0 FROM pg_stat_replication',
+		timeout => 5) == 1)
+{
+	pass('standby failed as expected');
+	# We know that the standby has failed.  Setting its pid to
+	# undefined avoids error when PostgresNode module tries to stop the
+	# standby node as part of tear_down sequence.
+	$node_standby->{_pid} = undef;
+}
+else
+{
+	fail('standby did not fail within 5 seconds');
+}
-- 
2.14.3 (Apple Git-98)

