commit 0acb3def366c4795970ca1b53bf1918f7c85b924
Author: Chris Traverswq <chris.travers@gmail.com>
Date:   Sun Mar 17 17:32:01 2019 +0800

    Initial patch submission

diff --git a/doc/src/sgml/ref/pg_rewind.sgml b/doc/src/sgml/ref/pg_rewind.sgml
index 53a64ee29e..105ad13aad 100644
--- a/doc/src/sgml/ref/pg_rewind.sgml
+++ b/doc/src/sgml/ref/pg_rewind.sgml
@@ -48,16 +48,26 @@ PostgreSQL documentation
   </para>
 
   <para>
-   The result is equivalent to replacing the target data directory with the
-   source one. Only changed blocks from relation files are copied;
-   all other files are copied in full, including configuration files. The
-   advantage of <application>pg_rewind</application> over taking a new base backup, or
-   tools like <application>rsync</application>, is that <application>pg_rewind</application> does
-   not require reading through unchanged blocks in the cluster. This makes
+   The result is equivalent to replacing the data-related files in the target
+   data directory with those from the source. Only changed blocks from relation files
+   are copied; all other files relating to control or WAL information are copied
+   in full. The advantage of <application>pg_rewind</application> over taking a new base
+   backup, or tools like <application>rsync</application>, is that <application>pg_rewind</application>
+   does not require reading through unchanged blocks in the cluster. This makes
    it a lot faster when the database is large and only a small
    fraction of blocks differ between the clusters.
   </para>
 
+  <para>
+    A second advantage is predictability.  <application>pg_rewind</application> is aware of
+    which directories are relevant to restoring replication and which ones are not.
+    The result is that you get something of a guaranteed state at the end.  Log
+    files on the source are unlikely to clobber those on the target.  The
+    <filename>postgresql.auto.conf</filename> file is unlikely to be copied over.  Replication
+    slot information is removed (and must be added again), and so forth.  Your
+    system is as it had been before, but the data is synchronized from the source.
+  </para>
+
   <para>
    <application>pg_rewind</application> examines the timeline histories of the source
    and target clusters to determine the point where they diverged, and
@@ -244,7 +254,7 @@ PostgreSQL documentation
 
    <para>
     The basic idea is to copy all file system-level changes from the source
-    cluster to the target cluster:
+    cluster to the target cluster if they affect the stored data:
    </para>
 
    <procedure>
diff --git a/src/bin/pg_rewind/copy_fetch.c b/src/bin/pg_rewind/copy_fetch.c
index a283405f6c..7c42e96474 100644
--- a/src/bin/pg_rewind/copy_fetch.c
+++ b/src/bin/pg_rewind/copy_fetch.c
@@ -21,9 +21,31 @@
 #include "logging.h"
 #include "pg_rewind.h"
 
-static void recurse_dir(const char *datadir, const char *path,
+void recurse_dir(const char *datadir, const char *path,
 			process_file_callback_t callback);
 
+/* List of directories to synchronize:
+ * base data dirs (and tablespaces)
+ * WAL/transaction data
+ * and that is it.
+ *
+ * This array is NULL-terminated to make
+ * it easy to expand.
+ */
+
+const char *rewind_dirs[] = {
+    "base",         /* Default tablespace */
+    "global",       /* Global tablespace */
+    "pg_commit_ts", /* In case we need to do PITR before being up to sync */
+    "pg_logical",   /* WAL related and no good reason to exclude */
+    "pg_multixact", /* WAL related and may be needed for vacuum-related reasons */
+    "pg_tblspc",    /* Other tablespaces */
+    "pg_twophase",  /* Mostly to *clear* */
+    "pg_wal",       /* WAL */
+    "pg_xact",      /* Commits of transactions */
+    NULL
+};
+
 static void execute_pagemap(datapagemap_t *pagemap, const char *path);
 
 /*
@@ -31,18 +53,21 @@ static void execute_pagemap(datapagemap_t *pagemap, const char *path);
  * for each file.
  */
 void
-traverse_datadir(const char *datadir, process_file_callback_t callback)
+traverse_rewinddirs(const char *datadir, process_file_callback_t callback)
 {
-	recurse_dir(datadir, NULL, callback);
+	const char **dir;
+
+	for (dir = rewind_dirs; *dir != NULL; dir++)	/* NULL ends the list */
+		recurse_dir(datadir, *dir, callback);
 }
 
 /*
- * recursive part of traverse_datadir
+ * recursive part of traverse_rewinddirs
  *
  * parentpath is the current subdirectory's path relative to datadir,
  * or NULL at the top level.
  */
-static void
+void
 recurse_dir(const char *datadir, const char *parentpath,
 			process_file_callback_t callback)
 {
diff --git a/src/bin/pg_rewind/fetch.c b/src/bin/pg_rewind/fetch.c
index 03a5fd675f..df42f6c4c6 100644
--- a/src/bin/pg_rewind/fetch.c
+++ b/src/bin/pg_rewind/fetch.c
@@ -27,8 +27,12 @@
 void
 fetchSourceFileList(void)
 {
-	if (datadir_source)
-		traverse_datadir(datadir_source, &process_source_file);
+	if (datadir_source){
+		if(data_only)
+			traverse_rewinddirs(datadir_source, &process_source_file);
+		else
+			recurse_dir(datadir_source, NULL, &process_source_file);
+	}
 	else
 		libpqProcessFileList();
 }
diff --git a/src/bin/pg_rewind/fetch.h b/src/bin/pg_rewind/fetch.h
index a694e8b157..b76fa62313 100644
--- a/src/bin/pg_rewind/fetch.h
+++ b/src/bin/pg_rewind/fetch.h
@@ -36,9 +36,11 @@ extern void libpqConnect(const char *connstr);
 extern XLogRecPtr libpqGetCurrentXlogInsertLocation(void);
 
 /* in copy_fetch.c */
+extern const char *rewind_dirs[];
 extern void copy_executeFileMap(filemap_t *map);
 
 typedef void (*process_file_callback_t) (const char *path, file_type_t type, size_t size, const char *link_target);
-extern void traverse_datadir(const char *datadir, process_file_callback_t callback);
+extern void recurse_dir(const char *datadir, const char *path, process_file_callback_t callback);
+extern void traverse_rewinddirs(const char *datadir, process_file_callback_t callback);
 
 #endif							/* FETCH_H */
diff --git a/src/bin/pg_rewind/libpq_fetch.c b/src/bin/pg_rewind/libpq_fetch.c
index d06e277432..7fb5fdc6c6 100644
--- a/src/bin/pg_rewind/libpq_fetch.c
+++ b/src/bin/pg_rewind/libpq_fetch.c
@@ -153,8 +153,8 @@ libpqProcessFileList(void)
 {
 	PGresult   *res;
 	const char *sql;
-	int			i;
-
+	int			i, p;
+	
 	/*
 	 * Create a recursive directory listing of the whole data directory.
 	 *
@@ -169,7 +169,8 @@ libpqProcessFileList(void)
 		"WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
 		"  SELECT '' AS path, filename, size, isdir FROM\n"
 		"  (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
-		"        pg_stat_file(fn.filename, true) AS this\n"
+		"   LATERAL pg_stat_file(fn.filename, true) AS this\n"
+		"  WHERE filename = $1 OR $2\n"
 		"  UNION ALL\n"
 		"  SELECT parent.path || parent.filename || '/' AS path,\n"
 		"         fn, this.size, this.isdir\n"
@@ -183,44 +184,59 @@ libpqProcessFileList(void)
 		"FROM files\n"
 		"LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
 		"                             AND oid::text = files.filename\n";
-	res = PQexec(conn, sql);
 
-	if (PQresultStatus(res) != PGRES_TUPLES_OK)
-		pg_fatal("could not fetch file list: %s",
-				 PQresultErrorMessage(res));
+	/* Going through the directories in a loop.  Doing it this way
+	 * makes it easier to add more inclusions later.
+	 *
+	 * Note that the query filters out on top-level directories before
+	 * recursion so this will not give us problems in terms of listing
+	 * lots of files many times.
+	 */
+	for (p = 0; rewind_dirs[p] != NULL; ++p)
+	{
+		const char *paths[2];
+		paths[0] = rewind_dirs[p];
+		paths[1] = data_only ? "f" : "t";
+		res = PQexecParams(conn, sql,  2, NULL, paths, NULL, NULL, 0);
 
-	/* sanity check the result set */
-	if (PQnfields(res) != 4)
-		pg_fatal("unexpected result set while fetching file list\n");
+		if (PQresultStatus(res) != PGRES_TUPLES_OK)
+			pg_fatal("could not fetch file list: %s",
+				 PQresultErrorMessage(res));
 
-	/* Read result to local variables */
-	for (i = 0; i < PQntuples(res); i++)
-	{
-		char	   *path = PQgetvalue(res, i, 0);
-		int64		filesize = atol(PQgetvalue(res, i, 1));
-		bool		isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
-		char	   *link_target = PQgetvalue(res, i, 3);
-		file_type_t type;
+		/* sanity check the result set */
+		if (PQnfields(res) != 4)
+			pg_fatal("unexpected result set while fetching file list\n");
 
-		if (PQgetisnull(res, 0, 1))
+		/* Read result to local variables */
+		for (i = 0; i < PQntuples(res); i++)
 		{
-			/*
-			 * The file was removed from the server while the query was
-			 * running. Ignore it.
-			 */
-			continue;
+			char	   *path = PQgetvalue(res, i, 0);
+			int64		filesize = atol(PQgetvalue(res, i, 1));
+			bool		isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
+			char	   *link_target = PQgetvalue(res, i, 3);
+			file_type_t type;
+
+			if (PQgetisnull(res, i, 1))
+			{
+				/*
+				 * No size for this row: the file was removed from the server
+				 * while the query was running. Ignore it.
+				 */
+				continue;
+			}
+
+			if (link_target[0])
+				type = FILE_TYPE_SYMLINK;
+			else if (isdir)
+				type = FILE_TYPE_DIRECTORY;
+			else
+				type = FILE_TYPE_REGULAR;
+			process_source_file(path, type, filesize, link_target);
 		}
-
-		if (link_target[0])
-			type = FILE_TYPE_SYMLINK;
-		else if (isdir)
-			type = FILE_TYPE_DIRECTORY;
-		else
-			type = FILE_TYPE_REGULAR;
-
-		process_source_file(path, type, filesize, link_target);
+		PQclear(res);
+		if (!data_only)
+			break;
 	}
-	PQclear(res);
 }
 
 /*----
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f1d6bf48a..3fbaeb71f8 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -57,6 +57,7 @@ bool		debug = false;
 bool		showprogress = false;
 bool		dry_run = false;
 bool		do_sync = true;
+bool		data_only = false;
 
 /* Target history */
 TimeLineHistoryEntry *targetHistory;
@@ -72,6 +73,7 @@ usage(const char *progname)
 	printf(_("      --source-pgdata=DIRECTORY  source data directory to synchronize with\n"));
 	printf(_("      --source-server=CONNSTR    source server to synchronize with\n"));
 	printf(_("  -n, --dry-run                  stop before modifying anything\n"));
+	printf(_("      --data-only                only rewind data files\n"));
 	printf(_("  -N, --no-sync                  do not wait for changes to be written\n"));
 	printf(_("                                 safely to disk\n"));
 	printf(_("  -P, --progress                 write progress messages\n"));
@@ -95,6 +97,7 @@ main(int argc, char **argv)
 		{"no-sync", no_argument, NULL, 'N'},
 		{"progress", no_argument, NULL, 'P'},
 		{"debug", no_argument, NULL, 3},
+		{"data-only", no_argument, NULL, 4},
 		{NULL, 0, NULL, 0}
 	};
 	int			option_index;
@@ -163,6 +166,9 @@ main(int argc, char **argv)
 			case 2:				/* --source-server */
 				connstr_source = pg_strdup(optarg);
 				break;
+			case 4:
+				data_only = true;
+				break;
 		}
 	}
 
@@ -308,7 +314,10 @@ main(int argc, char **argv)
 	pg_log(PG_PROGRESS, "reading source file list\n");
 	fetchSourceFileList();
 	pg_log(PG_PROGRESS, "reading target file list\n");
-	traverse_datadir(datadir_target, &process_target_file);
+	if(data_only)
+		traverse_rewinddirs(datadir_target, &process_target_file);
+	else
+		recurse_dir(datadir_target, NULL, &process_target_file);
 
 	/*
 	 * Read the target WAL from last checkpoint before the point of fork, to
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index 83b2898b8b..87996aedd4 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -25,6 +25,7 @@ extern bool debug;
 extern bool showprogress;
 extern bool dry_run;
 extern int	WalSegSz;
+extern bool data_only;
 
 /* Target history */
 extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_rewind/t/006_extrafiles_min.pl b/src/bin/pg_rewind/t/006_extrafiles_min.pl
new file mode 100644
index 0000000000..c2d5e7853f
--- /dev/null
+++ b/src/bin/pg_rewind/t/006_extrafiles_min.pl
@@ -0,0 +1,95 @@
+# Test that pg_rewind --data-only leaves extra files and directories in the data dirs alone.
+
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 4;
+use Data::Dumper;
+
+use File::Find;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+
+# Run one rewind scenario with --data-only and check that extra, non-data
+# directories in the target are left exactly as they were.  $test_mode is
+# 'local' or 'remote' and is passed through to the RewindTest helpers.
+sub run_test
+{
+	my $test_mode = shift;
+
+	# Build the fixture tree "$datadir/tst_${label}_dir": two files plus a
+	# subdirectory holding a third file, each containing "in ${label}N".
+	# mkdir failures are fatal so a broken fixture cannot be mistaken for a
+	# pg_rewind result.
+	my $make_tree = sub {
+		my ($datadir, $label) = @_;
+		my $top    = "$datadir/tst_${label}_dir";
+		my $subdir = "$top/${label}_subdir";
+
+		mkdir $top or die "could not create directory \"$top\": $!";
+		append_to_file("$top/${label}_file$_", "in ${label}$_") for 1 .. 2;
+		mkdir $subdir or die "could not create directory \"$subdir\": $!";
+		append_to_file("$subdir/${label}_file3", "in ${label}3");
+		return;
+	};
+
+	RewindTest::setup_cluster($test_mode);
+	RewindTest::start_master();
+
+	my $test_master_datadir = $node_master->data_dir;
+
+	# Created before the standby is cloned, so it is present in both.
+	$make_tree->($test_master_datadir, 'both');
+
+	RewindTest::create_standby($test_mode);
+
+	# Create different subdirs and files in master and standby.
+	my $test_standby_datadir = $node_standby->data_dir;
+	$make_tree->($test_standby_datadir, 'standby');
+	$make_tree->($test_master_datadir,  'master');
+
+	RewindTest::promote_standby();
+
+	# Second argument true: run_pg_rewind adds --data-only.
+	RewindTest::run_pg_rewind($test_mode, 1);
+
+	# Collect every path below the rewound (master) data directory whose
+	# name contains "tst_".
+	my @paths;
+	find(
+		sub {
+			push @paths, $File::Find::name if $File::Find::name =~ m{tst_};
+		},
+		$test_master_datadir);
+	@paths = sort @paths;
+
+	# Only the trees the master already had may be listed; nothing from
+	# tst_standby_dir is expected to have been copied over.
+	my @expected = map { "$test_master_datadir/$_" } qw(
+	  tst_both_dir
+	  tst_both_dir/both_file1
+	  tst_both_dir/both_file2
+	  tst_both_dir/both_subdir
+	  tst_both_dir/both_subdir/both_file3
+	  tst_master_dir
+	  tst_master_dir/master_file1
+	  tst_master_dir/master_file2
+	  tst_master_dir/master_subdir
+	  tst_master_dir/master_subdir/master_file3
+	);
+	is_deeply(\@paths, \@expected, "file lists match")
+	  or diag(Dumper(\@paths));
+
+	RewindTest::clean_rewind_test();
+	return;
+}
+
+# Run the test in both modes.
+run_test('local');
+run_test('remote');
+
+exit(0);
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm
index 85cae7e47b..5137c812c5 100644
--- a/src/bin/pg_rewind/t/RewindTest.pm
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@@ -200,6 +200,7 @@ sub promote_standby
 sub run_pg_rewind
 {
 	my $test_mode       = shift;
+	my $data_only       = shift;
 	my $master_pgdata   = $node_master->data_dir;
 	my $standby_pgdata  = $node_standby->data_dir;
 	my $standby_connstr = $node_standby->connstr('postgres');
@@ -232,7 +233,8 @@ sub run_pg_rewind
 				"--debug",
 				"--source-pgdata=$standby_pgdata",
 				"--target-pgdata=$master_pgdata",
-				"--no-sync"
+				"--no-sync",
+				($data_only ? '--data-only' : ())
 			],
 			'pg_rewind local');
 	}
@@ -245,7 +247,8 @@ sub run_pg_rewind
 				'pg_rewind',       "--debug",
 				"--source-server", $standby_connstr,
 				"--target-pgdata=$master_pgdata",
-				"--no-sync"
+				"--no-sync",
+				($data_only ? '--data-only' : ())
 			],
 			'pg_rewind remote');
 	}
