From a50098891308e84c8149b9a7bc63f1ea541c0726 Mon Sep 17 00:00:00 2001
From: Jerry Jelinek <jerry.jelinek@joyent.com>
Date: Thu, 7 Mar 2019 21:00:45 +0000
Subject: [PATCH] wal_recycle and wal_init_zero

---
 doc/src/sgml/config.sgml                      | 31 +++++++++
 src/backend/access/transam/xlog.c             | 96 +++++++++++++++++----------
 src/backend/utils/misc/guc.c                  | 23 +++++++
 src/backend/utils/misc/postgresql.conf.sample |  2 +
 src/include/access/xlog.h                     |  2 +
 5 files changed, 120 insertions(+), 34 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6d42b7afe7..0531662a0b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3573,6 +3573,37 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"'  # Windows
        </listitem>
       </varlistentry>
 
+     <varlistentry id="guc-wal-init-zero" xreflabel="wal_init_zero">
+      <term><varname>wal_init_zero</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_init_zero</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Disabling this option prevents zero-filling new WAL files.
+        This parameter should only be set to <literal>off</literal> when the WAL
+        resides on a <firstterm>Copy-On-Write</firstterm> (<acronym>COW</acronym>)
+        filesystem.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-wal-recycle" xreflabel="wal_recycle">
+      <term><varname>wal_recycle</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_recycle</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Disabling this option prevents WAL file recycling.
+        This parameter should only be set to <literal>off</literal> when the WAL
+        resides on a COW filesystem.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-sender-timeout" xreflabel="wal_sender_timeout">
       <term><varname>wal_sender_timeout</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index ecd12fc53a..3d83229660 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -94,6 +94,8 @@ bool		wal_log_hints = false;
 bool		wal_compression = false;
 char	   *wal_consistency_checking_string = NULL;
 bool	   *wal_consistency_checking = NULL;
+bool		wal_init_zero = true;
+bool		wal_recycle = true;
 bool		log_checkpoints = false;
 int			sync_method = DEFAULT_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_MINIMAL;
@@ -3216,6 +3218,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 	XLogSegNo	max_segno;
 	int			fd;
 	int			nbytes;
+	bool		fail;
 
 	XLogFilePath(path, ThisTimeLineID, logsegno, wal_segment_size);
 
@@ -3255,39 +3258,59 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 				(errcode_for_file_access(),
 				 errmsg("could not create file \"%s\": %m", tmppath)));
 
-	/*
-	 * Zero-fill the file.  We have to do this the hard way to ensure that all
-	 * the file space has really been allocated --- on platforms that allow
-	 * "holes" in files, just seeking to the end doesn't allocate intermediate
-	 * space.  This way, we know that we have all the space and (after the
-	 * fsync below) that all the indirect blocks are down on disk.  Therefore,
-	 * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
-	 * log file.
-	 */
 	memset(zbuffer.data, 0, XLOG_BLCKSZ);
-	for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
-	{
+
+	if (wal_init_zero) {
+		/*
+		 * Zero-fill the file.  We have to do this the hard way to ensure that
+		 * all the file space has really been allocated --- on platforms that
+		 * allow "holes" in files, just seeking to the end doesn't allocate
+		 * intermediate space.  This way, we know that we have all the space
+		 * and (after the fsync below) that all the indirect blocks are down on
+		 * disk.  Therefore, fdatasync(2) or O_DSYNC will be sufficient to sync
+		 * future writes to the log file.
+		 */
+		fail = false;	/* keep compiler quiet */
+		for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
+		{
+			errno = 0;
+			pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
+			if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
+				fail = true;
+			pgstat_report_wait_end();
+			if (fail)
+				break;
+		}
+	}
+	else
+ 	{
+		/*
+		 * Otherwise, seeking to the end and writing a solitary byte is enough.
+		 */
 		errno = 0;
 		pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
-		if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
-		{
-			int			save_errno = errno;
+		fail = lseek(fd, wal_segment_size - 1, SEEK_SET) < (off_t) 0 ||
+			(int) write(fd, zbuffer.data, 1) != (int) 1;
+		pgstat_report_wait_end();
+	}
 
-			/*
-			 * If we fail to make the file, delete it to release disk space
-			 */
-			unlink(tmppath);
+	if (fail)
+	{
+		int			save_errno = errno;
 
-			close(fd);
+		/*
+		 * If we fail to make the file, delete it to release disk space
+		 */
+		unlink(tmppath);
 
-			/* if write didn't set errno, assume problem is no disk space */
-			errno = save_errno ? save_errno : ENOSPC;
+		close(fd);
 
-			ereport(ERROR,
-					(errcode_for_file_access(),
-					 errmsg("could not write to file \"%s\": %m", tmppath)));
-		}
-		pgstat_report_wait_end();
+		/* if write didn't set errno, assume problem is no disk space */
+		errno = save_errno ? save_errno : ENOSPC;
+
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not write to file \"%s\": %m", tmppath)));
 	}
 
 	pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_SYNC);
@@ -4053,14 +4076,18 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
 	XLogSegNo	endlogSegNo;
 	XLogSegNo	recycleSegNo;
 
-	/*
-	 * Initialize info about where to try to recycle to.
-	 */
-	XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
-	if (RedoRecPtr == InvalidXLogRecPtr)
-		recycleSegNo = endlogSegNo + 10;
+	if (wal_recycle) {
+		/*
+		 * Initialize info about where to try to recycle to.
+		 */
+		XLByteToSeg(endptr, endlogSegNo, wal_segment_size);
+		if (RedoRecPtr == InvalidXLogRecPtr)
+			recycleSegNo = endlogSegNo + 10;
+		else
+			recycleSegNo = XLOGfileslop(RedoRecPtr);
+	}
 	else
-		recycleSegNo = XLOGfileslop(RedoRecPtr);
+		recycleSegNo = (XLogSegNo) 0;	/* keep compiler quiet */
 
 	snprintf(path, MAXPGPATH, XLOGDIR "/%s", segname);
 
@@ -4069,7 +4096,8 @@ RemoveXlogFile(const char *segname, XLogRecPtr RedoRecPtr, XLogRecPtr endptr)
 	 * segment. Only recycle normal files, pg_standby for example can create
 	 * symbolic links pointing to a separate archive directory.
 	 */
-	if (endlogSegNo <= recycleSegNo &&
+	if (wal_recycle &&
+		endlogSegNo <= recycleSegNo &&
 		lstat(path, &statbuf) == 0 && S_ISREG(statbuf.st_mode) &&
 		InstallXLogFileSegment(&endlogSegNo, path,
 							   true, recycleSegNo, true))
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 156d147c85..5ae3f2043e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1177,6 +1177,29 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"wal_init_zero", PGC_SUSET, WAL_SETTINGS,
+			gettext_noop("Zero-fill WAL file."),
+			gettext_noop("This option disables zero-filling a new WAL file, "
+				"which can be beneficial in some cases, such as on COW "
+				"filesystems.")
+		},
+		&wal_init_zero,
+		true,
+		NULL, NULL, NULL
+	},
+
+	{
+		{"wal_recycle", PGC_SUSET, WAL_SETTINGS,
+			gettext_noop("WAL recycling is enabled."),
+			gettext_noop("This option disables WAL recycling, which can be "
+				"beneficial in some cases, such as on COW filesystems.")
+		},
+		&wal_recycle,
+		true,
+		NULL, NULL, NULL
+	},
+
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
 			gettext_noop("Logs each checkpoint."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index bd6ea65d0c..43597ffd7f 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -206,6 +206,8 @@
 #wal_compression = off			# enable compression of full-page writes
 #wal_log_hints = off			# also do full page writes of non-critical updates
 					# (change requires restart)
+#wal_init_zero = on	# zero-fill new WAL files
+#wal_recycle = on	# recycle WAL files
 #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
 					# (change requires restart)
 #wal_writer_delay = 200ms		# 1-10000 milliseconds
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f90a6a9139..865f59eee1 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -116,6 +116,8 @@ extern bool EnableHotStandby;
 extern bool fullPageWrites;
 extern bool wal_log_hints;
 extern bool wal_compression;
+extern bool wal_init_zero;
+extern bool wal_recycle;
 extern bool *wal_consistency_checking;
 extern char *wal_consistency_checking_string;
 extern bool log_checkpoints;
-- 
2.15.1

