From 4eae04718f7f2d9f5b05ef9a7d7b1d8089c8d915 Mon Sep 17 00:00:00 2001
From: "Krishnakumar R (KK)" <kksrcv001@gmail.com>
Date: Wed, 4 Oct 2023 12:27:29 -0700
Subject: [PATCH v3] Remove BKI file token pre-processing logic from initdb.

With this patch genbki replaces some token at compile time. Some others
are initially populated with placeholder values from catalog/*.dat. Initdb
run time UPDATEs these place holders plus the left over token with
the respective configured values.

Here are more details:
- NAMEDATALEN, FLOAT8PASSBYVAL, SIZEOF_VOID_P, ALIGNOF_POINTER are replaced
  during compilation from genbki.pl by reading those from header files.
- SIZEOF_VOID_P is available only after configuration (in pg_config.h).
  A new parameter include-conf had to be added to genbki to point to header files
  generated after configuration.
- The pg_database.dat now has placeholder values which are filled in template1
  during creation. Initdb uses UPDATE to set the right values for rolname in
  pg_authid and rest of the configured values in pg_database.
- Earlier bki file was opened by initdb, and passed to postgres started in
  bootstrap mode. With this changes, the bki file is no longer opened in initdb,
  instead the file path is passed to bootstrap which solely handles the bki file.
  This means we have pass the file stream as yyin to allow the parsing from file
  directly.
- Comparison of BKI file version and postgres build major version is moved from
  initdb to bootstrap. It only compares the version string to avoid needing
  platform compatability checks with EOL.
- On Windows, in front end code, text mode is enforced. Please refer to
  src/port/open.c. In backend, bki file opens in binary mode. Add text mode
  flag explicitly to handle EOL properly.
---
 src/backend/bootstrap/bootstrap.c   |  84 ++++++++++++++++++++-
 src/backend/catalog/Makefile        |   2 +-
 src/backend/catalog/genbki.pl       |  34 ++++++++-
 src/backend/main/main.c             |   1 +
 src/bin/initdb/initdb.c             | 109 ++++++++++------------------
 src/include/bootstrap/bootstrap.h   |   1 +
 src/include/catalog/meson.build     |   1 +
 src/include/catalog/pg_database.dat |  10 +--
 src/tools/msvc/Solution.pm          |   2 +-
 9 files changed, 162 insertions(+), 82 deletions(-)

diff --git a/src/backend/bootstrap/bootstrap.c b/src/backend/bootstrap/bootstrap.c
index e01dca9b7c..f797cfecfb 100644
--- a/src/backend/bootstrap/bootstrap.c
+++ b/src/backend/bootstrap/bootstrap.c
@@ -29,6 +29,7 @@
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
 #include "common/link-canary.h"
+#include "common/string.h"
 #include "libpq/pqsignal.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
@@ -49,6 +50,8 @@ uint32		bootstrap_data_checksum_version = 0;	/* No checksum */
 
 
 static void CheckerModeMain(void);
+static FILE *open_bki(char *bki_file);
+static bool verify_bki_hdr(FILE *fp);
 static void bootstrap_signals(void);
 static Form_pg_attribute AllocateAttribute(void);
 static void populate_typ_list(void);
@@ -206,6 +209,7 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 	char	   *progname = argv[0];
 	int			flag;
 	char	   *userDoption = NULL;
+	char	   *bki_file = NULL;
 
 	Assert(!IsUnderPostmaster);
 
@@ -221,10 +225,13 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 	argv++;
 	argc--;
 
-	while ((flag = getopt(argc, argv, "B:c:d:D:Fkr:X:-:")) != -1)
+	while ((flag = getopt(argc, argv, "b:B:c:d:D:Fkr:X:-:")) != -1)
 	{
 		switch (flag)
 		{
+			case 'b':
+				bki_file = pstrdup(optarg);
+				break;
 			case 'B':
 				SetConfigOption("shared_buffers", optarg, PGC_POSTMASTER, PGC_S_ARGV);
 				break;
@@ -354,9 +361,29 @@ BootstrapModeMain(int argc, char *argv[], bool check_only)
 		Nulls[i] = false;
 	}
 
+	/* Point boot_yyin to bki file. */
+	elog(DEBUG4, "Open bki file %s\n", bki_file);
+	if ((boot_yyin = open_bki(bki_file)) == NULL)
+	{
+		write_stderr("Opening bki_file=%s failed with error=%d.",
+					 bki_file ? bki_file : "", errno);
+		cleanup();
+		proc_exit(1);
+	}
+
 	/*
-	 * Process bootstrap input.
+	 * Verify bki header match with binary version. Bki parser ignore
+	 * commments hence no need to rewind boot_yyin.
 	 */
+	if (!verify_bki_hdr(boot_yyin))
+	{
+		write_stderr("Version in bki file(%s) does not match PostgreSQL version %s",
+					 bki_file, PG_VERSION);
+		cleanup();
+		proc_exit(1);
+	}
+
+	/* Process bootstrap input from bki file (boot_yyin) */
 	StartTransactionCommand();
 	boot_yyparse();
 	CommitTransactionCommand();
@@ -480,6 +507,59 @@ closerel(char *relname)
 	}
 }
 
+/* -----------------------
+ * open_bki
+ *
+ * Open bki file with the flags
+ * required per platform.
+ * -----------------------
+ */
+static FILE *
+open_bki(char *bki_file)
+{
+#if defined(WIN32) && !defined(__CYGWIN__)
+	/*
+	 * On Windows, in front end code, text mode open is enforced. Please refer
+	 * to src/port/open.c for more details. In backend, bki file opens in
+	 * binary mode. Add text mode flag explicitly to handle EOL properly.
+	 */
+	return fopen(bki_file, "rt");
+#else
+	return fopen(bki_file, "r");
+#endif
+}
+
+
+/* -----------------------
+ * verify_bki_hdr
+ *
+ * Verify that the bki file is generated for the
+ * same major version as that of the bootstrap.
+ * -----------------------
+ */
+static bool
+verify_bki_hdr(FILE *b)
+{
+	StringInfoData line;
+	char		headerline[MAXPGPATH];
+	bool		ret = true;
+
+	initStringInfo(&line);
+	if (!pg_get_line_buf(b, &line))
+	{
+		return false;
+	}
+
+	snprintf(headerline, sizeof(headerline), "# PostgreSQL %s\n",
+			 PG_MAJORVERSION);
+	if (strcmp(headerline, line.data) != 0)
+	{
+		ret = false;
+	}
+
+	pfree(line.data);
+	return ret;
+}
 
 
 /* ----------------
diff --git a/src/backend/catalog/Makefile b/src/backend/catalog/Makefile
index 3e9994793d..554ffbbcb2 100644
--- a/src/backend/catalog/Makefile
+++ b/src/backend/catalog/Makefile
@@ -169,7 +169,7 @@ generated-header-symlinks: $(top_builddir)/src/include/catalog/header-stamp
 # instead is cheating a bit, but it will achieve the goal of updating the
 # version number when it changes.
 bki-stamp: genbki.pl Catalog.pm $(POSTGRES_BKI_SRCS) $(POSTGRES_BKI_DATA) $(top_srcdir)/configure.ac $(top_srcdir)/src/include/access/transam.h
-	$(PERL) $< --include-path=$(top_srcdir)/src/include/ \
+	$(PERL) $< --include-path=$(top_srcdir)/src/include/ --include-conf=$(top_builddir)/src/include/ \
 		--set-version=$(MAJORVERSION) $(POSTGRES_BKI_SRCS)
 	touch $@
 
diff --git a/src/backend/catalog/genbki.pl b/src/backend/catalog/genbki.pl
index 380bc23c82..f7c8390e6d 100644
--- a/src/backend/catalog/genbki.pl
+++ b/src/backend/catalog/genbki.pl
@@ -25,13 +25,15 @@ use Catalog;
 my $output_path = '';
 my $major_version;
 my $include_path;
+my $include_conf;
 
 my $num_errors = 0;
 
 GetOptions(
 	'output:s' => \$output_path,
 	'set-version:s' => \$major_version,
-	'include-path:s' => \$include_path) || usage();
+	'include-path:s' => \$include_path,
+	'include-conf:s' => \$include_conf) || usage();
 
 # Sanity check arguments.
 die "No input files.\n" unless @ARGV;
@@ -39,6 +41,7 @@ die "--set-version must be specified.\n" unless $major_version;
 die "Invalid version string: $major_version\n"
   unless $major_version =~ /^\d+$/;
 die "--include-path must be specified.\n" unless $include_path;
+die "--include-conf must be specified.\n" unless $include_conf;
 
 # Make sure paths end with a slash.
 if ($output_path ne '' && substr($output_path, -1) ne '/')
@@ -180,6 +183,12 @@ my $FirstUnpinnedObjectId =
 # Hash of next available OID, indexed by catalog name.
 my %GenbkiNextOids;
 
+my $NameDataLen=Catalog::FindDefinedSymbol('pg_config_manual.h', $include_path,
+	'NAMEDATALEN');
+my $SizeOfPointer=Catalog::FindDefinedSymbol('pg_config.h', $include_conf,
+	'SIZEOF_VOID_P');
+my $Float8PassByVal=$SizeOfPointer >= 8 ? "true": "false";
+my $AlignOfPointer=$SizeOfPointer == 4 ? "i" : "d";
 
 # Fetch some special data that we will substitute into the output file.
 # CAUTION: be wary about what symbols you substitute into the .bki file here!
@@ -634,6 +643,23 @@ EOM
 			my $symbol = form_pg_type_symbol($bki_values{typname});
 			$bki_values{oid_symbol} = $symbol
 			  if defined $symbol;
+
+			if ($bki_values{typlen} eq  'NAMEDATALEN')
+			{
+				$bki_values{typlen} = $NameDataLen;
+			}
+			if ($bki_values{typlen} eq  'SIZEOF_POINTER')
+			{
+				$bki_values{typlen} = $SizeOfPointer;
+			}
+			if ($bki_values{typalign} eq  'ALIGNOF_POINTER')
+			{
+				$bki_values{typalign} = $AlignOfPointer;
+			}
+			if ($bki_values{typbyval} eq  'FLOAT8PASSBYVAL')
+			{
+				$bki_values{typbyval} = $Float8PassByVal;
+			}
 		}
 
 		# Write to postgres.bki
@@ -812,6 +838,11 @@ sub gen_pg_attribute
 			  ($row{attnotnull} eq 't'
 				  && ($row{attlen} eq 'NAMEDATALEN' || $row{attlen} > 0));
 
+			if ($row{attnotnull} eq 't' && ($row{attlen} eq 'NAMEDATALEN'))
+			{
+				$row{attlen} = $NameDataLen;
+			}
+
 			# If it's bootstrapped, put an entry in postgres.bki.
 			print_bki_insert(\%row, $schema) if $table->{bootstrap};
 
@@ -1106,6 +1137,7 @@ Options:
     --output         Output directory (default '.')
     --set-version    PostgreSQL version number for initdb cross-check
     --include-path   Include path in source tree
+    --include-conf   Include file path generated after configuration
 
 genbki.pl generates postgres.bki and symbol definition
 headers from specially formatted header files and .dat
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index ed11e8be7f..41badc8c88 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -372,6 +372,7 @@ help(const char *progname)
 	printf(_("  --check            selects check mode (must be first argument)\n"));
 	printf(_("  DBNAME             database name (mandatory argument in bootstrapping mode)\n"));
 	printf(_("  -r FILENAME        send stdout and stderr to given file\n"));
+	printf(_("  -b FILENAME        path to bki file\n"));
 
 	printf(_("\nPlease read the documentation for the complete list of run-time\n"
 			 "configuration settings and how to set them on the command line or in\n"
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 0c6f5ceb0a..9c8a98b5b7 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -764,15 +764,6 @@ get_id(void)
 	return pg_strdup(username);
 }
 
-static char *
-encodingid_to_string(int enc)
-{
-	char		result[20];
-
-	sprintf(result, "%d", enc);
-	return pg_strdup(result);
-}
-
 /*
  * get the encoding id for a given encoding name
  */
@@ -1473,70 +1464,17 @@ bootstrap_template1(void)
 {
 	PG_CMD_DECL;
 	PQExpBufferData cmd;
-	char	  **line;
-	char	  **bki_lines;
-	char		headerline[MAXPGPATH];
-	char		buf[64];
 
 	printf(_("running bootstrap script ... "));
 	fflush(stdout);
 
-	bki_lines = readfile(bki_file);
-
-	/* Check that bki file appears to be of the right version */
-
-	snprintf(headerline, sizeof(headerline), "# PostgreSQL %s\n",
-			 PG_MAJORVERSION);
-
-	if (strcmp(headerline, *bki_lines) != 0)
-	{
-		pg_log_error("input file \"%s\" does not belong to PostgreSQL %s",
-					 bki_file, PG_VERSION);
-		pg_log_error_hint("Specify the correct path using the option -L.");
-		exit(1);
-	}
-
-	/* Substitute for various symbols used in the BKI file */
-
-	sprintf(buf, "%d", NAMEDATALEN);
-	bki_lines = replace_token(bki_lines, "NAMEDATALEN", buf);
-
-	sprintf(buf, "%d", (int) sizeof(Pointer));
-	bki_lines = replace_token(bki_lines, "SIZEOF_POINTER", buf);
-
-	bki_lines = replace_token(bki_lines, "ALIGNOF_POINTER",
-							  (sizeof(Pointer) == 4) ? "i" : "d");
-
-	bki_lines = replace_token(bki_lines, "FLOAT8PASSBYVAL",
-							  FLOAT8PASSBYVAL ? "true" : "false");
-
-	bki_lines = replace_token(bki_lines, "POSTGRES",
-							  escape_quotes_bki(username));
-
-	bki_lines = replace_token(bki_lines, "ENCODING",
-							  encodingid_to_string(encodingid));
-
-	bki_lines = replace_token(bki_lines, "LC_COLLATE",
-							  escape_quotes_bki(lc_collate));
-
-	bki_lines = replace_token(bki_lines, "LC_CTYPE",
-							  escape_quotes_bki(lc_ctype));
-
-	bki_lines = replace_token(bki_lines, "ICU_LOCALE",
-							  icu_locale ? escape_quotes_bki(icu_locale) : "_null_");
-
-	bki_lines = replace_token(bki_lines, "ICU_RULES",
-							  icu_rules ? escape_quotes_bki(icu_rules) : "_null_");
-
-	sprintf(buf, "%c", locale_provider);
-	bki_lines = replace_token(bki_lines, "LOCALE_PROVIDER", buf);
-
 	/* Also ensure backend isn't confused by this environment var: */
 	unsetenv("PGCLIENTENCODING");
 
 	initPQExpBuffer(&cmd);
 
-	printfPQExpBuffer(&cmd, "\"%s\" --boot %s %s", backend_exec, boot_options, extra_options);
+	printfPQExpBuffer(&cmd, "\"%s\" --boot %s %s -b %s", backend_exec, boot_options,
+					  extra_options, bki_file);
 	appendPQExpBuffer(&cmd, " -X %d", wal_segment_size_mb * (1024 * 1024));
 	if (data_checksums)
 		appendPQExpBuffer(&cmd, " -k");
@@ -1545,21 +1483,46 @@ bootstrap_template1(void)
 
 
 	PG_CMD_OPEN(cmd.data);
-
-	for (line = bki_lines; *line != NULL; line++)
-	{
-		PG_CMD_PUTS(*line);
-		free(*line);
-	}
-
 	PG_CMD_CLOSE();
 
 	termPQExpBuffer(&cmd);
-	free(bki_lines);
 
 	check_ok();
 }
 
+/*
+ * Placeholder values from catalog *.dat are used to create template1.
+ * Here we UPDATE with configured values from current initdb run.
+ */
+static void
+update_params(FILE *cmdfd)
+{
+
+	char	   *icu_locale_str = NULL;
+	char	   *icu_rules_str = NULL;
+	char	   *line = NULL;
+
+	if (icu_locale)
+	{
+		icu_locale_str = psprintf(",daticulocale=%s", escape_quotes_bki(icu_locale));
+	}
+
+	if (icu_rules)
+	{
+		icu_rules_str = psprintf(",daticulocale=%s", escape_quotes_bki(icu_rules));
+	}
+
+	line = psprintf("UPDATE pg_authid SET rolname='%s' WHERE rolname='POSTGRES'; \n\n"
+					"UPDATE pg_database SET encoding='%d', datcollate='%s', datctype='%s',"
+					"datlocprovider='%c' %s %s "
+					"WHERE datname='template1'; \n\n",
+					escape_quotes(username), encodingid, escape_quotes(lc_collate),
+					escape_quotes(lc_ctype), locale_provider,
+					icu_locale_str ? icu_locale_str : "",
+					icu_rules_str ? icu_rules_str : "");
+	PG_CMD_PUTS(line);
+}
+
 /*
  * set up the shadow password table
  */
@@ -3028,6 +2991,8 @@ initialize_data_directory(void)
 
 	PG_CMD_OPEN(cmd.data);
 
+	update_params(cmdfd);
+
 	setup_auth(cmdfd);
 
 	setup_run_file(cmdfd, system_constraints_file);
diff --git a/src/include/bootstrap/bootstrap.h b/src/include/bootstrap/bootstrap.h
index e1cb73c5f2..75ac900a49 100644
--- a/src/include/bootstrap/bootstrap.h
+++ b/src/include/bootstrap/bootstrap.h
@@ -58,5 +58,6 @@ extern int	boot_yyparse(void);
 
 extern int	boot_yylex(void);
 extern void boot_yyerror(const char *message) pg_attribute_noreturn();
+extern FILE *boot_yyin;
 
 #endif							/* BOOTSTRAP_H */
diff --git a/src/include/catalog/meson.build b/src/include/catalog/meson.build
index dcb3c5f766..eb9cdc889c 100644
--- a/src/include/catalog/meson.build
+++ b/src/include/catalog/meson.build
@@ -123,6 +123,7 @@ generated_catalog_headers = custom_target('generated_catalog_headers',
     perl,
     files('../../backend/catalog/genbki.pl'),
     '--include-path=@SOURCE_ROOT@/src/include',
+    '--include-conf=@BUILD_ROOT@/src/include',
     '--set-version=' + pg_version_major.to_string(),
     '--output=@OUTDIR@', '@INPUT@'
   ],
diff --git a/src/include/catalog/pg_database.dat b/src/include/catalog/pg_database.dat
index 8d91e3bf8d..7cf9a56ff6 100644
--- a/src/include/catalog/pg_database.dat
+++ b/src/include/catalog/pg_database.dat
@@ -14,11 +14,11 @@
 
 { oid => '1', oid_symbol => 'Template1DbOid',
   descr => 'default template for new databases',
-  datname => 'template1', encoding => 'ENCODING',
-  datlocprovider => 'LOCALE_PROVIDER', datistemplate => 't',
+  datname => 'template1', encoding => '0',
+  datlocprovider => 'd', datistemplate => 't',
   datallowconn => 't', dathasloginevt => 'f', datconnlimit => '-1', datfrozenxid => '0',
-  datminmxid => '1', dattablespace => 'pg_default', datcollate => 'LC_COLLATE',
-  datctype => 'LC_CTYPE', daticulocale => 'ICU_LOCALE',
-  daticurules => 'ICU_RULES', datacl => '_null_' },
+  datminmxid => '1', dattablespace => 'pg_default', datcollate => 'C',
+  datctype => 'C', daticulocale => '_null_',
+  daticurules => '_null_', datacl => '_null_' },
 
 ]
diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm
index a50f730260..6135155759 100644
--- a/src/tools/msvc/Solution.pm
+++ b/src/tools/msvc/Solution.pm
@@ -783,7 +783,7 @@ EOF
 		chdir('src/backend/catalog');
 		my $bki_srcs = join(' ../../../src/include/catalog/', @bki_srcs);
 		system(
-			"perl genbki.pl --include-path ../../../src/include/ --set-version=$majorver $bki_srcs"
+			"perl genbki.pl --include-path ../../../src/include/ --include-conf ../../../src/include/ --set-version=$majorver $bki_srcs"
 		);
 		open(my $f, '>', 'bki-stamp')
 		  || confess "Could not touch bki-stamp";
-- 
2.34.1

