From a08e420a59244c98ba5be8ed5b17746ee5bf5404 Mon Sep 17 00:00:00 2001
From: "Junfeng(Jerome) Yang" <jeyang@pivotal.io>
Date: Wed, 18 Nov 2020 14:16:23 +0800
Subject: [PATCH] Fix vacuum freeze with pg_database toast attribute.

In vac_update_datfrozenxid(), the pg_database tuple for current
database should be fetched from disk heap table instead of system cache.
Since the cache already flatten toast tuple, so if the tuple in
pg_database contains toast attribute, heap_inplace_update() will fail
with "wrong tuple length".

Co-authored-by: Ashwin Agrawal <aashwin@vmware.com>
---
 src/backend/commands/vacuum.c        | 84 ++++++++++++++++++++--------
 src/test/regress/expected/vacuum.out | 58 +++++++++++++++++++
 src/test/regress/sql/vacuum.sql      | 40 +++++++++++++
 3 files changed, 160 insertions(+), 22 deletions(-)

diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index d89ba3031f..be2c7e17ef 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1328,6 +1328,35 @@ vac_update_relstats(Relation relation,
 	table_close(rd, RowExclusiveLock);
 }
 
+/*
+ * fetch_database_tuple - Fetch a copy of database tuple from pg_database.
+ *
+ * This using disk heap table instead of system cache.
+ * relation: opened pg_database relation in vac_update_datfrozenxid().
+ */
+static HeapTuple
+fetch_database_tuple(Relation relation, Oid dbOid)
+{
+	ScanKeyData skey[1];
+	SysScanDesc sscan;
+	HeapTuple	tuple = NULL;
+
+	ScanKeyInit(&skey[0],
+				Anum_pg_database_oid,
+				BTEqualStrategyNumber, F_OIDEQ,
+				ObjectIdGetDatum(dbOid));
+
+	sscan = systable_beginscan(relation, DatabaseOidIndexId, true,
+							   NULL, 1, skey);
+
+	tuple = systable_getnext(sscan);
+	if (HeapTupleIsValid(tuple))
+		tuple = heap_copytuple(tuple);
+
+	systable_endscan(sscan);
+
+	return tuple;
+}
 
 /*
  *	vac_update_datfrozenxid() -- update pg_database.datfrozenxid for our DB
@@ -1350,8 +1379,8 @@ vac_update_relstats(Relation relation,
 void
 vac_update_datfrozenxid(void)
 {
-	HeapTuple	tuple;
-	Form_pg_database dbform;
+	HeapTuple	cached_tuple;
+	Form_pg_database	cached_dbform;
 	Relation	relation;
 	SysScanDesc scan;
 	HeapTuple	classTup;
@@ -1479,42 +1508,53 @@ vac_update_datfrozenxid(void)
 	/* Now fetch the pg_database tuple we need to update. */
 	relation = table_open(DatabaseRelationId, RowExclusiveLock);
 
-	/* Fetch a copy of the tuple to scribble on */
-	tuple = SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
-	if (!HeapTupleIsValid(tuple))
-		elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
-	dbform = (Form_pg_database) GETSTRUCT(tuple);
+	/* Get the pg_database tuple to scribble on */
+	cached_tuple = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
+	cached_dbform = (Form_pg_database) GETSTRUCT(cached_tuple);
 
 	/*
 	 * As in vac_update_relstats(), we ordinarily don't want to let
 	 * datfrozenxid go backward; but if it's "in the future" then it must be
 	 * corrupt and it seems best to overwrite it.
 	 */
-	if (dbform->datfrozenxid != newFrozenXid &&
-		(TransactionIdPrecedes(dbform->datfrozenxid, newFrozenXid) ||
-		 TransactionIdPrecedes(lastSaneFrozenXid, dbform->datfrozenxid)))
-	{
-		dbform->datfrozenxid = newFrozenXid;
+	if (cached_dbform->datfrozenxid != newFrozenXid &&
+		(TransactionIdPrecedes(cached_dbform->datfrozenxid, newFrozenXid) ||
+		 TransactionIdPrecedes(lastSaneFrozenXid, cached_dbform->datfrozenxid)))
 		dirty = true;
-	}
 	else
-		newFrozenXid = dbform->datfrozenxid;
+		newFrozenXid = cached_dbform->datfrozenxid;
 
 	/* Ditto for datminmxid */
-	if (dbform->datminmxid != newMinMulti &&
-		(MultiXactIdPrecedes(dbform->datminmxid, newMinMulti) ||
-		 MultiXactIdPrecedes(lastSaneMinMulti, dbform->datminmxid)))
-	{
-		dbform->datminmxid = newMinMulti;
+	if (cached_dbform->datminmxid != newMinMulti &&
+		(MultiXactIdPrecedes(cached_dbform->datminmxid, newMinMulti) ||
+		 MultiXactIdPrecedes(lastSaneMinMulti, cached_dbform->datminmxid)))
 		dirty = true;
-	}
 	else
-		newMinMulti = dbform->datminmxid;
+		newMinMulti = cached_dbform->datminmxid;
 
 	if (dirty)
+	{
+		HeapTuple			tuple;
+		Form_pg_database	tmp_dbform;
+		/*
+		 * Fetch a copy of the tuple to scribble on from pg_database disk
+		 * heap table instead of system cache
+		 * "SearchSysCacheCopy1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId))".
+		 * Since the cache already flatten toast tuple, so the
+		 * heap_inplace_update will fail with "wrong tuple length".
+		 */
+		tuple = fetch_database_tuple(relation, MyDatabaseId);
+		if (!HeapTupleIsValid(tuple))
+			elog(ERROR, "could not find tuple for database %u", MyDatabaseId);
+		tmp_dbform = (Form_pg_database) GETSTRUCT(tuple);
+		tmp_dbform->datfrozenxid = newFrozenXid;
+		tmp_dbform->datminmxid = newMinMulti;
+
 		heap_inplace_update(relation, tuple);
+		heap_freetuple(tuple);
+	}
 
-	heap_freetuple(tuple);
+	ReleaseSysCache(cached_tuple);
 	table_close(relation, RowExclusiveLock);
 
 	/*
diff --git a/src/test/regress/expected/vacuum.out b/src/test/regress/expected/vacuum.out
index 3fccb183c0..5ff2732a70 100644
--- a/src/test/regress/expected/vacuum.out
+++ b/src/test/regress/expected/vacuum.out
@@ -383,3 +383,61 @@ RESET ROLE;
 DROP TABLE vacowned;
 DROP TABLE vacowned_parted;
 DROP ROLE regress_vacuum;
+-- Vacuum freeze for database with toast attribute in pg_database tuple cause
+-- heap_inplace_update raise error "wrong tuple length". This is because system
+-- cache flatten toast tuple.
+DROP DATABASE IF EXISTS vacuum_freeze_test;
+NOTICE:  database "vacuum_freeze_test" does not exist, skipping
+CREATE DATABASE vacuum_freeze_test;
+create or replace function toast_pg_database_datacl() returns text as $body$
+declare
+	mycounter int;
+begin
+	for mycounter in select i from generate_series(1, 2800) i loop
+		execute 'create role aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+		execute 'grant ALL on database vacuum_freeze_test to aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+	end loop;
+	return 'ok';
+end;
+$body$ language plpgsql volatile strict;
+create or replace function clean_roles() returns text as $body$
+declare
+	mycounter int;
+begin
+	for mycounter in select i from generate_series(1, 2800) i loop
+		execute 'drop role aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+	end loop;
+	return 'ok';
+end;
+$body$ language plpgsql volatile strict;
+select toast_pg_database_datacl();
+ toast_pg_database_datacl 
+--------------------------
+ ok
+(1 row)
+
+\c vacuum_freeze_test
+create temp table before_vacuum as select datname, pg_column_size(datacl) > 8192 as datacl_size, age(datfrozenxid) from pg_database where datname='vacuum_freeze_test';
+select datname, datacl_size from before_vacuum;
+      datname       | datacl_size 
+--------------------+-------------
+ vacuum_freeze_test | t
+(1 row)
+
+vacuum freeze;
+select datname, pg_column_size(datacl) > 8192 as datacl_size, age(datfrozenxid) != (select age from before_vacuum) as age_changed from pg_database where datname='vacuum_freeze_test';
+      datname       | datacl_size | age_changed 
+--------------------+-------------+-------------
+ vacuum_freeze_test | t           | t
+(1 row)
+
+\c regression
+DROP DATABASE vacuum_freeze_test;
+select clean_roles();
+ clean_roles 
+-------------
+ ok
+(1 row)
+
+drop function toast_pg_database_datacl();
+drop function clean_roles();
diff --git a/src/test/regress/sql/vacuum.sql b/src/test/regress/sql/vacuum.sql
index c7b5f96f6b..79fc8e91e0 100644
--- a/src/test/regress/sql/vacuum.sql
+++ b/src/test/regress/sql/vacuum.sql
@@ -294,3 +294,43 @@ RESET ROLE;
 DROP TABLE vacowned;
 DROP TABLE vacowned_parted;
 DROP ROLE regress_vacuum;
+
+-- Vacuum freeze for database with toast attribute in pg_database tuple cause
+-- heap_inplace_update raise error "wrong tuple length". This is because system
+-- cache flatten toast tuple.
+DROP DATABASE IF EXISTS vacuum_freeze_test;
+CREATE DATABASE vacuum_freeze_test;
+create or replace function toast_pg_database_datacl() returns text as $body$
+declare
+	mycounter int;
+begin
+	for mycounter in select i from generate_series(1, 2800) i loop
+		execute 'create role aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+		execute 'grant ALL on database vacuum_freeze_test to aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+	end loop;
+	return 'ok';
+end;
+$body$ language plpgsql volatile strict;
+
+create or replace function clean_roles() returns text as $body$
+declare
+	mycounter int;
+begin
+	for mycounter in select i from generate_series(1, 2800) i loop
+		execute 'drop role aaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb' || mycounter;
+	end loop;
+	return 'ok';
+end;
+$body$ language plpgsql volatile strict;
+
+select toast_pg_database_datacl();
+\c vacuum_freeze_test
+create temp table before_vacuum as select datname, pg_column_size(datacl) > 8192 as datacl_size, age(datfrozenxid) from pg_database where datname='vacuum_freeze_test';
+select datname, datacl_size from before_vacuum;
+vacuum freeze;
+select datname, pg_column_size(datacl) > 8192 as datacl_size, age(datfrozenxid) != (select age from before_vacuum) as age_changed from pg_database where datname='vacuum_freeze_test';
+\c regression
+DROP DATABASE vacuum_freeze_test;
+select clean_roles();
+drop function toast_pg_database_datacl();
+drop function clean_roles();
-- 
2.17.1

