From b4a535b34636de168166332b03cd8c7680feabe2 Mon Sep 17 00:00:00 2001
From: Daniel Gustafsson <daniel@yesql.se>
Date: Wed, 22 May 2019 09:59:55 +0200
Subject: [PATCH] WIP: Use heap_multi_insert for catalog relations

Introduce a new function CatalogMultiInsertWithInfo which can replace
multiple calls to CatalogTupleInsertWithInfo by instead taking set of
slots to perform heap_multi_insert on. InsertPgAttributeTuples is also
introduced as a way to insert multiple attributes at once.

For insertions performing recordDependencyOn or InsertPgAttributeTuple
in a loop context, move these to collecting a set of tuples and use the
new InsertPgAttributeTuples or recordMultipleDependencies instead. Also
make recordMultipleDependencies use CatalogMultiInsertWithInfo to ensure
insertion via heap_multi_insert.
---
 src/backend/catalog/heap.c          | 125 ++++++++++++++++++++++++++++++------
 src/backend/catalog/indexing.c      |  37 +++++++++++
 src/backend/catalog/pg_constraint.c |  40 ++++++++----
 src/backend/catalog/pg_depend.c     |  76 +++++++++++++++++++---
 src/backend/catalog/pg_shdepend.c   |  26 ++++++--
 src/include/catalog/indexing.h      |   3 +
 6 files changed, 257 insertions(+), 50 deletions(-)

diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 6cffe550b3..281ec92f61 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -126,6 +126,10 @@ static Node *cookConstraint(ParseState *pstate,
 			   Node *raw_constraint,
 			   char *relname);
 static List *insert_ordered_unique_oid(List *list, Oid datum);
+static void InsertPgAttributeTuples(Relation pg_attribute_rel,
+						FormData_pg_attribute *new_attributes,
+						int natts,
+						CatalogIndexState indstate);
 
 
 /* ----------------------------------------------------------------
@@ -678,6 +682,78 @@ CheckAttributeType(const char *attname,
 				 errhint("Use the COLLATE clause to set the collation explicitly.")));
 }
 
+/*
+ * InsertPgAttributeTuples
+ *		Construct and insert multiple tuples in pg_attribute.
+ *
+ * This is a variant of InsertPgAttributeTuple() which dynamically allocates
+ * space for multiple tuples. Having two so similar functions is a kludge, but
+ * for now it's a TODO to make it less terrible.
+ */
+static void
+InsertPgAttributeTuples(Relation pg_attribute_rel,
+						FormData_pg_attribute *new_attributes,
+						int natts,
+						CatalogIndexState indstate)
+{
+	Datum		values[Natts_pg_attribute];
+	bool		nulls[Natts_pg_attribute];
+	HeapTuple	tup;
+	int			i;
+	TupleTableSlot **slot;
+
+	/*
+	 * The slots are dropped in CatalogMultiInsertWithInfo(). TODO Is natts
+	 * number of slots a reasonable assumption?
+	 */
+	slot = palloc(sizeof(TupleTableSlot *) * natts);
+
+	/* This is a tad tedious, but way cleaner than what we used to do... */
+	memset(values, 0, sizeof(values));
+	memset(nulls, false, sizeof(nulls));
+
+	/* start out with empty permissions and empty options */
+	nulls[Anum_pg_attribute_attacl - 1] = true;
+	nulls[Anum_pg_attribute_attoptions - 1] = true;
+	nulls[Anum_pg_attribute_attfdwoptions - 1] = true;
+	nulls[Anum_pg_attribute_attmissingval - 1] = true;
+
+	/* attcacheoff is always -1 in storage */
+	values[Anum_pg_attribute_attcacheoff - 1] = Int32GetDatum(-1);
+
+	for (i = 0; i < natts; i++)
+	{
+		values[Anum_pg_attribute_attrelid - 1] = ObjectIdGetDatum(new_attributes[i].attrelid);
+		values[Anum_pg_attribute_attname - 1] = NameGetDatum(&new_attributes[i].attname);
+		values[Anum_pg_attribute_atttypid - 1] = ObjectIdGetDatum(new_attributes[i].atttypid);
+		values[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(new_attributes[i].attstattarget);
+		values[Anum_pg_attribute_attlen - 1] = Int16GetDatum(new_attributes[i].attlen);
+		values[Anum_pg_attribute_attnum - 1] = Int16GetDatum(new_attributes[i].attnum);
+		values[Anum_pg_attribute_attndims - 1] = Int32GetDatum(new_attributes[i].attndims);
+		values[Anum_pg_attribute_atttypmod - 1] = Int32GetDatum(new_attributes[i].atttypmod);
+		values[Anum_pg_attribute_attbyval - 1] = BoolGetDatum(new_attributes[i].attbyval);
+		values[Anum_pg_attribute_attstorage - 1] = CharGetDatum(new_attributes[i].attstorage);
+		values[Anum_pg_attribute_attalign - 1] = CharGetDatum(new_attributes[i].attalign);
+		values[Anum_pg_attribute_attnotnull - 1] = BoolGetDatum(new_attributes[i].attnotnull);
+		values[Anum_pg_attribute_atthasdef - 1] = BoolGetDatum(new_attributes[i].atthasdef);
+		values[Anum_pg_attribute_atthasmissing - 1] = BoolGetDatum(new_attributes[i].atthasmissing);
+		values[Anum_pg_attribute_attidentity - 1] = CharGetDatum(new_attributes[i].attidentity);
+		values[Anum_pg_attribute_attgenerated - 1] = CharGetDatum(new_attributes[i].attgenerated);
+		values[Anum_pg_attribute_attisdropped - 1] = BoolGetDatum(new_attributes[i].attisdropped);
+		values[Anum_pg_attribute_attislocal - 1] = BoolGetDatum(new_attributes[i].attislocal);
+		values[Anum_pg_attribute_attinhcount - 1] = Int32GetDatum(new_attributes[i].attinhcount);
+		values[Anum_pg_attribute_attcollation - 1] = ObjectIdGetDatum(new_attributes[i].attcollation);
+
+		slot[i] = MakeSingleTupleTableSlot(RelationGetDescr(pg_attribute_rel),
+										   &TTSOpsHeapTuple);
+		tup = heap_form_tuple(RelationGetDescr(pg_attribute_rel), values, nulls);
+		ExecStoreHeapTuple(heap_copytuple(tup), slot[i], false);
+	}
+
+	/* finally insert the new tuples, update the indexes, and clean up */
+	CatalogMultiInsertWithInfo(pg_attribute_rel, slot, natts, indstate);
+}
+
 /*
  * InsertPgAttributeTuple
  *		Construct and insert a new tuple in pg_attribute.
@@ -772,7 +848,9 @@ AddNewAttributeTuples(Oid new_rel_oid,
 
 	/*
 	 * First we add the user attributes.  This is also a convenient place to
-	 * add dependencies on their datatypes and collations.
+	 * add dependencies on their datatypes and collations. Replacing this
+	 * loop with recordMultipleDependenciesOn() would possibly be a good
+	 * optimization, but would require some refactoring.
 	 */
 	for (i = 0; i < natts; i++)
 	{
@@ -811,17 +889,16 @@ AddNewAttributeTuples(Oid new_rel_oid,
 	 */
 	if (relkind != RELKIND_VIEW && relkind != RELKIND_COMPOSITE_TYPE)
 	{
+		FormData_pg_attribute attStructs[lengthof(SysAtt)];
+
+		/* Fill in the correct relation OID in the copied tuple */
 		for (i = 0; i < (int) lengthof(SysAtt); i++)
 		{
-			FormData_pg_attribute attStruct;
-
-			memcpy(&attStruct, SysAtt[i], sizeof(FormData_pg_attribute));
-
-			/* Fill in the correct relation OID in the copied tuple */
-			attStruct.attrelid = new_rel_oid;
-
-			InsertPgAttributeTuple(rel, &attStruct, indstate);
+			memcpy(&attStructs[i], SysAtt[i], sizeof(FormData_pg_attribute));
+			attStructs[i].attrelid = new_rel_oid;
 		}
+
+		InsertPgAttributeTuples(rel, attStructs, lengthof(SysAtt), indstate);
 	}
 
 	/*
@@ -3459,7 +3536,12 @@ StorePartitionKey(Relation rel,
 	Datum		values[Natts_pg_partitioned_table];
 	bool		nulls[Natts_pg_partitioned_table];
 	ObjectAddress myself;
-	ObjectAddress referenced;
+	/*
+	 * PARTITION_MAX_KEYS is the upper bound for partnatts, so grabbing it on
+	 * the stack can save us a palloc.
+	 */
+	ObjectAddress referenced[PARTITION_MAX_KEYS];
+	int			nref;
 
 	Assert(rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
 
@@ -3508,26 +3590,27 @@ StorePartitionKey(Relation rel,
 	myself.objectSubId = 0;
 
 	/* Operator class and collation per key column */
-	for (i = 0; i < partnatts; i++)
+	for (i = 0, nref = 0; i < partnatts; i++)
 	{
-		referenced.classId = OperatorClassRelationId;
-		referenced.objectId = partopclass[i];
-		referenced.objectSubId = 0;
-
-		recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+		referenced[nref].classId = OperatorClassRelationId;
+		referenced[nref].objectId = partopclass[i];
+		referenced[nref].objectSubId = 0;
+		nref++;
 
 		/* The default collation is pinned, so don't bother recording it */
 		if (OidIsValid(partcollation[i]) &&
 			partcollation[i] != DEFAULT_COLLATION_OID)
 		{
-			referenced.classId = CollationRelationId;
-			referenced.objectId = partcollation[i];
-			referenced.objectSubId = 0;
-
-			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
+			referenced[nref].classId = CollationRelationId;
+			referenced[nref].objectId = partcollation[i];
+			referenced[nref].objectSubId = 0;
+			nref++;
 		}
 	}
 
+	/* Store the dependencies in the catalog */
+	recordMultipleDependencies(&myself, referenced, nref, DEPENDENCY_NORMAL);
+
 	/*
 	 * Anything mentioned in the expressions.  We must ignore the column
 	 * references, which will depend on the table itself; there is no separate
diff --git a/src/backend/catalog/indexing.c b/src/backend/catalog/indexing.c
index f237e62bc9..0819a6449c 100644
--- a/src/backend/catalog/indexing.c
+++ b/src/backend/catalog/indexing.c
@@ -18,6 +18,7 @@
 #include "access/genam.h"
 #include "access/heapam.h"
 #include "access/htup_details.h"
+#include "access/xact.h"
 #include "catalog/index.h"
 #include "catalog/indexing.h"
 #include "executor/executor.h"
@@ -209,6 +210,42 @@ CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup,
 	CatalogIndexInsert(indstate, tup);
 }
 
+/*
+ * CatalogMultiInsertWithInfo
+ *
+ * Insert multiple tuples into the catalog relation at once, with an amortized
+ * cost of CatalogOpenIndexes.
+ */
+void
+CatalogMultiInsertWithInfo(Relation heapRel, TupleTableSlot **slot, int ntuples,
+						   CatalogIndexState indstate)
+{
+	/* Nothing to do */
+	if (ntuples <= 0)
+		return;
+
+	heap_multi_insert(heapRel, slot, ntuples,
+					  GetCurrentCommandId(true), 0, NULL);
+
+	/*
+	 * There is no equivalent to heap_multi_insert for the catalog indexes, so
+	 * we must loop over and insert individually.
+	 */
+	for (int i = 0; i < ntuples; i++)
+	{
+		bool		should_free;
+		HeapTuple	tuple;
+
+		tuple = ExecFetchSlotHeapTuple(slot[i], true, &should_free);
+		tuple->t_tableOid = slot[i]->tts_tableOid;
+		CatalogIndexInsert(indstate, tuple);
+		ExecDropSingleTupleTableSlot(slot[i]);
+
+		if (should_free)
+			heap_freetuple(tuple);
+	}
+}
+
 /*
  * CatalogTupleUpdate - do heap and indexing work for updating a catalog tuple
  *
diff --git a/src/backend/catalog/pg_constraint.c b/src/backend/catalog/pg_constraint.c
index b6145593a3..91f10ff3ef 100644
--- a/src/backend/catalog/pg_constraint.c
+++ b/src/backend/catalog/pg_constraint.c
@@ -230,27 +230,35 @@ CreateConstraintEntry(const char *constraintName,
 
 	table_close(conDesc, RowExclusiveLock);
 
+	ObjectAddress *relobjects;
+	if (constraintNTotalKeys || foreignNKeys)
+		relobjects = palloc(sizeof(ObjectAddress) * Max(constraintNTotalKeys, foreignNKeys));
+
 	if (OidIsValid(relId))
 	{
 		/*
 		 * Register auto dependency from constraint to owning relation, or to
 		 * specific column(s) if any are mentioned.
 		 */
-		ObjectAddress relobject;
-
-		relobject.classId = RelationRelationId;
-		relobject.objectId = relId;
 		if (constraintNTotalKeys > 0)
 		{
+			Assert(relobjects);
+
 			for (i = 0; i < constraintNTotalKeys; i++)
 			{
-				relobject.objectSubId = constraintKey[i];
-
-				recordDependencyOn(&conobject, &relobject, DEPENDENCY_AUTO);
+				relobjects[i].classId = RelationRelationId;
+				relobjects[i].objectId = relId;
+				relobjects[i].objectSubId = constraintKey[i];
 			}
+
+			recordMultipleDependencies(&conobject, relobjects, constraintNTotalKeys, DEPENDENCY_AUTO);
 		}
 		else
 		{
+			ObjectAddress relobject;
+
+			relobject.classId = RelationRelationId;
+			relobject.objectId = relId;
 			relobject.objectSubId = 0;
 
 			recordDependencyOn(&conobject, &relobject, DEPENDENCY_AUTO);
@@ -277,21 +285,25 @@ CreateConstraintEntry(const char *constraintName,
 		 * Register normal dependency from constraint to foreign relation, or
 		 * to specific column(s) if any are mentioned.
 		 */
-		ObjectAddress relobject;
-
-		relobject.classId = RelationRelationId;
-		relobject.objectId = foreignRelId;
 		if (foreignNKeys > 0)
 		{
+			Assert(relobjects);
+
 			for (i = 0; i < foreignNKeys; i++)
 			{
-				relobject.objectSubId = foreignKey[i];
-
-				recordDependencyOn(&conobject, &relobject, DEPENDENCY_NORMAL);
+				relobjects[i].classId = RelationRelationId;
+				relobjects[i].objectId = foreignRelId;
+				relobjects[i].objectSubId = foreignKey[i];
 			}
+
+			recordMultipleDependencies(&conobject, relobjects, foreignNKeys, DEPENDENCY_NORMAL);
 		}
 		else
 		{
+			ObjectAddress relobject;
+
+			relobject.classId = RelationRelationId;
+			relobject.objectId = foreignRelId;
 			relobject.objectSubId = 0;
 
 			recordDependencyOn(&conobject, &relobject, DEPENDENCY_NORMAL);
diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c
index f7caedcc02..14e0d03c73 100644
--- a/src/backend/catalog/pg_depend.c
+++ b/src/backend/catalog/pg_depend.c
@@ -35,7 +35,8 @@ static bool isObjectPinned(const ObjectAddress *object, Relation rel);
 /*
  * Record a dependency between 2 objects via their respective objectAddress.
  * The first argument is the dependent object, the second the one it
- * references.
+ * references. This is a simplified version of recordMultipleDependencies()
+ * aiming to avoid some overhead when we know there is only a single tuple.
  *
  * This simply creates an entry in pg_depend, without any other processing.
  */
@@ -44,7 +45,51 @@ recordDependencyOn(const ObjectAddress *depender,
 				   const ObjectAddress *referenced,
 				   DependencyType behavior)
 {
-	recordMultipleDependencies(depender, referenced, 1, behavior);
+	Relation	dependDesc;
+	HeapTuple	tuple;
+	bool		nulls[Natts_pg_depend];
+	Datum		values[Natts_pg_depend];
+
+	/*
+	 * During bootstrap, do nothing since pg_depend may not exist yet. initdb
+	 * will fill in appropriate pg_depend entries after bootstrap.
+	 */
+	if (IsBootstrapProcessingMode())
+		return;
+
+	dependDesc = table_open(DependRelationId, RowExclusiveLock);
+
+	memset(nulls, false, sizeof(nulls));
+
+	/*
+	 * If the referenced object is pinned by the system, there's no real
+	 * need to record dependencies on it.  This saves lots of space in
+	 * pg_depend, so it's worth the time taken to check.
+	 */
+	if (isObjectPinned(referenced, dependDesc))
+	{
+		table_close(dependDesc, RowExclusiveLock);
+		return;
+	}
+
+	/*
+	 * Record the Dependency.  Note we don't bother to check for
+	 * duplicate dependencies; there's no harm in them.
+	 */
+	values[Anum_pg_depend_classid - 1] = ObjectIdGetDatum(depender->classId);
+	values[Anum_pg_depend_objid - 1] = ObjectIdGetDatum(depender->objectId);
+	values[Anum_pg_depend_objsubid - 1] = Int32GetDatum(depender->objectSubId);
+
+	values[Anum_pg_depend_refclassid - 1] = ObjectIdGetDatum(referenced->classId);
+	values[Anum_pg_depend_refobjid - 1] = ObjectIdGetDatum(referenced->objectId);
+	values[Anum_pg_depend_refobjsubid - 1] = Int32GetDatum(referenced->objectSubId);
+
+	values[Anum_pg_depend_deptype - 1] = CharGetDatum((char) behavior);
+
+	tuple = heap_form_tuple(dependDesc->rd_att, values, nulls);
+	CatalogTupleInsert(dependDesc, tuple);
+
+	table_close(dependDesc, RowExclusiveLock);
 }
 
 /*
@@ -59,7 +104,9 @@ recordMultipleDependencies(const ObjectAddress *depender,
 {
 	Relation	dependDesc;
 	CatalogIndexState indstate;
-	HeapTuple	tup;
+	HeapTuple	tuple;
+	TupleTableSlot **slot;
+	int			ntuples;
 	int			i;
 	bool		nulls[Natts_pg_depend];
 	Datum		values[Natts_pg_depend];
@@ -81,7 +128,10 @@ recordMultipleDependencies(const ObjectAddress *depender,
 
 	memset(nulls, false, sizeof(nulls));
 
-	for (i = 0; i < nreferenced; i++, referenced++)
+	/* TODO is nreferenced a reasonable allocation of slots? */
+	slot = palloc(sizeof(TupleTableSlot *) * nreferenced);
+
+	for (i = 0, ntuples = 0; i < nreferenced; i++, referenced++)
 	{
 		/*
 		 * If the referenced object is pinned by the system, there's no real
@@ -104,20 +154,28 @@ recordMultipleDependencies(const ObjectAddress *depender,
 
 			values[Anum_pg_depend_deptype - 1] = CharGetDatum((char) behavior);
 
-			tup = heap_form_tuple(dependDesc->rd_att, values, nulls);
+			slot[ntuples] = MakeSingleTupleTableSlot(RelationGetDescr(dependDesc),
+													 &TTSOpsHeapTuple);
+
+			tuple = heap_form_tuple(dependDesc->rd_att, values, nulls);
+			ExecStoreHeapTuple(heap_copytuple(tuple), slot[ntuples], false);
+			ntuples++;
 
 			/* fetch index info only when we know we need it */
 			if (indstate == NULL)
 				indstate = CatalogOpenIndexes(dependDesc);
-
-			CatalogTupleInsertWithInfo(dependDesc, tup, indstate);
-
-			heap_freetuple(tup);
 		}
 	}
 
+	/*
+	 * We will have an indstate in case we found any tuples to insert in the
+	 * catalog, so perform a multi insert and close the index again when done.
+	 */
 	if (indstate != NULL)
+	{
+		CatalogMultiInsertWithInfo(dependDesc, slot, ntuples, indstate);
 		CatalogCloseIndexes(indstate);
+	}
 
 	table_close(dependDesc, RowExclusiveLock);
 }
diff --git a/src/backend/catalog/pg_shdepend.c b/src/backend/catalog/pg_shdepend.c
index 4a9b4efb05..bf49efe099 100644
--- a/src/backend/catalog/pg_shdepend.c
+++ b/src/backend/catalog/pg_shdepend.c
@@ -66,7 +66,6 @@
 #include "utils/fmgroids.h"
 #include "utils/syscache.h"
 
-
 typedef enum
 {
 	LOCAL_OBJECT,
@@ -800,10 +799,15 @@ copyTemplateDependencies(Oid templateDbId, Oid newDbId)
 	ScanKeyData key[1];
 	SysScanDesc scan;
 	HeapTuple	tup;
+	HeapTuple	newtuple;
+	int			ntuples;
 	CatalogIndexState indstate;
 	Datum		values[Natts_pg_shdepend];
 	bool		nulls[Natts_pg_shdepend];
 	bool		replace[Natts_pg_shdepend];
+	/* TODO figure out a sensible value for the amount of slots */
+#define	DEPEND_TUPLE_BUF 32
+	TupleTableSlot *slot[DEPEND_TUPLE_BUF];
 
 	sdepRel = table_open(SharedDependRelationId, RowExclusiveLock);
 	sdepDesc = RelationGetDescr(sdepRel);
@@ -834,16 +838,26 @@ copyTemplateDependencies(Oid templateDbId, Oid newDbId)
 	 * copy the ownership dependency of the template database itself; this is
 	 * what we want.
 	 */
+	ntuples = 0;
 	while (HeapTupleIsValid(tup = systable_getnext(scan)))
 	{
-		HeapTuple	newtup;
-
-		newtup = heap_modify_tuple(tup, sdepDesc, values, nulls, replace);
-		CatalogTupleInsertWithInfo(sdepRel, newtup, indstate);
+		slot[ntuples] = MakeSingleTupleTableSlot(RelationGetDescr(sdepRel),
+									&TTSOpsHeapTuple);
+		newtuple = heap_modify_tuple(tup, sdepDesc, values, nulls, replace);
+		ExecStoreHeapTuple(heap_copytuple(newtuple), slot[ntuples], false);
+		ntuples++;
 
-		heap_freetuple(newtup);
+		if (ntuples == DEPEND_TUPLE_BUF)
+		{
+			CatalogMultiInsertWithInfo(sdepRel, slot, ntuples, indstate);
+			ntuples = 0;
+		}
 	}
 
+	/* Insert any tuples left in the buffer */
+	if (ntuples)
+		CatalogMultiInsertWithInfo(sdepRel, slot, ntuples, indstate);
+
 	systable_endscan(scan);
 
 	CatalogCloseIndexes(indstate);
diff --git a/src/include/catalog/indexing.h b/src/include/catalog/indexing.h
index f253613650..8df5dec434 100644
--- a/src/include/catalog/indexing.h
+++ b/src/include/catalog/indexing.h
@@ -19,6 +19,7 @@
 #define INDEXING_H
 
 #include "access/htup.h"
+#include "nodes/execnodes.h"
 #include "utils/relcache.h"
 
 /*
@@ -36,6 +37,8 @@ extern void CatalogCloseIndexes(CatalogIndexState indstate);
 extern void CatalogTupleInsert(Relation heapRel, HeapTuple tup);
 extern void CatalogTupleInsertWithInfo(Relation heapRel, HeapTuple tup,
 						   CatalogIndexState indstate);
+extern void CatalogMultiInsertWithInfo(Relation heapRel, TupleTableSlot **slot,
+						   int ntuples, CatalogIndexState indstate);
 extern void CatalogTupleUpdate(Relation heapRel, ItemPointer otid,
 				   HeapTuple tup);
 extern void CatalogTupleUpdateWithInfo(Relation heapRel,
-- 
2.14.1.145.gb3622a4ee

