From e03528f833999cb6b806279ecdba7c42acd71ac0 Mon Sep 17 00:00:00 2001
From: Svetlana Derevyanko <s.derevyanko@postgrespro.ru>
Date: Fri, 29 Dec 2023 08:20:54 +0300
Subject: [PATCH v3] Add Index-level REINDEX with multiple jobs

Author: Maxim Orlov <orlovmg@gmail.com>, Svetlana Derevyanko <s.derevyanko@postgrespro.ru>
---
 doc/src/sgml/ref/reindexdb.sgml    |   3 +-
 src/bin/scripts/reindexdb.c        | 120 +++++++++++++++++++++++------
 src/bin/scripts/t/090_reindexdb.pl |   8 +-
 3 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/doc/src/sgml/ref/reindexdb.sgml b/doc/src/sgml/ref/reindexdb.sgml
index a877439dc5b..98c3333228f 100644
--- a/doc/src/sgml/ref/reindexdb.sgml
+++ b/doc/src/sgml/ref/reindexdb.sgml
@@ -179,8 +179,7 @@ PostgreSQL documentation
         setting is high enough to accommodate all connections.
        </para>
        <para>
-        Note that this option is incompatible with the <option>--index</option>
-        and <option>--system</option> options.
+        Note that this option is incompatible with the <option>--system</option> option.
        </para>
       </listitem>
      </varlistentry>
diff --git a/src/bin/scripts/reindexdb.c b/src/bin/scripts/reindexdb.c
index 231e5c8fd02..91e5c633ef1 100644
--- a/src/bin/scripts/reindexdb.c
+++ b/src/bin/scripts/reindexdb.c
@@ -206,19 +206,8 @@ main(int argc, char *argv[])
 
 	setup_cancel_handler(NULL);
 
-	if (concurrentCons > 1)
-	{
-		/*
-		 * Index-level REINDEX is not supported with multiple jobs as we
-		 * cannot control the concurrent processing of multiple indexes
-		 * depending on the same relation.
-		 */
-		if (indexes.head != NULL)
-			pg_fatal("cannot use multiple jobs to reindex indexes");
-
-		if (syscatalog)
-			pg_fatal("cannot use multiple jobs to reindex system catalogs");
-	}
+	if (concurrentCons > 1 && syscatalog)
+		pg_fatal("cannot use multiple jobs to reindex system catalogs");
 
 	if (alldb)
 	{
@@ -258,7 +247,7 @@ main(int argc, char *argv[])
 		if (indexes.head != NULL)
 			reindex_one_database(&cparams, REINDEX_INDEX, &indexes,
 								 progname, echo, verbose,
-								 concurrently, 1, tablespace);
+								 concurrently, concurrentCons, tablespace);
 
 		if (tables.head != NULL)
 			reindex_one_database(&cparams, REINDEX_TABLE, &tables,
@@ -288,12 +277,16 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 {
 	PGconn	   *conn;
 	SimpleStringListCell *cell;
+	SimpleStringListCell *indices_tables_cell;
 	bool		parallel = concurrentCons > 1;
 	SimpleStringList *process_list = user_list;
+	SimpleStringList *indices_tables_list = NULL;
 	ReindexType process_type = type;
 	ParallelSlotArray *sa;
 	bool		failed = false;
 	int			items_count = 0;
+	char	   *prev_index_table_name = NULL;
+	ParallelSlot *free_slot = NULL;
 
 	conn = connectDatabase(cparams, progname, echo, false, true);
 
@@ -363,8 +356,22 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 					return;
 				break;
 
-			case REINDEX_SYSTEM:
 			case REINDEX_INDEX:
+				Assert(user_list != NULL);
+
+				/* Build a list of relations from the indices */
+				indices_tables_list = get_parallel_object_list(conn, process_type,
+															   user_list, echo);
+
+				if (indices_tables_list)
+					indices_tables_cell = indices_tables_list->head;
+
+				/* Bail out if nothing to process */
+				if (process_list == NULL)
+					return;
+				break;
+
+			case REINDEX_SYSTEM:
 				/* not supported */
 				Assert(false);
 				break;
@@ -404,7 +411,7 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 	do
 	{
 		const char *objname = cell->val;
-		ParallelSlot *free_slot = NULL;
+		bool		need_new_slot = true;
 
 		if (CancelRequested)
 		{
@@ -412,14 +419,27 @@ reindex_one_database(ConnParams *cparams, ReindexType type,
 			goto finish;
 		}
 
-		free_slot = ParallelSlotsGetIdle(sa, NULL);
-		if (!free_slot)
+		if (parallel && process_type == REINDEX_INDEX)
 		{
-			failed = true;
-			goto finish;
+			if (prev_index_table_name != NULL &&
+				strcmp(prev_index_table_name, indices_tables_cell->val) == 0)
+				need_new_slot = false;
+			prev_index_table_name = indices_tables_cell->val;
+			indices_tables_cell = indices_tables_cell->next;
+		}
+
+		if (need_new_slot)
+		{
+			free_slot = ParallelSlotsGetIdle(sa, NULL);
+			if (!free_slot)
+			{
+				failed = true;
+				goto finish;
+			}
+
+			ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		}
 
-		ParallelSlotSetHandler(free_slot, TableCommandResultHandler, NULL);
 		run_reindex_command(free_slot->connection, process_type, objname,
 							echo, verbose, concurrently, true, tablespace);
 
@@ -436,6 +456,12 @@ finish:
 		pg_free(process_list);
 	}
 
+	if (indices_tables_list)
+	{
+		simple_string_list_destroy(indices_tables_list);
+		pg_free(indices_tables_list);
+	}
+
 	ParallelSlotsTerminate(sa);
 	pfree(sa);
 
@@ -647,8 +673,48 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
 			}
 			break;
 
-		case REINDEX_SYSTEM:
 		case REINDEX_INDEX:
+			{
+				SimpleStringListCell *cell;
+
+				Assert(user_list != NULL);
+
+				/*
+				 * Straight forward index-level REINDEX is not supported with
+				 * multiple jobs as we cannot control the concurrent
+				 * processing of multiple indexes depending on the same
+				 * relation.  But we can extract appropriate table name for
+				 * the index and put REINDEX INDEX commands into different
+				 * jobs, accordingly to the parent tables.
+				 */
+				appendPQExpBufferStr(&catalog_query,
+									 "SELECT t.relname, n.nspname, i.relname\n"
+									 "FROM pg_catalog.pg_index x\n"
+									 "JOIN pg_catalog.pg_class t ON t.oid = x.indrelid\n"
+									 "JOIN pg_catalog.pg_class i ON i.oid = x.indexrelid\n"
+									 "LEFT JOIN pg_catalog.pg_namespace n ON n.oid = t.relnamespace\n"
+									 "WHERE x.indexrelid OPERATOR(pg_catalog.=) ANY(ARRAY['");
+
+				for (cell = user_list->head; cell; cell = cell->next)
+				{
+					if (cell != user_list->head)
+						appendPQExpBufferStr(&catalog_query, "', '");
+
+					appendQualifiedRelation(&catalog_query, cell->val, conn, echo);
+				}
+
+				appendPQExpBufferStr(&catalog_query,
+									 "']::pg_catalog.regclass[])\n"
+									 "ORDER BY max(i.relpages) OVER \n"
+									 "    (PARTITION BY n.nspname, t.relname),\n"
+									 "  n.nspname, t.relname, i.relpages;\n");
+
+				simple_string_list_destroy(user_list);
+				user_list->head = user_list->tail = NULL;
+			}
+			break;
+
+		case REINDEX_SYSTEM:
 		case REINDEX_TABLE:
 			Assert(false);
 			break;
@@ -680,6 +746,16 @@ get_parallel_object_list(PGconn *conn, ReindexType type,
 
 		simple_string_list_append(tables, buf.data);
 		resetPQExpBuffer(&buf);
+
+		if (type == REINDEX_INDEX)
+		{
+			appendPQExpBufferStr(&buf,
+								 fmtQualifiedId(PQgetvalue(res, i, 1),
+												PQgetvalue(res, i, 2)));
+
+			simple_string_list_append(user_list, buf.data);
+			resetPQExpBuffer(&buf);
+		}
 	}
 	termPQExpBuffer(&buf);
 	PQclear(res);
diff --git a/src/bin/scripts/t/090_reindexdb.pl b/src/bin/scripts/t/090_reindexdb.pl
index 429dd3acd68..f02822da218 100644
--- a/src/bin/scripts/t/090_reindexdb.pl
+++ b/src/bin/scripts/t/090_reindexdb.pl
@@ -236,9 +236,11 @@ $node->safe_psql(
 	CREATE SCHEMA s1;
 	CREATE TABLE s1.t1(id integer);
 	CREATE INDEX ON s1.t1(id);
+	CREATE INDEX i1 ON s1.t1(id);
 	CREATE SCHEMA s2;
 	CREATE TABLE s2.t2(id integer);
 	CREATE INDEX ON s2.t2(id);
+	CREATE INDEX i2 ON s2.t2(id);
 	-- empty schema
 	CREATE SCHEMA s3;
 |);
@@ -246,9 +248,9 @@ $node->safe_psql(
 $node->command_fails(
 	[ 'reindexdb', '-j', '2', '-s', 'postgres' ],
 	'parallel reindexdb cannot process system catalogs');
-$node->command_fails(
-	[ 'reindexdb', '-j', '2', '-i', 'i1', 'postgres' ],
-	'parallel reindexdb cannot process indexes');
+$node->command_ok(
+	[ 'reindexdb', '-j', '2', '-i', 's1.i1', '-i', 's2.i2', 'postgres' ],
+	'parallel reindexdb for indices');
 # Note that the ordering of the commands is not stable, so the second
 # command for s2.t2 is not checked after.
 $node->issues_sql_like(
-- 
2.39.3 (Apple Git-145)

