From e435c033cd574b0a07f08577880695de4290747d Mon Sep 17 00:00:00 2001
From: TatsuyaKawata <kawatatatsuya0913@gmail.com>
Date: Sat, 10 Jan 2026 16:51:40 +0900
Subject: [PATCH v3] Add sampling statistics to autoanalyze log output

Previously, autoanalyze log messages only showed buffer usage, WAL usage,
and system usage statistics. However, ANALYZE VERBOSE showed additional
sampling statistics including pages scanned, live rows, and dead rows
found during sampling. This made it difficult to understand the sampling
behavior from autoanalyze logs alone.

This patch unifies the logging by adding sampling statistics to the
autoanalyze log output. The new log format includes:
- Number of pages scanned out of total pages
- Live rows and dead rows found during sampling
- Number of rows in sample and estimated total rows

Additionally, this patch adds "inheritance tree" to the autoanalyze log
message when analyzing inherited statistics, making it easier to
distinguish between single-table analyze and inheritance tree analyze.

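To support this change, a new SamplingStats struct is introduced in
vacuum.h to collect and pass sampling statistics. The AcquireSampleRowsFunc
callback signature is updated to include this new parameter, so every FDW
that supplies an acquire function through AnalyzeForeignTable must be
updated to match. The message that acquire_sample_rows() previously emitted
by itself is removed, because the same figures are now reported from
do_analyze_rel().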

Author: Tatsuya Kawata <kawatatatsuya0913@gmail.com>
Reviewed-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAHza6qcN%3DPaGqo8CGgrqd%2BnaOwY_pLGiwEq6u%3D%2BASZZNL9zi9A%40mail.gmail.com#26a70a815cc922b7513e71fc0c445ff3
---
 contrib/file_fdw/file_fdw.c         |  6 ++-
 contrib/postgres_fdw/postgres_fdw.c |  7 +++-
 src/backend/commands/analyze.c      | 65 ++++++++++++++++++++---------
 src/include/commands/vacuum.h       | 13 ++++++
 src/include/foreign/fdwapi.h        |  6 ++-
 5 files changed, 73 insertions(+), 24 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 33a37d832ce..a131d0e93ce 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -171,7 +171,8 @@ static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 						   Cost *startup_cost, Cost *total_cost);
 static int	file_acquire_sample_rows(Relation onerel, int elevel,
 									 HeapTuple *rows, int targrows,
-									 double *totalrows, double *totaldeadrows);
+									 double *totalrows, double *totaldeadrows,
+									 SamplingStats *sampling_stats);
 
 
 /*
@@ -1185,7 +1186,8 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 static int
 file_acquire_sample_rows(Relation onerel, int elevel,
 						 HeapTuple *rows, int targrows,
-						 double *totalrows, double *totaldeadrows)
+						 double *totalrows, double *totaldeadrows,
+						 SamplingStats *sampling_stats)
 {
 	int			numrows = 0;
 	double		rowstoskip = -1;	/* -1 means not set yet */
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 3572689e33b..08d573b9705 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -21,6 +21,7 @@
 #include "commands/defrem.h"
 #include "commands/explain_format.h"
 #include "commands/explain_state.h"
+#include "commands/vacuum.h"
 #include "executor/execAsync.h"
 #include "foreign/fdwapi.h"
 #include "funcapi.h"
@@ -504,7 +505,8 @@ static void process_query_params(ExprContext *econtext,
 static int	postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 										  HeapTuple *rows, int targrows,
 										  double *totalrows,
-										  double *totaldeadrows);
+										  double *totaldeadrows,
+										  SamplingStats *sampling_stats);
 static void analyze_row_processor(PGresult *res, int row,
 								  PgFdwAnalyzeState *astate);
 static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch);
@@ -5008,7 +5010,8 @@ static int
 postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 							  HeapTuple *rows, int targrows,
 							  double *totalrows,
-							  double *totaldeadrows)
+							  double *totaldeadrows,
+							  SamplingStats *sampling_stats)
 {
 	PgFdwAnalyzeState astate;
 	ForeignTable *table;
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index a483424152c..3475fc2a947 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -86,11 +86,13 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
 static int	acquire_sample_rows(Relation onerel, int elevel,
 								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
+								double *totalrows, double *totaldeadrows,
+								SamplingStats *sampling_stats);
 static int	compare_rows(const void *a, const void *b, void *arg);
 static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
 										  HeapTuple *rows, int targrows,
-										  double *totalrows, double *totaldeadrows);
+										  double *totalrows, double *totaldeadrows,
+										  SamplingStats *sampling_stats);
 static void update_attstats(Oid relid, bool inh,
 							int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -302,6 +304,7 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	double		totalrows,
 				totaldeadrows;
 	HeapTuple  *rows;
+	SamplingStats sampling_stats = {0};
 	PGRUsage	ru0;
 	TimestampTz starttime = 0;
 	MemoryContext caller_context;
@@ -535,11 +538,13 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	if (inh)
 		numrows = acquire_inherited_sample_rows(onerel, elevel,
 												rows, targrows,
-												&totalrows, &totaldeadrows);
+												&totalrows, &totaldeadrows,
+												&sampling_stats);
 	else
 		numrows = (*acquirefunc) (onerel, elevel,
 								  rows, targrows,
-								  &totalrows, &totaldeadrows);
+								  &totalrows, &totaldeadrows,
+								  &sampling_stats);
 
 	/*
 	 * Compute the statistics.  Temporary results during the calculations for
@@ -805,7 +810,12 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 			initStringInfo(&buf);
 
 			if (AmAutoVacuumWorkerProcess())
-				msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n");
+			{
+				if (inh)
+					msgfmt = _("automatic analyze of table \"%s.%s.%s\" inheritance tree\n");
+				else
+					msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n");
+			}
 			else
 				msgfmt = _("finished analyzing table \"%s.%s.%s\"\n");
 
@@ -813,6 +823,18 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 							 get_database_name(MyDatabaseId),
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel));
+			if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+				appendStringInfo(&buf,
+								 _("sampling: %d rows in sample, %.0f estimated total rows\n"),
+								 numrows, totalrows);
+			else
+				appendStringInfo(&buf,
+								 _("sampling: scanned %u of %u pages, "
+								   "containing %.0f live rows and %.0f dead rows; "
+								   "%d rows in sample, %.0f estimated total rows\n"),
+								 sampling_stats.scannedpages, sampling_stats.totalpages,
+								 sampling_stats.liverows, sampling_stats.deadrows,
+								 numrows, totalrows);
 			if (track_cost_delay_timing)
 			{
 				/*
@@ -1204,7 +1226,8 @@ block_sampling_read_stream_next(ReadStream *stream,
 static int
 acquire_sample_rows(Relation onerel, int elevel,
 					HeapTuple *rows, int targrows,
-					double *totalrows, double *totaldeadrows)
+					double *totalrows, double *totaldeadrows,
+					SamplingStats *sampling_stats)
 {
 	int			numrows = 0;	/* # rows now in reservoir */
 	double		samplerows = 0; /* total # rows collected */
@@ -1345,17 +1368,11 @@ acquire_sample_rows(Relation onerel, int elevel,
 		*totaldeadrows = 0.0;
 	}
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": scanned %d of %u pages, "
-					"containing %.0f live rows and %.0f dead rows; "
-					"%d rows in sample, %.0f estimated total rows",
-					RelationGetRelationName(onerel),
-					bs.m, totalblocks,
-					liverows, deadrows,
-					numrows, *totalrows)));
+	/* Populate sampling statistics output parameters */
+	sampling_stats->totalpages = totalblocks;
+	sampling_stats->scannedpages = bs.m;
+	sampling_stats->liverows = liverows;
+	sampling_stats->deadrows = deadrows;
 
 	return numrows;
 }
@@ -1396,7 +1413,8 @@ compare_rows(const void *a, const void *b, void *arg)
 static int
 acquire_inherited_sample_rows(Relation onerel, int elevel,
 							  HeapTuple *rows, int targrows,
-							  double *totalrows, double *totaldeadrows)
+							  double *totalrows, double *totaldeadrows,
+							  SamplingStats *sampling_stats)
 {
 	List	   *tableOIDs;
 	Relation   *rels;
@@ -1408,10 +1426,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				i;
 	ListCell   *lc;
 	bool		has_child;
+	SamplingStats child_sampling_stats;
 
 	/* Initialize output parameters to zero now, in case we exit early */
 	*totalrows = 0;
 	*totaldeadrows = 0;
+	memset(sampling_stats, 0, sizeof(SamplingStats));
 
 	/*
 	 * Find all members of inheritance set.  We only need AccessShareLock on
@@ -1588,7 +1608,16 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
+				/*
+				 * Reset the per-child stats before each call.  Acquire
+				 * functions provided by FDWs need not fill them in, so
+				 * otherwise we would add uninitialized or stale values
+				 * to the totals below.
+				 */
+				memset(&child_sampling_stats, 0, sizeof(SamplingStats));
+
 				/* Fetch a random sample of the child's rows */
 				childrows = (*acquirefunc) (childrel, elevel,
 											rows + numrows, childtargrows,
-											&trows, &tdrows);
+											&trows, &tdrows,
+											&child_sampling_stats);
 
 				/* We may need to convert from child's rowtype to parent's */
 				if (childrows > 0 &&
@@ -1619,6 +1648,12 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				numrows += childrows;
 				*totalrows += trows;
 				*totaldeadrows += tdrows;
+
+				/* Accumulate sampling statistics */
+				sampling_stats->totalpages += child_sampling_stats.totalpages;
+				sampling_stats->scannedpages += child_sampling_stats.scannedpages;
+				sampling_stats->liverows += child_sampling_stats.liverows;
+				sampling_stats->deadrows += child_sampling_stats.deadrows;
 			}
 		}
 
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e885a4b9c77..ffb6990199a 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -300,6 +300,19 @@ typedef struct VacDeadItemsInfo
 	int64		num_items;		/* current # of entries */
 } VacDeadItemsInfo;
 
+/*
+ * SamplingStats carries the page and row counts observed while ANALYZE
+ * samples a relation, for the "sampling:" line reported by ANALYZE VERBOSE
+ * and autoanalyze logging.  Acquire functions may leave it untouched.
+ */
+typedef struct SamplingStats
+{
+	BlockNumber totalpages;		/* total pages (summed over children if inh) */
+	BlockNumber scannedpages;	/* pages actually scanned by the sampler */
+	double		liverows;		/* live rows seen on the scanned pages */
+	double		deadrows;		/* dead rows seen on the scanned pages */
+} SamplingStats;
+
 /* GUC parameters */
 extern PGDLLIMPORT int default_statistics_target;	/* PGDLLIMPORT for PostGIS */
 extern PGDLLIMPORT int vacuum_freeze_min_age;
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index 96b6f692d2a..87e980c2997 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -19,6 +19,9 @@
 /* avoid including explain_state.h here */
 typedef struct ExplainState ExplainState;
 
+/* avoid including vacuum.h here */
+typedef struct SamplingStats SamplingStats;
+
 
 /*
  * Callback function signatures --- see fdwhandler.sgml for more info.
@@ -151,7 +154,8 @@ typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
 typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
 									  HeapTuple *rows, int targrows,
 									  double *totalrows,
-									  double *totaldeadrows);
+									  double *totaldeadrows,
+									  SamplingStats *sampling_stats);
 
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 											  AcquireSampleRowsFunc *func,
-- 
2.34.1

