On Tue, 24 Mar 2026 11:43:47 +0000
Dean Rasheed <[email protected]> wrote:

> On Tue, 24 Mar 2026 at 10:57, Yugo Nagata <[email protected]> wrote:
> >
> > One concern is that CREATE STATISTICS does not allow creating extended
> > statistics on a single colmun. If we try this, it raises the following 
> > error:
> >
> >  ERROR:  extended statistics require at least 2 columns
> >
> > Therefore, if we take this approach, we would need to add an exception
> > to this rule for virtual columns.
> 
> I think that would be fine. It makes sense because a virtual column
> expands to an expression, and we already allow CREATE STATISTICS on a
> single expression.

I've attached a patch updated along these lines.

This allows users to define extended statistics on virtual generated
columns. Expressions including such columns are stored in the catalog
as-is, and expanded at ANALYZE time. Extended statistics on a single
virtual generated column are also allowed, since it is treated as a
single expression.

To enable the optimizer to make use of these statistics, expressions
are also expanded at planning time.

Additional documentation and tests are needed, but are not yet included.

Is this approach better then the previous one?

Regards,
Yugo Nagata

-- 
Yugo Nagata <[email protected]>
>From 22e6eace8ce10dd017513ca935d2e86c04b400b6 Mon Sep 17 00:00:00 2001
From: Yugo Nagata <[email protected]>
Date: Fri, 18 Jul 2025 09:58:56 +0900
Subject: [PATCH v6] Allow creating extended statistics on virtual generated
 columns

This allows users to define extended statistics on virtual generated
columns. Expressions including such columns are stored in the catalog
as-is, and expanded at ANALYZE time. Extended statistics on a single
virtual generated column are also allowed, since it is treated as a
single expression.

To enable the optimizer to make use of these statistics, expressions
are also expanded at planning time.
---
 doc/src/sgml/ref/alter_table.sgml       |  9 +----
 src/backend/commands/statscmds.c        | 49 ++++++++++++++-----------
 src/backend/optimizer/util/plancat.c    |  5 +++
 src/backend/statistics/extended_stats.c | 15 ++++++--
 src/test/regress/expected/stats_ext.out | 25 ++++---------
 src/test/regress/sql/stats_ext.sql      | 21 ++++-------
 6 files changed, 59 insertions(+), 65 deletions(-)

diff --git a/doc/src/sgml/ref/alter_table.sgml b/doc/src/sgml/ref/alter_table.sgml
index 8591a6b5014..f8006e9d72a 100644
--- a/doc/src/sgml/ref/alter_table.sgml
+++ b/doc/src/sgml/ref/alter_table.sgml
@@ -216,8 +216,6 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
       When this form is used, the column's statistics are removed,
       so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
       on the table afterwards is recommended.
-      For a virtual generated column, <command>ANALYZE</command>
-      is not necessary because such columns never have statistics.
      </para>
     </listitem>
    </varlistentry>
@@ -284,12 +282,9 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
      </para>
 
      <para>
-      When this form is used on a stored generated column, its statistics
-      are removed, so running
-      <link linkend="sql-analyze"><command>ANALYZE</command></link>
+      When this form is used, the column's statistics are removed,
+      so running <link linkend="sql-analyze"><command>ANALYZE</command></link>
       on the table afterwards is recommended.
-      For a virtual generated column, <command>ANALYZE</command>
-      is not necessary because such columns never have statistics.
      </para>
     </listitem>
    </varlistentry>
diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c
index c1da79f36ba..8a910ba1cd0 100644
--- a/src/backend/commands/statscmds.c
+++ b/src/backend/commands/statscmds.c
@@ -28,6 +28,7 @@
 #include "commands/comment.h"
 #include "commands/defrem.h"
 #include "miscadmin.h"
+#include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
 #include "optimizer/optimizer.h"
 #include "statistics/statistics.h"
@@ -268,12 +269,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow use of virtual generated columns in extended stats */
-			if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
-
 			/* Disallow data types without a less-than operator */
 			type = lookup_type_cache(attForm->atttypid, TYPECACHE_LT_OPR);
 			if (type->lt_opr == InvalidOid)
@@ -282,8 +277,22 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
 								attname, format_type_be(attForm->atttypid))));
 
-			attnums[nattnums] = attForm->attnum;
-			nattnums++;
+			/* Treat virtual generated columns as expressions */
+			if (attForm->attgenerated == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				Node *expr = (Node *) makeVar(1,
+											  attForm->attnum,
+											  attForm->atttypid,
+											  attForm->atttypmod,
+											  attForm->attcollation,
+											  0);
+				stxexprs = lappend(stxexprs, expr);
+			}
+			else
+			{
+				attnums[nattnums] = attForm->attnum;
+				nattnums++;
+			}
 			ReleaseSysCache(atttuple);
 		}
 		else if (IsA(selem->expr, Var)) /* column reference in parens */
@@ -297,12 +306,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("statistics creation on system columns is not supported")));
 
-			/* Disallow use of virtual generated columns in extended stats */
-			if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL)
-				ereport(ERROR,
-						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-						 errmsg("statistics creation on virtual generated columns is not supported")));
-
 			/* Disallow data types without a less-than operator */
 			type = lookup_type_cache(var->vartype, TYPECACHE_LT_OPR);
 			if (type->lt_opr == InvalidOid)
@@ -311,8 +314,16 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 						 errmsg("column \"%s\" cannot be used in statistics because its type %s has no default btree operator class",
 								get_attname(relid, var->varattno, false), format_type_be(var->vartype))));
 
-			attnums[nattnums] = var->varattno;
-			nattnums++;
+			/* Treat virtual generated columns as expressions */
+			if (get_attgenerated(relid, var->varattno) == ATTRIBUTE_GENERATED_VIRTUAL)
+			{
+				stxexprs = lappend(stxexprs, (Node *) var);
+			}
+			else
+			{
+				attnums[nattnums] = var->varattno;
+				nattnums++;
+			}
 		}
 		else					/* expression */
 		{
@@ -336,12 +347,6 @@ CreateStatistics(CreateStatsStmt *stmt, bool check_rights)
 					ereport(ERROR,
 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 							 errmsg("statistics creation on system columns is not supported")));
-
-				/* Disallow use of virtual generated columns in extended stats */
-				if (get_attgenerated(relid, attnum) == ATTRIBUTE_GENERATED_VIRTUAL)
-					ereport(ERROR,
-							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-							 errmsg("statistics creation on virtual generated columns is not supported")));
 			}
 
 			/*
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index b2fbd6a082b..ea8faeff2ac 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -1804,6 +1804,11 @@ get_relation_statistics(PlannerInfo *root, RelOptInfo *rel,
 				if (varno != 1)
 					ChangeVarNodes((Node *) exprs, 1, varno, 0);
 
+				/*
+				 * Expand virtual generated columns in the expressions.
+				 */
+				exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, relation, 1);
+
 				/*
 				 * Run the expressions through eval_const_expressions. This is
 				 * not just an optimization, but is necessary, because the
diff --git a/src/backend/statistics/extended_stats.c b/src/backend/statistics/extended_stats.c
index 334c6498581..9225a168a8a 100644
--- a/src/backend/statistics/extended_stats.c
+++ b/src/backend/statistics/extended_stats.c
@@ -32,6 +32,7 @@
 #include "parser/parsetree.h"
 #include "pgstat.h"
 #include "postmaster/autovacuum.h"
+#include "rewrite/rewriteHandler.h"
 #include "statistics/extended_stats_internal.h"
 #include "statistics/statistics.h"
 #include "utils/acl.h"
@@ -73,7 +74,7 @@ typedef struct StatExtEntry
 } StatExtEntry;
 
 
-static List *fetch_statentries_for_relation(Relation pg_statext, Oid relid);
+static List *fetch_statentries_for_relation(Relation pg_statext, Relation rel);
 static VacAttrStats **lookup_var_attr_stats(Bitmapset *attrs, List *exprs,
 											int nvacatts, VacAttrStats **vacatts);
 static void statext_store(Oid statOid, bool inh,
@@ -125,7 +126,7 @@ BuildRelationExtStatistics(Relation onerel, bool inh, double totalrows,
 
 	/* the list of stats has to be allocated outside the memory context */
 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
-	statslist = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+	statslist = fetch_statentries_for_relation(pg_stext, onerel);
 
 	/* memory context for building each statistics object */
 	cxt = AllocSetContextCreate(CurrentMemoryContext,
@@ -279,7 +280,7 @@ ComputeExtStatisticsRows(Relation onerel,
 	oldcxt = MemoryContextSwitchTo(cxt);
 
 	pg_stext = table_open(StatisticExtRelationId, RowExclusiveLock);
-	lstats = fetch_statentries_for_relation(pg_stext, RelationGetRelid(onerel));
+	lstats = fetch_statentries_for_relation(pg_stext, onerel);
 
 	foreach(lc, lstats)
 	{
@@ -416,12 +417,13 @@ statext_is_kind_built(HeapTuple htup, char type)
  * Return a list (of StatExtEntry) of statistics objects for the given relation.
  */
 static List *
-fetch_statentries_for_relation(Relation pg_statext, Oid relid)
+fetch_statentries_for_relation(Relation pg_statext, Relation rel)
 {
 	SysScanDesc scan;
 	ScanKeyData skey;
 	HeapTuple	htup;
 	List	   *result = NIL;
+	Oid			relid = RelationGetRelid(rel);
 
 	/*
 	 * Prepare to scan pg_statistic_ext for entries having stxrelid = this
@@ -491,6 +493,11 @@ fetch_statentries_for_relation(Relation pg_statext, Oid relid)
 
 			pfree(exprsString);
 
+			/*
+			 * Expand virtual generated columns in the expressions.
+			 */
+			exprs = (List *) expand_generated_columns_in_expr((Node *) exprs, rel, 1);
+
 			/*
 			 * Run the expressions through eval_const_expressions. This is not
 			 * just an optimization, but is necessary, because the planner
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index b6431d1ee95..b1c472eb716 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -25,7 +25,7 @@ begin
 end;
 $$;
 -- Verify failures
-CREATE TABLE ext_stats_test (x text, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int, w xid);
 CREATE STATISTICS tst;
 ERROR:  syntax error at or near ";"
 LINE 1: CREATE STATISTICS tst;
@@ -84,30 +84,19 @@ CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
 ERROR:  syntax error at or near ","
 LINE 1: CREATE STATISTICS tst ON (x, y) FROM ext_stats_test;
                                    ^
-DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
-ERROR:  statistics creation on virtual generated columns is not supported
 -- statistics on system column not allowed
-CREATE STATISTICS tst on tableoid from ext_stats_test1;
+CREATE STATISTICS tst on tableoid from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
+CREATE STATISTICS tst on (tableoid) from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
+CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
-CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test;
 ERROR:  statistics creation on system columns is not supported
 -- statistics without a less-than operator not supported
-CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test;
 ERROR:  column "w" cannot be used in statistics because its type xid has no default btree operator class
-DROP TABLE ext_stats_test1;
+DROP TABLE ext_stats_test;
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
 CREATE STATISTICS IF NOT EXISTS ab1_a_b_stats ON a, b FROM ab1;
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index 9dcce3440c8..22ffd74a654 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -28,7 +28,7 @@ end;
 $$;
 
 -- Verify failures
-CREATE TABLE ext_stats_test (x text, y int, z int);
+CREATE TABLE ext_stats_test (x text, y int, z int, w xid);
 CREATE STATISTICS tst;
 CREATE STATISTICS tst ON a, b;
 CREATE STATISTICS tst FROM sometab;
@@ -56,21 +56,14 @@ DROP FUNCTION tftest;
 CREATE STATISTICS tst ON (y) FROM ext_stats_test; -- single column reference
 CREATE STATISTICS tst ON y + z FROM ext_stats_test; -- missing parentheses
 CREATE STATISTICS tst ON (x, y) FROM ext_stats_test; -- tuple expression
-DROP TABLE ext_stats_test;
--- statistics on virtual generated column not allowed
-CREATE TABLE ext_stats_test1 (x int, y int, z int GENERATED ALWAYS AS (x+y) VIRTUAL, w xid);
-CREATE STATISTICS tst on z from ext_stats_test1;
-CREATE STATISTICS tst on (z) from ext_stats_test1;
-CREATE STATISTICS tst on (z+1) from ext_stats_test1;
-CREATE STATISTICS tst (ndistinct) ON z from ext_stats_test1;
 -- statistics on system column not allowed
-CREATE STATISTICS tst on tableoid from ext_stats_test1;
-CREATE STATISTICS tst on (tableoid) from ext_stats_test1;
-CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test1;
-CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test1;
+CREATE STATISTICS tst on tableoid from ext_stats_test;
+CREATE STATISTICS tst on (tableoid) from ext_stats_test;
+CREATE STATISTICS tst on (tableoid::int+1) from ext_stats_test;
+CREATE STATISTICS tst (ndistinct) ON xmin from ext_stats_test;
 -- statistics without a less-than operator not supported
-CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test1;
-DROP TABLE ext_stats_test1;
+CREATE STATISTICS tst (ndistinct) ON w from ext_stats_test;
+DROP TABLE ext_stats_test;
 
 -- Ensure stats are dropped sanely, and test IF NOT EXISTS while at it
 CREATE TABLE ab1 (a INTEGER, b INTEGER, c INTEGER);
-- 
2.43.0

Reply via email to