From 1611911dc9aef2f87db1bcb68442c2dd483ae0ff Mon Sep 17 00:00:00 2001
From: Lukas Fittl <lukas@fittl.com>
Date: Sat, 28 Feb 2026 15:33:56 -0800
Subject: [PATCH v2] pg_buffercache: Add pg_buffercache_relations() function

This function returns an aggregation of buffer contents, grouped on a
per-relfilenode basis. This is often useful to understand which tables or
indexes are currently in cache, and can show cache disruptions due to query
activity when sampled over time.

The existing pg_buffercache() function can be utilized for this by
grouping the result, but passing a large amount of buffer entries
(one per page) back to the tuplestore and then aggregating it can be
prohibitively expensive with large buffer counts. Even on a small shared
buffers (128MB) the new function is 10x faster. Similar to the existing
summary functions this new function does not hold buffer partition or
buffer header locks whilst gathering its statistics.

Author: Lukas Fittl <lukas@fittl.com>
Reviewed by: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed by: Bertrand Drouvot <bertranddrouvot.pg@gmail.com>
Reviewed by: Haibo Yan <tristan.yim@gmail.com>
Reviewed by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed by: Paul A Jungwirth <pj@illuminatedcomputing.com>
Reviewed by: Khoa Nguyen <khoaduynguyen@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAP53Pkx0=ph0vG_M20yVAoK11yGSTZP=53-rZt36OCP4hBPaDQ@mail.gmail.com
---
 .../expected/pg_buffercache.out               |  14 ++
 .../pg_buffercache--1.6--1.7.sql              |  16 +++
 contrib/pg_buffercache/pg_buffercache_pages.c | 136 +++++++++++++++++-
 contrib/pg_buffercache/sql/pg_buffercache.sql |   4 +
 doc/src/sgml/pgbuffercache.sgml               | 130 +++++++++++++++++
 src/tools/pgindent/typedefs.list              |   2 +
 6 files changed, 301 insertions(+), 1 deletion(-)

diff --git a/contrib/pg_buffercache/expected/pg_buffercache.out b/contrib/pg_buffercache/expected/pg_buffercache.out
index 886dea770f6..452f6ca6f58 100644
--- a/contrib/pg_buffercache/expected/pg_buffercache.out
+++ b/contrib/pg_buffercache/expected/pg_buffercache.out
@@ -33,6 +33,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
  t
 (1 row)
 
+SELECT count(*) > 0 FROM pg_buffercache_relations() WHERE buffers >= 0;
+ ?column? 
+----------
+ t
+(1 row)
+
 -- Check that the functions / views can't be accessed by default. To avoid
 -- having to create a dedicated user, use the pg_database_owner pseudo-role.
 SET ROLE pg_database_owner;
@@ -46,6 +52,8 @@ SELECT * FROM pg_buffercache_summary();
 ERROR:  permission denied for function pg_buffercache_summary
 SELECT * FROM pg_buffercache_usage_counts();
 ERROR:  permission denied for function pg_buffercache_usage_counts
+SELECT * FROM pg_buffercache_relations();
+ERROR:  permission denied for function pg_buffercache_relations
 RESET role;
 -- Check that pg_monitor is allowed to query view / function
 SET ROLE pg_monitor;
@@ -73,6 +81,12 @@ SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
  t
 (1 row)
 
+SELECT count(*) > 0 FROM pg_buffercache_relations();
+ ?column? 
+----------
+ t
+(1 row)
+
 RESET role;
 ------
 ---- Test pg_buffercache_evict* and pg_buffercache_mark_dirty* functions
diff --git a/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql b/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql
index 9a7bf66dab5..99b8e37a81c 100644
--- a/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql
+++ b/contrib/pg_buffercache/pg_buffercache--1.6--1.7.sql
@@ -54,3 +54,19 @@ CREATE FUNCTION pg_buffercache_mark_dirty_all(
     OUT buffers_skipped int4)
 AS 'MODULE_PATHNAME', 'pg_buffercache_mark_dirty_all'
 LANGUAGE C PARALLEL SAFE VOLATILE;
+
+CREATE FUNCTION pg_buffercache_relations(
+    OUT relfilenode oid,
+    OUT reltablespace oid,
+    OUT reldatabase oid,
+    OUT relforknumber int2,
+    OUT buffers int4,
+    OUT buffers_dirty int4,
+    OUT buffers_pinned int4,
+    OUT usagecount_avg float8)
+RETURNS SETOF record
+AS 'MODULE_PATHNAME', 'pg_buffercache_relations'
+LANGUAGE C PARALLEL SAFE;
+
+REVOKE ALL ON FUNCTION pg_buffercache_relations() FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_buffercache_relations() TO pg_monitor;
diff --git a/contrib/pg_buffercache/pg_buffercache_pages.c b/contrib/pg_buffercache/pg_buffercache_pages.c
index db4d711cce7..6f2fc7a69af 100644
--- a/contrib/pg_buffercache/pg_buffercache_pages.c
+++ b/contrib/pg_buffercache/pg_buffercache_pages.c
@@ -15,6 +15,7 @@
 #include "port/pg_numa.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
+#include "common/hashfn.h"
 #include "utils/rel.h"
 #include "utils/tuplestore.h"
 
@@ -23,6 +24,7 @@
 #define NUM_BUFFERCACHE_PAGES_ELEM	9
 #define NUM_BUFFERCACHE_SUMMARY_ELEM 5
 #define NUM_BUFFERCACHE_USAGE_COUNTS_ELEM 4
+#define NUM_BUFFERCACHE_RELATIONS_ELEM 8
 #define NUM_BUFFERCACHE_EVICT_ELEM 2
 #define NUM_BUFFERCACHE_EVICT_RELATION_ELEM 3
 #define NUM_BUFFERCACHE_EVICT_ALL_ELEM 3
@@ -92,6 +94,29 @@ typedef struct
 	BufferCacheOsPagesRec *record;
 } BufferCacheOsPagesContext;
 
+/*
+ * Hash key for pg_buffercache_relations — groups by relation file.
+ */
+typedef struct
+{
+	RelFileLocator locator;
+	ForkNumber	forknum;
+} BufferRelStatsKey;
+
+/*
+ * Hash entry for pg_buffercache_relations — accumulates per-relation
+ * buffer statistics.
+ */
+typedef struct
+{
+	BufferRelStatsKey key;
+	uint32		status;			/* for simplehash */
+	int32		buffers;
+	int32		buffers_dirty;
+	int32		buffers_pinned;
+	int64		usagecount_total;
+} BufferRelStatsEntry;
+
 
 /*
  * Function returning data from the shared buffer cache - buffer number,
@@ -108,7 +133,20 @@ PG_FUNCTION_INFO_V1(pg_buffercache_evict_all);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_relation);
 PG_FUNCTION_INFO_V1(pg_buffercache_mark_dirty_all);
-
+PG_FUNCTION_INFO_V1(pg_buffercache_relations);
+
+#define SH_PREFIX		relstats
+#define SH_ELEMENT_TYPE	BufferRelStatsEntry
+#define SH_KEY_TYPE		BufferRelStatsKey
+#define SH_KEY			key
+#define SH_HASH_KEY(tb, key) \
+	hash_bytes((const unsigned char *) &(key), sizeof(BufferRelStatsKey))
+#define SH_EQUAL(tb, a, b) \
+	(memcmp(&(a), &(b), sizeof(BufferRelStatsKey)) == 0)
+#define SH_SCOPE		static inline
+#define SH_DECLARE
+#define SH_DEFINE
+#include "lib/simplehash.h"
 
 /* Only need to touch memory once per backend process lifetime */
 static bool firstNumaTouch = true;
@@ -961,3 +999,99 @@ pg_buffercache_mark_dirty_all(PG_FUNCTION_ARGS)
 
 	PG_RETURN_DATUM(result);
 }
+
+/*
+ * pg_buffercache_relations
+ *
+ * Produces a set of rows that summarize buffer cache usage per relation-fork
+ * combination. This enables monitoring scripts to only get the summary stats,
+ * instead of accumulating in a query with the full buffer information.
+ */
+Datum
+pg_buffercache_relations(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	relstats_hash *relstats;
+	relstats_iterator iter;
+	BufferRelStatsEntry *entry;
+	Datum		values[NUM_BUFFERCACHE_RELATIONS_ELEM];
+	bool		nulls[NUM_BUFFERCACHE_RELATIONS_ELEM] = {0};
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* Create a hash table to aggregate stats by relation-fork */
+	relstats = relstats_create(CurrentMemoryContext, 128, NULL);
+
+	/* Single pass over all buffers */
+	for (int i = 0; i < NBuffers; i++)
+	{
+		BufferDesc *bufHdr;
+		uint64		buf_state;
+		BufferRelStatsKey key = {0};
+		bool		found;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Read buffer state without locking, same as pg_buffercache_summary
+		 * and pg_buffercache_usage_counts.  Locking wouldn't provide a
+		 * meaningfully more consistent result since buffers can change state
+		 * immediately after we release the lock.
+		 */
+		bufHdr = GetBufferDescriptor(i);
+		buf_state = pg_atomic_read_u64(&bufHdr->state);
+
+		/* Skip unused/invalid buffers */
+		if (!(buf_state & BM_VALID))
+			continue;
+
+		key.locator = BufTagGetRelFileLocator(&bufHdr->tag);
+		key.forknum = BufTagGetForkNum(&bufHdr->tag);
+
+		entry = relstats_insert(relstats, key, &found);
+
+		if (!found)
+		{
+			entry->buffers = 0;
+			entry->buffers_dirty = 0;
+			entry->buffers_pinned = 0;
+			entry->usagecount_total = 0;
+		}
+
+		entry->buffers++;
+		entry->usagecount_total += BUF_STATE_GET_USAGECOUNT(buf_state);
+
+		if (buf_state & BM_DIRTY)
+			entry->buffers_dirty++;
+
+		if (BUF_STATE_GET_REFCOUNT(buf_state) > 0)
+			entry->buffers_pinned++;
+	}
+
+	/* Emit one row per hash entry */
+	relstats_start_iterate(relstats, &iter);
+	while ((entry = relstats_iterate(relstats, &iter)) != NULL)
+	{
+		CHECK_FOR_INTERRUPTS();
+
+		if (entry->buffers == 0)
+			continue;
+
+		values[0] = ObjectIdGetDatum(entry->key.locator.relNumber);
+		values[1] = ObjectIdGetDatum(entry->key.locator.spcOid);
+		values[2] = ObjectIdGetDatum(entry->key.locator.dbOid);
+		values[3] = Int16GetDatum(entry->key.forknum);
+		values[4] = Int32GetDatum(entry->buffers);
+		values[5] = Int32GetDatum(entry->buffers_dirty);
+		values[6] = Int32GetDatum(entry->buffers_pinned);
+		values[7] = Float8GetDatum((double) entry->usagecount_total /
+								   entry->buffers);
+
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+							 values, nulls);
+	}
+
+	relstats_destroy(relstats);
+
+	return (Datum) 0;
+}
diff --git a/contrib/pg_buffercache/sql/pg_buffercache.sql b/contrib/pg_buffercache/sql/pg_buffercache.sql
index 127d604905c..b7f4d14afc2 100644
--- a/contrib/pg_buffercache/sql/pg_buffercache.sql
+++ b/contrib/pg_buffercache/sql/pg_buffercache.sql
@@ -18,6 +18,8 @@ from pg_buffercache_summary();
 
 SELECT count(*) > 0 FROM pg_buffercache_usage_counts() WHERE buffers >= 0;
 
+SELECT count(*) > 0 FROM pg_buffercache_relations() WHERE buffers >= 0;
+
 -- Check that the functions / views can't be accessed by default. To avoid
 -- having to create a dedicated user, use the pg_database_owner pseudo-role.
 SET ROLE pg_database_owner;
@@ -26,6 +28,7 @@ SELECT * FROM pg_buffercache_os_pages;
 SELECT * FROM pg_buffercache_pages() AS p (wrong int);
 SELECT * FROM pg_buffercache_summary();
 SELECT * FROM pg_buffercache_usage_counts();
+SELECT * FROM pg_buffercache_relations();
 RESET role;
 
 -- Check that pg_monitor is allowed to query view / function
@@ -34,6 +37,7 @@ SELECT count(*) > 0 FROM pg_buffercache;
 SELECT count(*) > 0 FROM pg_buffercache_os_pages;
 SELECT buffers_used + buffers_unused > 0 FROM pg_buffercache_summary();
 SELECT count(*) > 0 FROM pg_buffercache_usage_counts();
+SELECT count(*) > 0 FROM pg_buffercache_relations();
 RESET role;
 
 
diff --git a/doc/src/sgml/pgbuffercache.sgml b/doc/src/sgml/pgbuffercache.sgml
index 1e9aee10275..5c50a130c4f 100644
--- a/doc/src/sgml/pgbuffercache.sgml
+++ b/doc/src/sgml/pgbuffercache.sgml
@@ -31,6 +31,10 @@
   <primary>pg_buffercache_usage_counts</primary>
  </indexterm>
 
+ <indexterm>
+  <primary>pg_buffercache_relations</primary>
+ </indexterm>
+
  <indexterm>
   <primary>pg_buffercache_evict</primary>
  </indexterm>
@@ -63,6 +67,7 @@
   <structname>pg_buffercache_numa</structname> views), the
   <function>pg_buffercache_summary()</function> function, the
   <function>pg_buffercache_usage_counts()</function> function, the
+  <function>pg_buffercache_relations()</function> function, the
   <function>pg_buffercache_evict()</function> function, the
   <function>pg_buffercache_evict_relation()</function> function, the
   <function>pg_buffercache_evict_all()</function> function, the
@@ -102,6 +107,12 @@
   count.
  </para>
 
+ <para>
+  The <function>pg_buffercache_relations()</function> function returns a
+  set of rows summarizing buffer cache usage aggregated by relation and fork
+  number.
+ </para>
+
  <para>
   By default, use of the above functions is restricted to superusers and roles
   with privileges of the <literal>pg_monitor</literal> role. Access may be
@@ -564,6 +575,125 @@
   </para>
  </sect2>
 
+ <sect2 id="pgbuffercache-relation-stats">
+  <title>The <function>pg_buffercache_relations()</function> Function</title>
+
+  <para>
+   The definitions of the columns exposed by the function are shown in
+   <xref linkend="pgbuffercache_relation_stats-columns"/>.
+  </para>
+
+  <table id="pgbuffercache_relation_stats-columns">
+   <title><function>pg_buffercache_relations()</function> Output Columns</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>relfilenode</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-class"><structname>pg_class</structname></link>.<structfield>relfilenode</structfield>)
+      </para>
+      <para>
+       Filenode number of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>reltablespace</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-tablespace"><structname>pg_tablespace</structname></link>.<structfield>oid</structfield>)
+      </para>
+      <para>
+       Tablespace OID of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>reldatabase</structfield> <type>oid</type>
+       (references <link linkend="catalog-pg-database"><structname>pg_database</structname></link>.<structfield>oid</structfield>)
+      </para>
+      <para>
+       Database OID of the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>relforknumber</structfield> <type>smallint</type>
+      </para>
+      <para>
+       Fork number within the relation;  see
+       <filename>common/relpath.h</filename>
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers_dirty</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of dirty buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>buffers_pinned</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of pinned buffers for the relation
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>usagecount_avg</structfield> <type>float8</type>
+      </para>
+      <para>
+       Average usage count of the relation's buffers
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+  <para>
+   The <function>pg_buffercache_relations()</function> function returns a
+   set of rows summarizing the state of all shared buffers, aggregated by
+   relation and fork number.  Similar and more detailed information is
+   provided by the <structname>pg_buffercache</structname> view, but
+   <function>pg_buffercache_relations()</function> is significantly
+   cheaper.
+  </para>
+
+  <para>
+   Like the <structname>pg_buffercache</structname> view,
+   <function>pg_buffercache_relations()</function> does not acquire buffer
+   manager locks. Therefore concurrent activity can lead to minor inaccuracies
+   in the result.
+  </para>
+ </sect2>
+
  <sect2 id="pgbuffercache-pg-buffercache-evict">
   <title>The <function>pg_buffercache_evict()</function> Function</title>
   <para>
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 8df23840e57..4ae3ef9103b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -362,6 +362,8 @@ BufferHeapTupleTableSlot
 BufferLockMode
 BufferLookupEnt
 BufferManagerRelation
+BufferRelStatsEntry
+BufferRelStatsKey
 BufferStrategyControl
 BufferTag
 BufferUsage
-- 
2.47.1

