From 5dec59a1562b11c4ad53b0edcebbf6e45616f28f Mon Sep 17 00:00:00 2001
From: Greg Sabino Mullane <greg@turnstep.com>
Date: Mon, 22 Jul 2024 14:51:17 -0400
Subject: [PATCH] Normalize queries starting with SET

---
 contrib/pg_stat_statements/meson.build      |   1 +
 contrib/pg_stat_statements/t/020_jumbles.pl | 235 ++++++++++++++++++++
 src/backend/nodes/queryjumblefuncs.c        |  16 ++
 src/backend/parser/gram.y                   |  17 ++
 src/include/nodes/parsenodes.h              |   3 +
 5 files changed, 272 insertions(+)
 create mode 100644 contrib/pg_stat_statements/t/020_jumbles.pl

diff --git a/contrib/pg_stat_statements/meson.build b/contrib/pg_stat_statements/meson.build
index 9bfc9657e1..f901fd9ce3 100644
--- a/contrib/pg_stat_statements/meson.build
+++ b/contrib/pg_stat_statements/meson.build
@@ -62,6 +62,7 @@ tests += {
   'tap': {
     'tests': [
       't/010_restart.pl',
+      't/020_jumbles.pl',
     ],
   },
 }
diff --git a/contrib/pg_stat_statements/t/020_jumbles.pl b/contrib/pg_stat_statements/t/020_jumbles.pl
new file mode 100644
index 0000000000..aaa287c4ca
--- /dev/null
+++ b/contrib/pg_stat_statements/t/020_jumbles.pl
@@ -0,0 +1,235 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+# Tests for checking that query jumbling works as expected
+
+use strict;
+use warnings FATAL => 'all';
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+$node->init;
+$node->append_conf('postgresql.conf',
+				   "shared_preload_libraries = 'pg_stat_statements'");
+$node->start;
+
+$node->safe_psql('postgres', 'CREATE EXTENSION pg_stat_statements');
+
+my $check = "SELECT calls, query FROM pg_stat_statements WHERE query ~ 'SET' ORDER BY 2";
+
+my ($test, $result, $expected);
+
+$test = "Setting work_mem creates a new normalized query";
+$expected = '1|SET work_mem = $1';
+$node->safe_psql('postgres', 'SET work_mem = 1111');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Different work_mem value uses the same query";
+$expected = '2|SET work_mem = $1';
+$node->safe_psql('postgres', 'SET work_mem = 2222');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Different work_mem value uses the same query, ignoring caps and whitespace";
+$expected = '3|SET work_mem = $1';
+$node->safe_psql('postgres', 'Set    WORK_MEM = 3333');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Different work_mem using 'TO' uses same query";
+$expected = '4|SET work_mem = $1';
+$node->safe_psql('postgres', 'SET work_mem TO 4444');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting application_name creates a new normalized query";
+$expected = "1|SET application_name = \$1\n4|SET work_mem = \$1";
+$node->safe_psql('postgres', "SET application_name = 'foobar'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Different application_name value uses the same query";
+$expected = "2|SET application_name = \$1\n4|SET work_mem = \$1";
+$node->safe_psql('postgres', "SET application_name = 'foobar2'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Resetting application_name creates a new query";
+$expected = "1|RESET application_name\n2|SET application_name = \$1\n4|SET work_mem = \$1";
+$node->safe_psql('postgres', 'RESET application_name');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Resetting application_name again uses same query";
+$expected = "2|RESET application_name\n2|SET application_name = \$1\n4|SET work_mem = \$1";
+$node->safe_psql('postgres', 'RESET application_name');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$node->safe_psql('postgres', 'SELECT pg_stat_statements_reset()');
+
+$test = "Setting application_name creates a new normalized query";
+$expected = '1|SET application_name = $1';
+$node->safe_psql('postgres', "SET application_name = 'foobar3'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting application_name using 'LOCAL' creates a new nomalized query";
+$expected = "1|SET application_name = \$1\n1|SET LOCAL application_name = \$1";
+$node->safe_psql('postgres', "SET LOCAL application_name = 'wrench'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting application_name using 'LOCAL' uses the same query";
+$expected = "1|SET application_name = \$1\n2|SET LOCAL application_name = \$1";
+$node->safe_psql('postgres', "SET LOCAL application_name = 'candlestick'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting application_name using 'SESSION' uses same query";
+$expected = "2|SET application_name = \$1\n2|SET LOCAL application_name = \$1";
+$node->safe_psql('postgres', "SET SESSION application_name = 'rope'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "transaction" items
+$check =~ s/'.*'/'transaction'/;
+
+$test = "Setting session_characteristics does not create a normalized query";
+$expected = '1|SET session characteristics as transaction deferrable';
+$node->safe_psql('postgres', 'SET session characteristics as transaction deferrable');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting session_characteristics uses same query";
+$expected = '2|SET session characteristics as transaction deferrable';
+$node->safe_psql('postgres', 'SET session characteristics as transaction deferrable');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting session_characteristics using 'NOT' creates new query";
+$expected .= "\n1|SET session characteristics as transaction not deferrable";
+$node->safe_psql('postgres', 'SET session characteristics as transaction not deferrable');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "random_page_cost" items
+$check =~ s/'.*'/'random_page_cost'/;
+
+$test = "Setting random_page_cost to default creates new query";
+$expected = "1|SET random_page_cost TO default";
+$node->safe_psql('postgres', 'SET random_page_cost TO default');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting random_page_cost = default uses same query";
+$expected = "2|SET random_page_cost TO default";
+$node->safe_psql('postgres', 'SET random_page_cost = default');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting random_page_cost 'FROM CURRENT' creates new query";
+$expected = "1|SET random_page_cost FROM CURRENT\n2|SET random_page_cost TO default";
+$node->safe_psql('postgres', 'SET random_page_cost FROM CURRENT');
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "time zone" items
+$check =~ s/'.*'/'zone'/;
+
+$test = "Setting time zone creates new query";
+$expected = "1|SET time zone 'Asia/Tokyo'";
+$node->safe_psql('postgres', "SET time zone 'Asia/Tokyo'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting another time zone creates new query";
+$expected = "1|SET time zone 'Asia/Tokyo'\n1|SET time zone 'GMT'";
+$node->safe_psql('postgres', "SET time zone 'GMT'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "search path" items
+$check =~ s/'.*'/'schema|search'/;
+
+$test = "Setting schema creates new normalized query";
+$expected = "1|SET schema \$1";
+$node->safe_psql('postgres', "SET schema 'foo'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting another schema uses same query";
+$expected = "2|SET schema \$1";
+$node->safe_psql('postgres', "SET schema 'zed'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting search_path uses same query";
+$expected = "3|SET schema \$1";
+$node->safe_psql('postgres', "SET search_path = 'abc'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "client encoding" items
+$check =~ s/'.*'/'NAMES'/;
+
+$test = "Setting NAMES creates new normalized query";
+$expected = "1|SET NAMES \$1";
+$node->safe_psql('postgres', "SET NAMES 'latin1'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+$test = "Setting NAMES again uses same query";
+$expected = "2|SET NAMES \$1";
+$node->safe_psql('postgres', "SET NAMES 'latin2'");
+$result = $node->safe_psql('postgres',$check);
+
+is ($result, $expected, $test);
+$test = "Setting client_encoding uses same query";
+## Ugly outcome, but nobody should be using SET NAMES anyway
+$expected = "3|SET NAMES \$1";
+$node->safe_psql('postgres', "SET client_encoding = 'utf8'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "role" items
+$check =~ s/'.*'/'ROLE'/;
+
+$test = "Setting ROLE creates new query";
+$expected = "1|SET ROLE 'pg_monitor'";
+$node->safe_psql('postgres', "SET ROLE 'pg_monitor'");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "session" items
+$check =~ s/'.*'/'SESSION'/;
+
+$test = "Setting SESSION AUTORIZATION creates new query";
+$expected = "1|SET SESSION AUTHORIZATION 'pg_monitor'";
+$node->safe_psql('postgres', "SET SESSION AUTHORIZATION 'pg_monitor'");
+$result = $node->safe_psql('postgres',$check);
+
+is ($result, $expected, $test);
+$test = "Setting default SESSION AUTHORIZATION creates new query";
+$expected = "1|SET SESSION AUTHORIZATION default\n1|SET SESSION AUTHORIZATION 'pg_monitor'";
+$node->safe_psql('postgres', "SET SESSION AUTHORIZATION default");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+## Switch to only checking "XML" items
+$check =~ s/'.*'/'xml'/;
+
+$test = "Setting XML OPTION creates new query";
+$expected = "1|SET xml option document";
+$node->safe_psql('postgres', "SET xml option document");
+$result = $node->safe_psql('postgres',$check);
+is ($result, $expected, $test);
+
+done_testing();
+
+$node->stop;
+
+exit;
+
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 129fb44709..fa64840cbd 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -56,6 +56,7 @@ static void AppendJumble(JumbleState *jstate,
 static void RecordConstLocation(JumbleState *jstate, int location);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleA_Const(JumbleState *jstate, Node *node);
+static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 
 /*
@@ -352,3 +353,18 @@ _jumbleA_Const(JumbleState *jstate, Node *node)
 		}
 	}
 }
+
+static void
+_jumbleVariableSetStmt(JumbleState *jstate, Node *node)
+{
+	VariableSetStmt *expr = (VariableSetStmt *) node;
+
+	JUMBLE_FIELD(kind);
+	JUMBLE_STRING(name);
+	if (expr->kind != VAR_SET_VALUE ||
+		strcmp(expr->name, "timezone") == 0
+		|| strcmp(expr->name, "xmloption") == 0)
+		JUMBLE_NODE(args);
+	JUMBLE_FIELD(is_local);
+	JUMBLE_LOCATION(location);
+}
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index a043fd4c66..d115fa1e77 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -1649,6 +1649,7 @@ set_rest:
 					n->kind = VAR_SET_MULTI;
 					n->name = "TRANSACTION";
 					n->args = $2;
+					n->location = -1;
 					$$ = n;
 				}
 			| SESSION CHARACTERISTICS AS TRANSACTION transaction_mode_list
@@ -1658,6 +1659,7 @@ set_rest:
 					n->kind = VAR_SET_MULTI;
 					n->name = "SESSION CHARACTERISTICS";
 					n->args = $5;
+					n->location = -1;
 					$$ = n;
 				}
 			| set_rest_more
@@ -1671,6 +1673,7 @@ generic_set:
 					n->kind = VAR_SET_VALUE;
 					n->name = $1;
 					n->args = $3;
+					n->location = @3;
 					$$ = n;
 				}
 			| var_name '=' var_list
@@ -1680,6 +1683,7 @@ generic_set:
 					n->kind = VAR_SET_VALUE;
 					n->name = $1;
 					n->args = $3;
+					n->location = @3;
 					$$ = n;
 				}
 			| var_name TO DEFAULT
@@ -1688,6 +1692,7 @@ generic_set:
 
 					n->kind = VAR_SET_DEFAULT;
 					n->name = $1;
+					n->location = -1;
 					$$ = n;
 				}
 			| var_name '=' DEFAULT
@@ -1696,6 +1701,7 @@ generic_set:
 
 					n->kind = VAR_SET_DEFAULT;
 					n->name = $1;
+					n->location = -1;
 					$$ = n;
 				}
 		;
@@ -1708,6 +1714,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 
 					n->kind = VAR_SET_CURRENT;
 					n->name = $1;
+					n->location = -1;
 					$$ = n;
 				}
 			/* Special syntaxes mandated by SQL standard: */
@@ -1717,6 +1724,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 
 					n->kind = VAR_SET_VALUE;
 					n->name = "timezone";
+					n->location = -1;
 					if ($3 != NULL)
 						n->args = list_make1($3);
 					else
@@ -1738,6 +1746,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 					n->kind = VAR_SET_VALUE;
 					n->name = "search_path";
 					n->args = list_make1(makeStringConst($2, @2));
+					n->location = @2;
 					$$ = n;
 				}
 			| NAMES opt_encoding
@@ -1746,6 +1755,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 
 					n->kind = VAR_SET_VALUE;
 					n->name = "client_encoding";
+					n->location = @2;
 					if ($2 != NULL)
 						n->args = list_make1(makeStringConst($2, @2));
 					else
@@ -1759,6 +1769,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 					n->kind = VAR_SET_VALUE;
 					n->name = "role";
 					n->args = list_make1(makeStringConst($2, @2));
+					n->location = @2;
 					$$ = n;
 				}
 			| SESSION AUTHORIZATION NonReservedWord_or_Sconst
@@ -1768,6 +1779,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 					n->kind = VAR_SET_VALUE;
 					n->name = "session_authorization";
 					n->args = list_make1(makeStringConst($3, @3));
+					n->location = @3;
 					$$ = n;
 				}
 			| SESSION AUTHORIZATION DEFAULT
@@ -1776,6 +1788,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 
 					n->kind = VAR_SET_DEFAULT;
 					n->name = "session_authorization";
+					n->location = -1;
 					$$ = n;
 				}
 			| XML_P OPTION document_or_content
@@ -1785,6 +1798,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 					n->kind = VAR_SET_VALUE;
 					n->name = "xmloption";
 					n->args = list_make1(makeStringConst($3 == XMLOPTION_DOCUMENT ? "DOCUMENT" : "CONTENT", @3));
+					n->location = -1;
 					$$ = n;
 				}
 			/* Special syntaxes invented by PostgreSQL: */
@@ -1795,6 +1809,7 @@ set_rest_more:	/* Generic SET syntaxes: */
 					n->kind = VAR_SET_MULTI;
 					n->name = "TRANSACTION SNAPSHOT";
 					n->args = list_make1(makeStringConst($3, @3));
+					n->location = @3;
 					$$ = n;
 				}
 		;
@@ -1902,6 +1917,7 @@ reset_rest:
 
 					n->kind = VAR_RESET;
 					n->name = "timezone";
+					n->location = -1;
 					$$ = n;
 				}
 			| TRANSACTION ISOLATION LEVEL
@@ -1929,6 +1945,7 @@ generic_reset:
 
 					n->kind = VAR_RESET;
 					n->name = $1;
+					n->location = -1;
 					$$ = n;
 				}
 			| ALL
diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h
index 85a62b538e..56e8b21acb 100644
--- a/src/include/nodes/parsenodes.h
+++ b/src/include/nodes/parsenodes.h
@@ -2622,11 +2622,14 @@ typedef enum VariableSetKind
 
 typedef struct VariableSetStmt
 {
+	pg_node_attr(custom_query_jumble)
+
 	NodeTag		type;
 	VariableSetKind kind;
 	char	   *name;			/* variable to be set */
 	List	   *args;			/* List of A_Const nodes */
 	bool		is_local;		/* SET LOCAL? */
+	ParseLoc	location pg_node_attr(query_jumble_location);
 } VariableSetStmt;
 
 /* ----------------------
-- 
2.30.2

