From fd8c3139151179615ac01a972bf30970b76cd24a Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Mon, 20 Jul 2020 17:11:14 +0200
Subject: [PATCH] Limit jumbling for repeating constants

In case if in pg_stat_statements we found an ArrayExpr or RTE_VALUES
containing only constants, skip jumbling for those with position higher
than a predefined threshold. This helps to avoid problem of duplicated
statements, when the same query is repeated multiple times due to
different number of arguments in the expression, e.g.

    WHERE col IN (1, 2, 3, ...)
---
 .../pg_stat_statements/pg_stat_statements.c   | 110 +++++++++++++++++-
 1 file changed, 108 insertions(+), 2 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 14cad19afb..c96bcade81 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -115,6 +115,8 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
 
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
+#define MERGE_THRESHOLD			5		/* after which number of similar nodes
+										   start to merge then if asked */
 /*
  * Extension version number, for supporting older extension versions' objects
  */
@@ -374,6 +376,7 @@ static void JumbleQuery(pgssJumbleState *jstate, Query *query);
 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
 static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
+static bool JumbleExprList(pgssJumbleState *jstate, Node *node);
 static void RecordConstLocation(pgssJumbleState *jstate, int location);
 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
 									   int query_loc, int *query_len_p, int encoding);
@@ -2647,7 +2650,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
 				JumbleExpr(jstate, (Node *) rte->tablefunc);
 				break;
 			case RTE_VALUES:
-				JumbleExpr(jstate, (Node *) rte->values_lists);
+				JumbleExprList(jstate, (Node *) rte->values_lists);
 				break;
 			case RTE_CTE:
 
@@ -2691,6 +2694,109 @@ JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
 	}
 }
 
+static bool
+JumbleExprList(pgssJumbleState *jstate, Node *node)
+{
+	ListCell   *temp;
+	Node	   *firstExpr = NULL;
+	bool		merged = false;
+	bool		allConst = true;
+	int 		currentExprIdx;
+
+	if (node == NULL)
+		return merged;
+
+	Assert(IsA(node, List));
+	firstExpr = (Node *) lfirst(list_head((List *) node));
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	/*
+	 * We always emit the node's NodeTag, then any additional fields that are
+	 * considered significant, and then we recurse to any child nodes.
+	 */
+	APP_JUMB(node->type);
+
+	/*
+	 * If the first expression is a constant or a list of constants, try to
+	 * merge the following if they're constants as well. Otherwise do
+	 * JumbleExpr as usual.
+	 */
+	switch (nodeTag(firstExpr))
+	{
+		case T_List:
+			currentExprIdx = 0;
+
+			foreach(temp, (List *) firstExpr)
+			{
+				Node * subExpr = (Node *) lfirst(temp);
+
+				if (!IsA(subExpr, Const))
+				{
+					allConst = false;
+					break;
+				}
+			}
+
+			foreach(temp, (List *) node)
+			{
+				Node 		*expr = (Node *) lfirst(temp);
+				ListCell 	*lc;
+
+				foreach(lc, (List *) expr)
+				{
+					Node * subExpr = (Node *) lfirst(lc);
+
+					if (!IsA(subExpr, Const))
+					{
+						allConst = false;
+						break;
+					}
+				}
+
+				if (!equal(expr, firstExpr) && allConst &&
+					currentExprIdx > MERGE_THRESHOLD)
+				{
+					merged = true;
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+				currentExprIdx++;
+			}
+			break;
+
+		case T_Const:
+			currentExprIdx = 0;
+
+			foreach(temp, (List *) node)
+			{
+				Node *expr = (Node *) lfirst(temp);
+
+				if (!equal(expr, firstExpr) && IsA(expr, Const) &&
+					currentExprIdx > MERGE_THRESHOLD)
+				{
+					merged = true;
+					continue;
+				}
+
+				JumbleExpr(jstate, expr);
+				currentExprIdx++;
+			}
+			break;
+
+		default:
+			foreach(temp, (List *) node)
+			{
+				JumbleExpr(jstate, (Node *) lfirst(temp));
+			}
+			break;
+	}
+
+	return merged;
+}
+
 /*
  * Jumble an expression tree
  *
@@ -2928,7 +3034,7 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
 			}
 			break;
 		case T_ArrayExpr:
-			JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+			JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements);
 			break;
 		case T_RowExpr:
 			JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
-- 
2.21.0

