commit da84659aacf0b72769c01783ae8b5ee595da3f77
Author: Joel Jakobsson <joel@compiler.org>
Date:   Thu Jun 15 22:54:05 2023 +0200

    Ensure hashset_add and hashset_merge operate on copied data
    
    Previously, hashset_add() and hashset_merge() modified their input
    hashset in place. This led to unexpected results, because any other
    reference to the same value (for example, the hashset column selected
    alongside hashset_add() in the same query) saw the altered data.
    
    This commit introduces the macro PG_GETARG_INT4HASHSET_COPY(), ensuring
    a copy of the hashset is created and modified, leaving the original
    hashset untouched.
    
    This adjustment ensures hashset_add() and hashset_merge() operate
    correctly on the copied hashset and prevent modification of the
    original data.
    
    A new regression test file `reported_bugs.sql` has been added to
    validate the proper functionality of these changes. Future reported
    bugs and their corresponding tests will also be added to this file.

diff --git a/Makefile b/Makefile
index b09a50f..59669ef 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ SERVER_INCLUDES=-I$(shell pg_config --includedir-server)
 CLIENT_INCLUDES=-I$(shell pg_config --includedir)
 LIBRARY_PATH = -L$(shell pg_config --libdir)
 
-REGRESS = prelude basic io_varying_lengths random table invalid order parsing
+REGRESS = prelude basic io_varying_lengths random table invalid order parsing reported_bugs
 REGRESS_OPTS = --inputdir=test
 
 PG_CONFIG = pg_config
diff --git a/hashset.c b/hashset.c
index b9025b4..569ae91 100644
--- a/hashset.c
+++ b/hashset.c
@@ -42,6 +42,7 @@ static bool int4hashset_contains_element(int4hashset_t *set, int32 value);
 static Datum int32_to_array(FunctionCallInfo fcinfo, int32 * d, int len);
 
 #define PG_GETARG_INT4HASHSET(x)	(int4hashset_t *) PG_DETOAST_DATUM(PG_GETARG_DATUM(x))
+#define PG_GETARG_INT4HASHSET_COPY(x) ((int4hashset_t *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(x)))	/* detoasted copy of argument x; parenthesized so postfix operators apply to the cast result */
 #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
 #define HASHSET_STEP 13
 #define JENKINS_LOOKUP3_HASHFN_ID 1
@@ -566,7 +567,7 @@ int4hashset_add(PG_FUNCTION_ARGS)
 	else
 	{
 		/* make sure we are working with a non-toasted and non-shared copy of the input */
-		set = (int4hashset_t *) PG_GETARG_INT4HASHSET(0);
+		set = PG_GETARG_INT4HASHSET_COPY(0);
 	}
 
 	set = int4hashset_add_element(set, PG_GETARG_INT32(1));
@@ -592,7 +593,7 @@ int4hashset_merge(PG_FUNCTION_ARGS)
 	else if (PG_ARGISNULL(0))
 		PG_RETURN_POINTER(PG_GETARG_INT4HASHSET(1));
 
-	seta = PG_GETARG_INT4HASHSET(0);
+	seta = PG_GETARG_INT4HASHSET_COPY(0);
 	setb = PG_GETARG_INT4HASHSET(1);
 
 	bitmap = setb->data;
diff --git a/test/expected/reported_bugs.out b/test/expected/reported_bugs.out
new file mode 100644
index 0000000..226e81c
--- /dev/null
+++ b/test/expected/reported_bugs.out
@@ -0,0 +1,27 @@
+/*
+ * In the original implementation of the query, the hashset_add() and
+ * hashset_merge() functions were modifying the original hashset in-place.
+ * This issue was leading to unexpected results because the functions
+ * were altering the original data in the hashset.
+ *
+ * The problem was fixed by introducing a macro function
+ * PG_GETARG_INT4HASHSET_COPY() in the C code. This function ensures that
+ * a copy of the hashset is created and modified, leaving the original
+ * hashset untouched. This fix resulted in the correct execution of the
+ * query, with hashset_add() and hashset_merge() working on the copied
+ * hashset, thereby preventing alteration of the original data.
+ */
+SELECT
+    q.hashset,
+    hashset_add(hashset,4)
+FROM
+(
+    SELECT
+        hashset(generate_series)
+    FROM generate_series(1,3)
+) q;
+ hashset | hashset_add 
+---------+-------------
+ {1,3,2} | {1,3,4,2}
+(1 row)
+
diff --git a/test/sql/reported_bugs.sql b/test/sql/reported_bugs.sql
new file mode 100644
index 0000000..fcd0b9d
--- /dev/null
+++ b/test/sql/reported_bugs.sql
@@ -0,0 +1,22 @@
+/*
+ * In the original implementation of the query, the hashset_add() and
+ * hashset_merge() functions were modifying the original hashset in-place.
+ * This issue was leading to unexpected results because the functions
+ * were altering the original data in the hashset.
+ *
+ * The problem was fixed by introducing a macro function
+ * PG_GETARG_INT4HASHSET_COPY() in the C code. This function ensures that
+ * a copy of the hashset is created and modified, leaving the original
+ * hashset untouched. This fix resulted in the correct execution of the
+ * query, with hashset_add() and hashset_merge() working on the copied
+ * hashset, thereby preventing alteration of the original data.
+ */
+SELECT
+    q.hashset,
+    hashset_add(hashset,4)
+FROM
+(
+    SELECT
+        hashset(generate_series)
+    FROM generate_series(1,3)
+) q;
