From 47eddc78c263590e7a7ce7f002ec11bddba43a54 Mon Sep 17 00:00:00 2001
From: hari krishna <hmaddileti@vmware.com>
Date: Tue, 10 Jan 2023 15:35:38 +0530
Subject: [PATCH v13] Implement input functions for extended statistics types

In order to restore dumped extended statistics (stxdndistinct, stxddependencies, stxdmcv), we need to provide input functions that parse pg_ndistinct/pg_dependencies/pg_mcv_list strings.

Today we get the ERROR "cannot accept a value of type pg_ndistinct/pg_dependencies/pg_mcv_list" when we try to insert a value of any of these types.

Approach:
- Use a yacc grammar file (statistics_gram.y) to parse the input string into its internal format for the types pg_ndistinct and pg_dependencies.
- Simply call byteain() for the serialized input text of type pg_mcv_list.
- Add regression test cases for the input functions pg_dependencies_in and pg_ndistinct_in.

Usecase:
	- Helps for reproducing complex customer issues locally
	- dump and restore using pg_upgrade and pg_restore

Co-authored-by: David Kimura <dkimura@vmware.com>
---
 src/backend/statistics/Makefile               |   5 +-
 src/backend/statistics/dependencies.c         |  28 +-
 src/backend/statistics/mcv.c                  |  14 +-
 src/backend/statistics/mvdistinct.c           |  25 +-
 src/backend/statistics/statistics_gram.y      | 219 ++++++++++++++
 src/backend/statistics/statistics_scanner.l   | 118 ++++++++
 .../statistics/extended_stats_internal.h      |  13 +
 src/include/statistics/statistics.h           |   4 +
 src/test/regress/expected/stats_ext.out       | 277 ++++++++++++++++++
 src/test/regress/sql/stats_ext.sql            |  98 +++++++
 10 files changed, 771 insertions(+), 30 deletions(-)
 create mode 100644 src/backend/statistics/statistics_gram.y
 create mode 100644 src/backend/statistics/statistics_scanner.l

diff --git a/src/backend/statistics/Makefile b/src/backend/statistics/Makefile
index 89cf8c2797..cd4bb37734 100644
--- a/src/backend/statistics/Makefile
+++ b/src/backend/statistics/Makefile
@@ -16,6 +16,9 @@ OBJS = \
 	dependencies.o \
 	extended_stats.o \
 	mcv.o \
-	mvdistinct.o
+	mvdistinct.o \
+	statistics_gram.o
+
+statistics_gram.o: statistics_scanner.c
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/statistics/dependencies.c b/src/backend/statistics/dependencies.c
index e6e2835345..2bcc86bfd2 100644
--- a/src/backend/statistics/dependencies.c
+++ b/src/backend/statistics/dependencies.c
@@ -88,6 +88,7 @@ static Selectivity clauselist_apply_dependencies(PlannerInfo *root, List *clause
 												 int ndependencies,
 												 AttrNumber *list_attnums,
 												 Bitmapset **estimatedclauses);
+extern void statistics_scanner_init(const char *query_string);
 
 static void
 generate_dependencies_recurse(DependencyGenerator state, int index,
@@ -648,21 +649,26 @@ statext_dependencies_load(Oid mvoid, bool inh)
 /*
  * pg_dependencies_in		- input routine for type pg_dependencies.
  *
- * pg_dependencies is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * converts the dependencies from the external format in "string" to its
+ * internal format.
  */
 Datum
 pg_dependencies_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_node_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_dependencies")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	char	   *str = PG_GETARG_CSTRING(0);
+	int			parse_rc;
+
+	/* The grammar also accepts pg_ndistinct text; never reuse a stale result */
+	mvdependencies_parse_result = NULL;
+	statistics_scanner_init(str);
+	parse_rc = statistic_yyparse();
+	statistic_scanner_finish();
+	if (parse_rc != 0 || mvdependencies_parse_result == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("failed to parse a value of type %s", "pg_dependencies")));
+
+	PG_RETURN_MVDependencies_P(statext_dependencies_serialize(mvdependencies_parse_result));
 }
 
 /*
diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c
index 2d2a87d3a6..452a760683 100644
--- a/src/backend/statistics/mcv.c
+++ b/src/backend/statistics/mcv.c
@@ -1469,21 +1469,13 @@ pg_stats_ext_mcvlist_items(PG_FUNCTION_ARGS)
 /*
  * pg_mcv_list_in		- input routine for type pg_mcv_list.
  *
- * pg_mcv_list is real enough to be a table column, but it has no operations
- * of its own, and disallows input too
+ * Accepts the bytea text representation of a serialized MCV list by simply
+ * calling byteain(); the MCV contents themselves are not inspected here.
  */
 Datum
 pg_mcv_list_in(PG_FUNCTION_ARGS)
 {
-	/*
-	 * pg_mcv_list stores the data in binary form and parsing text input is
-	 * not needed, so disallow this.
-	 */
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_mcv_list")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	PG_RETURN_MCVList_P(byteain(fcinfo));
 }
 
 
diff --git a/src/backend/statistics/mvdistinct.c b/src/backend/statistics/mvdistinct.c
index 13301a3157..0548fda85a 100644
--- a/src/backend/statistics/mvdistinct.c
+++ b/src/backend/statistics/mvdistinct.c
@@ -42,6 +42,8 @@ static double estimate_ndistinct(double totalrows, int numrows, int d, int f1);
 static int	n_choose_k(int n, int k);
 static int	num_combinations(int n);
 
+extern void statistics_scanner_init(const char *query_string);
+
 /* size of the struct header fields (magic, type, nitems) */
 #define SizeOfHeader		(3 * sizeof(uint32))
 
@@ -333,17 +335,26 @@ statext_ndistinct_deserialize(bytea *data)
  * pg_ndistinct_in
  *		input routine for type pg_ndistinct
  *
- * pg_ndistinct is real enough to be a table column, but it has no
- * operations of its own, and disallows input (just like pg_node_tree).
+ * converts the distinct from the external format in "string" to its internal
+ * format.
  */
 Datum
 pg_ndistinct_in(PG_FUNCTION_ARGS)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("cannot accept a value of type %s", "pg_ndistinct")));
-
-	PG_RETURN_VOID();			/* keep compiler quiet */
+	char	   *str = PG_GETARG_CSTRING(0);
+	int			parse_rc;
+
+	/* The grammar also accepts pg_dependencies text; never reuse a stale result */
+	mvndistinct_parse_result = NULL;
+	statistics_scanner_init(str);
+	parse_rc = statistic_yyparse();
+	statistic_scanner_finish();
+	if (parse_rc != 0 || mvndistinct_parse_result == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("failed to parse a value of type %s", "pg_ndistinct")));
+
+	PG_RETURN_MVNDistinct_P(statext_ndistinct_serialize(mvndistinct_parse_result));
 }
 
 /*
diff --git a/src/backend/statistics/statistics_gram.y b/src/backend/statistics/statistics_gram.y
new file mode 100644
index 0000000000..022fba6ca9
--- /dev/null
+++ b/src/backend/statistics/statistics_gram.y
@@ -0,0 +1,219 @@
+%{
+#include "postgres.h"
+
+#include "statistics/extended_stats_internal.h"
+#include "statistics/statistics.h"
+
+/* Parse results, picked up by pg_ndistinct_in() and pg_dependencies_in() */
+MVNDistinct *mvndistinct_parse_result;
+MVDependencies *mvdependencies_parse_result;
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.  Note this only works with
+ * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+
+/* Number of attribute numbers seen in the item currently being parsed */
+static int	attrCount = 0;
+
+%}
+
+%expect 0
+%name-prefix="statistic_yy"
+
+
+%union
+{
+	uint32		uintval;
+	double		doubleval;
+
+	MVNDistinct *ndistinct;
+	MVNDistinctItem *ndistinct_item;
+
+	MVDependencies *dependencies;
+	MVDependency *dependency;
+
+	Bitmapset  *bitmap;
+	List	   *list;
+}
+
+/* Non-keyword tokens */
+%token <uintval> UCONST
+%token <doubleval> DOUBLE
+%token ARROW
+
+%type <ndistinct> ndistinct
+%type <ndistinct_item> ndistinct_item
+%type <list> ndistinct_item_list
+
+%type <dependencies> dependencies
+%type <list> dependency_item_list
+%type <dependency> dependency_item
+%type <bitmap> dependency_attrs
+
+%type <bitmap> attrs
+
+%%
+
+/* A value is either an ndistinct list or a dependencies list. */
+extended_statistic:
+	ndistinct
+	{
+	}
+	| dependencies
+	{
+	}
+
+	;
+
+/*
+ * "ndistinct" parses a whole pg_ndistinct value into an MVNDistinct struct,
+ * which is also published through mvndistinct_parse_result.
+ * Example input: '{"1, 2": 1, "2, 3": 2, "3, 1": 2}'
+ */
+ndistinct:
+'{' ndistinct_item_list '}'
+{
+	int			nitems = list_length($2);
+	int			i = 0;
+	ListCell   *cell;
+
+	$$ = palloc0(MAXALIGN(offsetof(MVNDistinct, items)) +
+				 nitems * sizeof(MVNDistinctItem));
+	$$->magic = STATS_NDISTINCT_MAGIC;
+	$$->type = STATS_NDISTINCT_TYPE_BASIC;
+	$$->nitems = nitems;
+
+	/* Copy each parsed item into the trailing items[] array, in order. */
+	foreach(cell, $2)
+		memcpy(&$$->items[i++], lfirst(cell), sizeof(MVNDistinctItem));
+
+	mvndistinct_parse_result = $$;
+	attrCount = 0;		/* start the next value with a clean count */
+}
+
+;
+
+/* Comma-separated ndistinct items, gathered into a List in input order. */
+ndistinct_item_list:
+	ndistinct_item
+	{
+		$$ = lappend(NIL, $1);
+	}
+	| ndistinct_item_list ',' ndistinct_item
+	{
+		$$ = lappend($1, $3);
+	}
+
+	;
+
+/* One entry: a quoted attribute list, a colon, then the distinct count. */
+ndistinct_item:
+	'"' attrs '"' ':' UCONST
+	{
+		$$ = (MVNDistinctItem *) palloc0(sizeof(MVNDistinctItem));
+		$$->ndistinct = $5;
+		$$->attrs = $2;
+	}
+	;
+
+/*
+ * Two or more comma-separated attribute numbers, collected in a Bitmapset.
+ * attrCount is bumped once per number consumed.
+ */
+attrs:
+	UCONST ',' UCONST
+	{
+		$$ = bms_add_member(bms_make_singleton($1), $3);
+		attrCount += 2;
+	}
+	| attrs ',' UCONST
+	{
+		$$ = bms_add_member($1, $3);
+		attrCount++;
+	}
+
+	;
+
+/*
+ * "dependencies" parses a whole pg_dependencies value into an MVDependencies
+ * struct, which is also published through mvdependencies_parse_result.
+ * Example input: '{"1 => 2": 1.000000, "2 => 3": 2.000000}'
+ */
+dependencies:
+'{' dependency_item_list '}'
+{
+	int			i = 0;
+	ListCell   *cell;
+
+	$$ = palloc0(MAXALIGN(offsetof(MVDependencies, deps)) +
+				 list_length($2) * sizeof(MVDependency *));
+	$$->magic = STATS_DEPS_MAGIC;
+	$$->type = STATS_DEPS_TYPE_BASIC;
+	$$->ndeps = list_length($2);
+	/* One pass over the list instead of repeated list_nth() lookups. */
+	foreach(cell, $2)
+		$$->deps[i++] = lfirst(cell);
+	mvdependencies_parse_result = $$;
+	attrCount = 0;
+}
+
+;
+
+/* Comma-separated dependency items, gathered into a List in input order. */
+dependency_item_list:
+	dependency_item
+	{
+		$$ = lappend(NIL, $1);
+	}
+	| dependency_item_list ',' dependency_item
+	{
+		$$ = lappend($1, $3);
+	}
+
+	;
+
+dependency_item:
+'"' dependency_attrs '"' ':' DOUBLE
+{
+	int			nattrs = 0;
+	AttrNumber *attnums = build_attnums_array($2, &nattrs);
+
+	/* attributes[] is a trailing array, so size the allocation for it. */
+	$$ = (MVDependency *) palloc0(offsetof(MVDependency, attributes) +
+								  nattrs * sizeof(AttrNumber));
+	$$->degree = $5;
+	$$->nattributes = nattrs;
+	/* NOTE(review): the Bitmapset likely loses the written attribute order. */
+	memcpy($$->attributes, attnums, nattrs * sizeof(AttrNumber));
+	attrCount = 0;
+}
+
+;
+
+/*
+ * Left-hand attribute(s), "=>", then the implied attribute; all of them end
+ * up in one Bitmapset.
+ */
+dependency_attrs:
+	attrs ARROW UCONST
+	{
+		$$ = bms_add_member($1, $3);
+		attrCount++;
+	}
+	| UCONST ARROW UCONST
+	{
+		$$ = bms_add_member(bms_make_singleton($1), $3);
+		attrCount += 2;
+	}
+	;
+
+%%
+
+#include "statistics_scanner.c"
diff --git a/src/backend/statistics/statistics_scanner.l b/src/backend/statistics/statistics_scanner.l
new file mode 100644
index 0000000000..849a970153
--- /dev/null
+++ b/src/backend/statistics/statistics_scanner.l
@@ -0,0 +1,118 @@
+%{
+#include "postgres.h"
+
+#include "parser/scansup.h"
+#include "utils/builtins.h"
+
+/*
+ * Handle to the flex buffer for the string being scanned; set by
+ * statistics_scanner_init() and released by statistic_scanner_finish().
+ */
+static YY_BUFFER_STATE scanbufhandle;
+%}
+
+%option noyywrap
+%option prefix="statistic_yy"
+
+/*
+ * Lexer for the text form of pg_ndistinct and pg_dependencies values.
+ *
+ * NOTE(review): there is no catch-all rule, so characters matching none of
+ * the rules below take flex's default action (ECHO) and are dropped.
+ */
+digit			[0-9]+
+double			[0-9.]
+
+%%
+
+[ \t\n]			{ /* skip whitespace */ }
+
+"{"				{
+					return '{';
+				}
+
+"}"				{
+					return '}';
+				}
+
+","				{
+					return ',';
+				}
+
+"\""			{
+					return '"';
+				}
+
+":"				{
+					return ':';
+				}
+
+"=>"			{
+					return ARROW;
+				}
+
+{digit}+		{
+					/*
+					 * Unsigned integer.  On a tie with the rule below (e.g.
+					 * "1") flex prefers this one because it comes first.
+					 */
+					yylval.uintval = strtoul(yytext, NULL, 10);
+					return UCONST;
+				}
+
+{double}+		{
+					/*
+					 * Any run of digits and dots; strtod() converts the
+					 * leading part it can parse.
+					 */
+					yylval.doubleval = strtod(yytext, NULL);
+					return DOUBLE;
+				}
+
+%%
+
+void
+yyerror(const char *message)
+{
+	/* yytext starts with the end-of-buffer marker once input is exhausted */
+	bool		at_eof = (*yytext == YY_END_OF_BUFFER_CHAR);
+
+	/*
+	 * Report the parser's message together with where the scanner stopped:
+	 * either the end of the input or the text of the current token.
+	 */
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("invalid input syntax for extended stats type"),
+			 at_eof ?
+			 errdetail("%s at end of input", message) :
+			 errdetail("%s at or near \"%s\"", message, yytext)));
+}
+
+void
+statistics_scanner_init(const char *str)
+{
+	Size		len = strlen(str);
+	char	   *buf;
+
+	/* A previous scan aborted by ereport() may have left a buffer behind. */
+	if (YY_CURRENT_BUFFER)
+	{
+		yy_delete_buffer(YY_CURRENT_BUFFER);
+	}
+
+	/* flex wants the input followed by two end-of-buffer sentinel bytes. */
+	buf = (char *) palloc(len + 2);
+	memcpy(buf, str, len);
+	buf[len] = YY_END_OF_BUFFER_CHAR;
+	buf[len + 1] = YY_END_OF_BUFFER_CHAR;
+
+	scanbufhandle = yy_scan_buffer(buf, len + 2);
+}
+
+void
+statistic_scanner_finish(void)
+{
+	yy_delete_buffer(scanbufhandle);	/* drop the buffer made by the init call */
+	scanbufhandle = NULL;				/* and forget the now-invalid handle */
+}
diff --git a/src/include/statistics/extended_stats_internal.h b/src/include/statistics/extended_stats_internal.h
index 7b55eb8ffa..1d40436119 100644
--- a/src/include/statistics/extended_stats_internal.h
+++ b/src/include/statistics/extended_stats_internal.h
@@ -127,4 +127,17 @@ extern Selectivity mcv_clause_selectivity_or(PlannerInfo *root,
 											 Selectivity *overlap_basesel,
 											 Selectivity *totalsel);
 
+/*
+ * Internal functions for parsing the statistics grammar, in statistics_gram.y
+ * and statistics_scanner.l, plus the variables holding the parse results.
+ */
+extern int	statistic_yyparse(void);
+extern int	statistic_yylex(void);
+extern void statistic_yyerror(const char *str) pg_attribute_noreturn();
+extern void statistics_scanner_init(const char *query_string);
+extern void statistic_scanner_finish(void);
+
+extern MVNDistinct *mvndistinct_parse_result;
+extern MVDependencies *mvdependencies_parse_result;
+
 #endif							/* EXTENDED_STATS_INTERNAL_H */
diff --git a/src/include/statistics/statistics.h b/src/include/statistics/statistics.h
index 17e3e7f881..b57b991419 100644
--- a/src/include/statistics/statistics.h
+++ b/src/include/statistics/statistics.h
@@ -22,6 +22,10 @@
 #define STATS_NDISTINCT_MAGIC		0xA352BFA4	/* struct identifier */
 #define STATS_NDISTINCT_TYPE_BASIC	1	/* struct version */
 
+#define PG_RETURN_MVNDistinct_P(X) return PointerGetDatum(X)
+#define PG_RETURN_MVDependencies_P(X) return PointerGetDatum(X)
+#define PG_RETURN_MCVList_P(X) return PointerGetDatum(X)
+
 /* MVNDistinctItem represents a single combination of columns */
 typedef struct MVNDistinctItem
 {
diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out
index 03880874c1..67f5bfd212 100644
--- a/src/test/regress/expected/stats_ext.out
+++ b/src/test/regress/expected/stats_ext.out
@@ -3290,3 +3290,280 @@ NOTICE:  drop cascades to 2 other objects
 DETAIL:  drop cascades to table tststats.priv_test_tbl
 drop cascades to view tststats.priv_test_view
 DROP USER regress_stats_user1;
+    -- Test pg_ndistinct_in
+drop table if exists tbl_distinct;
+NOTICE:  table "tbl_distinct" does not exist, skipping
+create table tbl_distinct(i int, ii pg_ndistinct);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_distinct values (1, '{"1, 2": 1}');
+insert into tbl_distinct values (2, '{"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}');
+insert into tbl_distinct values (3, '{"123, 234": 11}');
+select * from tbl_distinct;
+ i |                       ii
+---+-------------------------------------------------
+ 2 | {"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}
+ 3 | {"123, 234": 11}
+ 1 | {"1, 2": 1}
+(3 rows)
+
+-- leading space
+insert into tbl_distinct values (1, ' {"1, 2": 1}');
+-- trailing space
+insert into tbl_distinct values (1, '{"1, 2": 1} ');
+-- unmatched quote
+insert into tbl_distinct values (1, '{"1", 2": 1} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1", 2": 1} ');
+                                            ^
+DETAIL:  syntax error at or near """
+-- space in attribute list
+insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+                                            ^
+DETAIL:  syntax error at or near "3"
+-- colon in attribute list
+insert into tbl_distinct values (1, '{"1: 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1: 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{"1, 2:" 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1, 2:" 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{":1 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{":1 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+-- zero/single item attribute list
+insert into tbl_distinct values (1, '{"1": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{: 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{: 1}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_distinct values (1, '{"": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{" ": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{" ": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+insert into tbl_distinct values (1, '{}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{}');
+                                            ^
+DETAIL:  syntax error at or near "}"
+insert into tbl_distinct values (1, '{:}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{:}');
+                                            ^
+DETAIL:  syntax error at or near ":"
+-- illegal character
+insert into tbl_distinct values (1, '{"1,| 2": 1}');
+-- multiple consecutive characters
+insert into tbl_distinct values (1, '{"1,, 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1,, 2": 1}');
+                                            ^
+DETAIL:  syntax error at or near ","
+insert into tbl_distinct values (1, '{"1": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_distinct values (1, '{"1": 1}');
+                                            ^
+DETAIL:  syntax error at or near """
+-- Need to add check on catalog table insert that atribute numbers are legal
+-- (e.g. there shouldn't be attribute number 100 for a table with only 2
+-- columns also it should match)
+select * from tbl_distinct;
+ i |                       ii
+---+-------------------------------------------------
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 1 | {"1, 2": 1}
+ 2 | {"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}
+ 3 | {"123, 234": 11}
+(6 rows)
+
+    -- Test pg_dependencies_in
+drop table if exists tbl_dependencies;
+NOTICE:  table "tbl_dependencies" does not exist, skipping
+create table tbl_dependencies(i int, ii pg_dependencies);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000}');
+insert into tbl_dependencies values (2, '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+select * from tbl_dependencies;
+ i |                                         ii
+---+-------------------------------------------------------------------------------------
+ 2 | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+ 1 | {"1 => 2": 1.000000}
+(2 rows)
+
+-- leading space
+insert into tbl_dependencies values (1, ' {"1 => 2": 1.000000}');
+-- trailing space
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000} ');
+-- unmatched quote
+insert into tbl_dependencies values (1, '{"1" => 2": 1.000000} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1" => 2": 1.00000...
+                                                ^
+DETAIL:  syntax error at or near """
+-- Wrong format
+insert into tbl_dependencies values (1, '{"1, 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1, 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+                                                ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000, " 2 => 1": 2}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2": 1.000000...
+                                                ^
+DETAIL:  syntax error at or near "2"
+-- space in attribute list
+insert into tbl_dependencies values (1, '{"1 3 => 2": 1.000000} ');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 3 => 2": 1.0000...
+                                                ^
+DETAIL:  syntax error at or near "3"
+-- colon in attribute list
+insert into tbl_dependencies values (1, '{"1: 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1: 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{":1 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{":1 2": 1.000000}'...
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"1, 2" 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1, 2" 1.000000}')...
+                                                ^
+DETAIL:  syntax error at or near "1.000000"
+-- zero/single item attribute list
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{: 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near ":"
+insert into tbl_dependencies values (1, '{"": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{" ": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{" ": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+insert into tbl_dependencies values (1, '{}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{}');
+                                                ^
+DETAIL:  syntax error at or near "}"
+insert into tbl_dependencies values (1, '{:}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{:}');
+                                                ^
+DETAIL:  syntax error at or near ":"
+-- multiple consecutive characters
+insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.0000...
+                                                ^
+DETAIL:  syntax error at or near "=>"
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dependencies values (1, '{"1": 1.000000}');
+                                                ^
+DETAIL:  syntax error at or near """
+select * from tbl_dependencies;
+ i |                                         ii
+---+-------------------------------------------------------------------------------------
+ 2 | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+ 1 | {"1 => 2": 1.000000}
+ 1 | {"1 => 2": 1.000000}
+ 1 | {"1 => 2": 1.000000}
+(4 rows)
+
+-- Test a table with columns of type pg_ndistinct and pg_dependencies
+drop table if exists tbl_dist_dep;
+NOTICE:  table "tbl_dist_dep" does not exist, skipping
+create table tbl_dist_dep(i pg_ndistinct, ii pg_dependencies);
+NOTICE:  Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table.
+HINT:  The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 3, "1, 3" : 3, "2, 3" : 2, "1, 2, 3" : 3}', '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+-- unmatched quote
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1...
+                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1...
+                                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.0...
+                                         ^
+DETAIL:  syntax error at or near "1"
+-- Invalid type
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2":...
+                                                         ^
+DETAIL:  syntax error at or near "1"
+insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1...
+                                         ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000 => 2.000000": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000...
+                                                         ^
+DETAIL:  syntax error at or near "1.000000"
+insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1 => 2": 1.000000}');
+ERROR:  invalid input syntax for extended stats type
+LINE 1: insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1...
+                                         ^
+DETAIL:  syntax error at or near "2.000000"
+select * from tbl_dist_dep;
+                        i                        |                                         ii
+-------------------------------------------------+-------------------------------------------------------------------------------------
+ {"1, 2": 3, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3} | {"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}
+ {"1, 2": 1}                                     | {"1 => 2": 1.000000}
+(2 rows)
diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql
index d0d42cd013..5acf0f5465 100644
--- a/src/test/regress/sql/stats_ext.sql
+++ b/src/test/regress/sql/stats_ext.sql
@@ -1663,3 +1663,101 @@ DROP FUNCTION op_leak(int, int);
 RESET SESSION AUTHORIZATION;
 DROP SCHEMA tststats CASCADE;
 DROP USER regress_stats_user1;
+
+    -- Test pg_ndistinct_in
+drop table if exists tbl_distinct;
+create table tbl_distinct(i int, ii pg_ndistinct);
+insert into tbl_distinct values (1, '{"1, 2": 1}');
+insert into tbl_distinct values (2, '{"1, 2": 2, "1, 3": 3, "2, 3": 2, "1, 2, 3": 3}');
+insert into tbl_distinct values (3, '{"123, 234": 11}');
+select * from tbl_distinct;
+
+-- leading space
+insert into tbl_distinct values (1, ' {"1, 2": 1}');
+-- trailing space
+insert into tbl_distinct values (1, '{"1, 2": 1} ');
+-- unmatched quote
+insert into tbl_distinct values (1, '{"1", 2": 1} ');
+-- space in attribute list
+insert into tbl_distinct values (1, '{"1 3, 2": 1} ');
+-- colon in attribute list
+insert into tbl_distinct values (1, '{"1: 2": 1}');
+insert into tbl_distinct values (1, '{"1, 2:" 1}');
+insert into tbl_distinct values (1, '{":1 2": 1}');
+-- zero/single item attribute list
+insert into tbl_distinct values (1, '{"1": 1}');
+insert into tbl_distinct values (1, '{: 1}');
+insert into tbl_distinct values (1, '{"": 1}');
+insert into tbl_distinct values (1, '{" ": 1}');
+insert into tbl_distinct values (1, '{}');
+insert into tbl_distinct values (1, '{:}');
+-- illegal character
+insert into tbl_distinct values (1, '{"1,| 2": 1}');
+
+-- multiple consecutive characters
+insert into tbl_distinct values (1, '{"1,, 2": 1}');
+insert into tbl_distinct values (1, '{"1": 1}');
+
+-- Need to add check on catalog table insert that atribute numbers are legal
+-- (e.g. there shouldn't be attribute number 100 for a table with only 2
+-- columns also it should match)
+
+select * from tbl_distinct;
+
+
+-- Test pg_dependencies_in
+drop table if exists tbl_dependencies;
+create table tbl_dependencies(i int, ii pg_dependencies);
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000}');
+insert into tbl_dependencies values (2, '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+
+select * from tbl_dependencies;
+
+-- leading space
+insert into tbl_dependencies values (1, ' {"1 => 2": 1.000000}');
+-- trailing space
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000} ');
+-- unmatched quote
+insert into tbl_dependencies values (1, '{"1" => 2": 1.000000} ');
+-- Wrong format
+insert into tbl_dependencies values (1, '{"1, 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1 => 2": 1}');
+insert into tbl_dependencies values (1, '{"1 => 2": 1.000000, " 2 => 1": 2}');
+-- space in attribute list
+insert into tbl_dependencies values (1, '{"1 3 => 2": 1.000000} ');
+-- colon in attribute list
+insert into tbl_dependencies values (1, '{"1: 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1 => 2:" 1.000000}');
+insert into tbl_dependencies values (1, '{":1 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1, 2" 1.000000}');
+-- zero/single item attribute list
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+insert into tbl_dependencies values (1, '{: 1.000000}');
+insert into tbl_dependencies values (1, '{"": 1.000000}');
+insert into tbl_dependencies values (1, '{" ": 1.000000}');
+insert into tbl_dependencies values (1, '{}');
+insert into tbl_dependencies values (1, '{:}');
+
+-- multiple consecutive characters
+insert into tbl_dependencies values (1, '{"1 =>=> 2": 1.000000}');
+insert into tbl_dependencies values (1, '{"1": 1.000000}');
+
+select * from tbl_dependencies;
+
+
+-- Test a table with columns of type pg_ndistinct and pg_dependencies
+drop table if exists tbl_dist_dep;
+create table tbl_dist_dep(i pg_ndistinct, ii pg_dependencies);
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 3, "1, 3" : 3, "2, 3" : 2, "1, 2, 3" : 3}', '{"1 => 2": 2.000000, "1 => 3": 3.000000, "2 => 3": 2.000000, "1, 2 => 3": 3.000000}');
+-- unmatched quote
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{1 => 2: 1.000000}');
+insert into tbl_dist_dep values ('{1, 2 : 1}', '{1 => 2: 1.000000}');
+-- Invalid type
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1 => 2": 1}');
+insert into tbl_dist_dep values ('{"1, 2" : 1.000000}', '{"1 => 2": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2" : 1}', '{"1.000000 => 2.000000": 1.000000}');
+insert into tbl_dist_dep values ('{"1, 2.000000" : 1}', '{"1 => 2": 1.000000}');
+
+select * from tbl_dist_dep;
-- 
2.30.1 (Apple Git-130)

