From e1940cc02e97da2e84fd062feecc0a38bdbedd4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E4=B8=80=E6=8C=83?= <yizhi.fzh@alibaba-inc.com>
Date: Mon, 9 Aug 2021 19:43:58 +0800
Subject: [PATCH v4 5/6] Support UniqueKey on JoinRel.

---
 src/backend/optimizer/path/uniquekey.c  | 379 ++++++++++++++++++++++++
 src/backend/optimizer/util/relnode.c    |   2 +
 src/include/optimizer/paths.h           |   5 +
 src/test/regress/expected/uniquekey.out | 327 ++++++++++++++++++++
 src/test/regress/sql/uniquekey.sql      |  79 +++++
 5 files changed, 792 insertions(+)

diff --git a/src/backend/optimizer/path/uniquekey.c b/src/backend/optimizer/path/uniquekey.c
index c93075656d..815ade02c3 100644
--- a/src/backend/optimizer/path/uniquekey.c
+++ b/src/backend/optimizer/path/uniquekey.c
@@ -27,6 +27,26 @@ static bool add_uniquekey_for_uniqueindex(PlannerInfo *root,
 										  List *mergeable_const_peer,
 										  List *expr_opfamilies);
 
+static bool is_uniquekey_nulls_removed(PlannerInfo *root,
+									   UniqueKey *ukey,
+									   RelOptInfo *rel);
+static UniqueKey *adjust_uniquekey_multinull_for_joinrel(PlannerInfo *root,
+														 UniqueKey *joinrel_ukey,
+														 RelOptInfo *rel,
+														 bool below_outer_side);
+
+static bool populate_joinrel_uniquekey_for_rel(PlannerInfo *root, RelOptInfo *joinrel,
+											   RelOptInfo *rel, RelOptInfo *other_rel,
+											   List *restrictlist, JoinType jointype);
+static void populate_joinrel_composite_uniquekey(PlannerInfo *root,
+												 RelOptInfo *joinrel,
+												 RelOptInfo *outerrel,
+												 RelOptInfo *innerrel,
+												 List	*restrictlist,
+												 JoinType jointype,
+												 bool outeruk_still_valid,
+												 bool inneruk_still_valid);
+
 /* UniqueKey is subset of .. */
 static bool uniquekey_contains_in(PlannerInfo *root, UniqueKey *ukey,
 								  List *ecs, Relids relids);
@@ -35,6 +55,9 @@ static bool uniquekey_contains_in(PlannerInfo *root, UniqueKey *ukey,
 static bool unique_ecs_useful_for_distinct(PlannerInfo *root, List *ecs);
 static bool unique_ecs_useful_for_merging(PlannerInfo *root, RelOptInfo *rel,
 										  List *unique_ecs);
+static bool is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey *ukey,
+										  RelOptInfo *joinrel);
+
 /* Helper functions to create UniqueKey. */
 static UniqueKey *make_uniquekey(Bitmapset *unique_expr_indexes,
 								 bool multi_null,
@@ -90,6 +113,78 @@ populate_baserel_uniquekeys(PlannerInfo *root, RelOptInfo *rel)
 	print_uniquekey(root, rel);
 }
 
+/*
+ * populate_joinrel_uniquekeys
+ *		Fill in joinrel->uniquekeys for the join of outerrel and innerrel.
+ *
+ * SEMI and ANTI joins carry over every outerrel UniqueKey, after the
+ * multi_nulls flag has been re-evaluated against the joinrel.  For the other
+ * join types nothing is derived unless both inputs own at least one
+ * UniqueKey; then the per-side keys that survive the join are added, and the
+ * composite-key builder is called with the per-side results.
+ */
+void
+populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
+							RelOptInfo *outerrel, RelOptInfo *innerrel,
+							List *restrictlist, JoinType jointype)
+{
+	bool		outer_valid = false;
+	bool		inner_valid = false;
+
+	if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+	{
+		ListCell   *cell;
+
+		/*
+		 * A SEMI/ANTI join can filter out NULL values too, hence multi_nulls
+		 * is adjusted for every outer key before it is handed to the joinrel.
+		 */
+		foreach(cell, outerrel->uniquekeys)
+		{
+			UniqueKey  *adjusted;
+
+			adjusted = adjust_uniquekey_multinull_for_joinrel(root,
+															  lfirst(cell),
+															  joinrel,
+															  false);
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, adjusted);
+		}
+		return;
+	}
+
+	if (outerrel->uniquekeys == NIL || innerrel->uniquekeys == NIL)
+		return;
+
+	if (jointype == JOIN_INNER)
+	{
+		/* Either side may keep its own UniqueKeys; test the outer one first. */
+		outer_valid = populate_joinrel_uniquekey_for_rel(root, joinrel,
+														 outerrel, innerrel,
+														 restrictlist, jointype);
+		inner_valid = populate_joinrel_uniquekey_for_rel(root, joinrel,
+														 innerrel, outerrel,
+														 restrictlist, jointype);
+	}
+	else if (jointype == JOIN_LEFT)
+	{
+		/*
+		 * The innerrel's keys are certain to be multi_nulls=true after a left
+		 * join and cannot turn back into multi_nulls=false, so they are
+		 * dropped; only the outerrel and the composite keys are examined.
+		 */
+		outer_valid = populate_joinrel_uniquekey_for_rel(root, joinrel,
+														 outerrel, innerrel,
+														 restrictlist, jointype);
+	}
+	else if (jointype != JOIN_FULL)		/* FULL: both sides get multi_nulls */
+		elog(ERROR, "unexpected join_type %d", jointype);
+
+	/* Composite keys are attempted for every join type that gets this far. */
+	populate_joinrel_composite_uniquekey(root, joinrel, outerrel, innerrel,
+										 restrictlist, jointype,
+										 outer_valid, inner_valid);
+}
+
 /*
  * relation_is_distinct_for
  *		Check if the relation is distinct for.
@@ -238,6 +333,253 @@ add_uniquekey_for_uniqueindex(PlannerInfo *root, IndexOptInfo *unique_index,
 											 used_for_distinct));
 	return false;
 }
+
+/*
+ * is_uniquekey_nulls_removed
+ *		Are the columns behind ukey NULL-free per joinrel->notnull_attrs?
+ *
+ * SingleRow entries are skipped.  For each EquivalenceClass of the key, the
+ * first member that is a plain Var of a single relation inside the joinrel
+ * decides: false is returned when its attribute is not recorded as not-null.
+ *
+ * The effect of OUTER JOINs is not considered here; callers must handle it.
+ * Temporary approach (RelOptInfo.notnull_attrs) until Tom's is ready.
+ */
+static bool
+is_uniquekey_nulls_removed(PlannerInfo *root,
+						   UniqueKey *ukey,
+						   RelOptInfo *joinrel)
+{
+	int			idx = -1;
+
+	while ((idx = bms_next_member(ukey->unique_expr_indexes, idx)) >= 0)
+	{
+		Node	   *expr = list_nth(root->unique_exprs, idx);
+		List	   *eclasses;
+		ListCell   *ec_cell;
+
+		if (IsA(expr, SingleRow))
+			continue;
+
+		eclasses = castNode(List, expr);
+		foreach(ec_cell, eclasses)
+		{
+			EquivalenceClass *eclass = lfirst_node(EquivalenceClass, ec_cell);
+			ListCell   *em_cell;
+
+			foreach(em_cell, eclass->ec_members)
+			{
+				EquivalenceMember *member = lfirst_node(EquivalenceMember, em_cell);
+				int			member_relid;
+				int			attno;
+				Var		   *column;
+
+				/* Only a plain Var of one relation within joinrel is usable. */
+				if (!bms_is_subset(member->em_relids, joinrel->relids) ||
+					!bms_get_singleton_member(member->em_relids, &member_relid) ||
+					!IsA(member->em_expr, Var))
+					continue;
+				column = castNode(Var, member->em_expr);
+				if (member_relid != column->varno)
+					continue;
+
+				attno = column->varattno - FirstLowInvalidHeapAttributeNumber;
+				if (!bms_is_member(attno, joinrel->notnull_attrs[column->varno]))
+					return false;
+				break;			/* this EC is settled, move to the next one */
+			}
+		}
+	}
+	return true;
+}
+
+/*
+ * adjust_uniquekey_multinull_for_joinrel
+ *		Return a UniqueKey whose multi_nulls flag is right for the joinrel.
+ *
+ * Join clauses can remove NULL values, while an outer join can generate
+ * them again.  The incoming ukey is never modified: it is returned as is
+ * when its flag already has the final value, otherwise a new UniqueKey with
+ * the same unique_expr_indexes and use_for_distinct is built.
+ *
+ * below_outer_side: true when the key's side may be null-extended by an
+ * outer join, in which case the result always has multi_nulls = true.
+ */
+static UniqueKey *
+adjust_uniquekey_multinull_for_joinrel(PlannerInfo *root,
+									   UniqueKey *ukey,
+									   RelOptInfo *joinrel,
+									   bool below_outer_side)
+{
+	bool		final_multi_nulls;
+
+	if (below_outer_side)
+	{
+		/* The outer join re-creates NULLs, whatever the key looked like. */
+		final_multi_nulls = true;
+	}
+	else if (!ukey->multi_nulls)
+	{
+		/* No NULLs to start with, so the join clauses cannot matter. */
+		final_multi_nulls = false;
+	}
+	else
+	{
+		/*
+		 * The key has multiple NULLs so far; it stays that way unless the
+		 * joinrel is known to have removed them.
+		 */
+		final_multi_nulls = !is_uniquekey_nulls_removed(root, ukey, joinrel);
+	}
+
+	/* Reuse the caller's node whenever the flag does not change. */
+	if (ukey->multi_nulls == final_multi_nulls)
+		return ukey;
+
+	return make_uniquekey(ukey->unique_expr_indexes,
+						  final_multi_nulls,
+						  ukey->use_for_distinct);
+}
+
+/*
+ * populate_joinrel_uniquekey_for_rel
+ *
+ *    Check if the mergejoinable clauses in restrictlist give
+ * rel.any_column = other_rel.unique_key_columns, in which case rel's
+ * UniqueKeys remain valid on the joinrel.  If yes, append rel's UniqueKeys
+ * that are still useful after the join to joinrel->uniquekeys and return
+ * true; otherwise leave joinrel alone and return false.
+ */
+static bool
+populate_joinrel_uniquekey_for_rel(PlannerInfo *root, RelOptInfo *joinrel,
+								   RelOptInfo *rel, RelOptInfo *other_rel,
+								   List *restrictlist, JoinType type)
+{
+	bool	rel_keep_unique = false;
+	List *other_ecs = NIL;
+	Relids	other_relids = NULL;
+	ListCell	*lc;
+
+	/*
+	 * Gather the ECs and relids on the opposite side of each mergejoinable
+	 * clause that has rel on one side; other_rel's UniqueKey must be in them.
+	 */
+	foreach(lc, restrictlist)
+	{
+		RestrictInfo *r = lfirst_node(RestrictInfo, lc);
+
+		if (r->mergeopfamilies == NIL)
+			continue;
+
+		if (bms_equal(r->left_relids, rel->relids) && r->right_ec != NULL)
+		{
+			other_ecs = lappend(other_ecs, r->right_ec);
+			other_relids = bms_add_members(other_relids, r->right_relids);
+		}
+		else if (bms_equal(r->right_relids, rel->relids) && r->left_ec != NULL)
+		{
+			/* rel is on the right, so the opposite side is the left one. */
+			other_ecs = lappend(other_ecs, r->left_ec);
+			other_relids = bms_add_members(other_relids, r->left_relids);
+		}
+	}
+
+	foreach(lc, other_rel->uniquekeys)
+	{
+		UniqueKey *ukey = lfirst_node(UniqueKey, lc);
+		if (uniquekey_contains_in(root, ukey, other_ecs, other_relids))
+		{
+			rel_keep_unique = true;
+			break;
+		}
+	}
+
+	if (!rel_keep_unique)
+		return false;
+
+	foreach(lc, rel->uniquekeys)
+	{
+		UniqueKey *ukey = lfirst_node(UniqueKey, lc);
+
+		if (is_uniquekey_useful_afterjoin(root, ukey, joinrel))
+		{
+			/* below_outer_side = false: the caller guarantees this. */
+			ukey = adjust_uniquekey_multinull_for_joinrel(root, ukey,
+														  joinrel, false);
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, ukey);
+		}
+	}
+
+	return true;
+}
+
+
+/*
+ * populate_joinrel_composite_uniquekey
+ *		Pair each useful outerrel UniqueKey with each useful innerrel one;
+ *		such a composite holds whatever the join type and restrictlist are.
+ */
+static void
+populate_joinrel_composite_uniquekey(PlannerInfo *root,
+									 RelOptInfo *joinrel,
+									 RelOptInfo *outerrel,
+									 RelOptInfo *innerrel,
+									 List	*restrictlist,
+									 JoinType jointype,
+									 bool left_added,
+									 bool right_added)
+{
+	/* below_outer_side flags passed when adjusting each side's keys. */
+	bool		outer_nulled = (jointype == JOIN_FULL);
+	bool		inner_nulled = (jointype == JOIN_FULL || jointype == JOIN_LEFT);
+	ListCell   *outer_cell;
+
+	/* No need to create the composited ones */
+	if (left_added || right_added)
+		return;
+
+	foreach(outer_cell, outerrel->uniquekeys)
+	{
+		ListCell   *inner_cell;
+		UniqueKey  *outer_key;
+
+		outer_key = adjust_uniquekey_multinull_for_joinrel(root,
+														   lfirst(outer_cell),
+														   joinrel,
+														   outer_nulled);
+		if (!is_uniquekey_useful_afterjoin(root, outer_key, joinrel))
+			continue;
+
+		foreach(inner_cell, innerrel->uniquekeys)
+		{
+			UniqueKey  *inner_key;
+			UniqueKey  *composite;
+			Bitmapset  *both_exprs;
+			bool		any_multi_nulls;
+			bool		both_for_distinct;
+
+			inner_key = adjust_uniquekey_multinull_for_joinrel(root,
+															   lfirst(inner_cell),
+															   joinrel,
+															   inner_nulled);
+			if (!is_uniquekey_useful_afterjoin(root, inner_key, joinrel))
+				continue;
+
+			/* The composite covers the expressions of both sides. */
+			both_exprs = bms_union(outer_key->unique_expr_indexes,
+								   inner_key->unique_expr_indexes);
+			/* Only two !multi_nulls keys give a !multi_nulls composite. */
+			any_multi_nulls = outer_key->multi_nulls || inner_key->multi_nulls;
+			/* Usable for distinct only if both sides are. */
+			both_for_distinct = (outer_key->use_for_distinct &&
+								 inner_key->use_for_distinct);
+			composite = make_uniquekey(both_exprs, any_multi_nulls, both_for_distinct);
+			joinrel->uniquekeys = lappend(joinrel->uniquekeys, composite);
+		}
+	}
+}
+
+
 /*
  * uniquekey_contains_in
  *	Return if UniqueKey contains in the list of EquivalenceClass
@@ -333,6 +675,43 @@ unique_ecs_useful_for_merging(PlannerInfo *root, RelOptInfo *rel, List *unique_e
 
 	return true;
 }
+
+/*
+ * is_uniquekey_useful_afterjoin
+ *		True if ukey is used for distinct, or if every EC list in it passes
+ *		unique_ecs_useful_for_merging() for joinrel.
+ */
+static bool
+is_uniquekey_useful_afterjoin(PlannerInfo *root, UniqueKey *ukey,
+							 RelOptInfo *joinrel)
+{
+	int			idx;
+
+	if (ukey->use_for_distinct)
+		return true;
+
+	for (idx = bms_next_member(ukey->unique_expr_indexes, -1);
+		 idx >= 0;
+		 idx = bms_next_member(ukey->unique_expr_indexes, idx))
+	{
+		Node	   *entry = list_nth(root->unique_exprs, idx);
+
+		if (!IsA(entry, List))
+		{
+			/*
+			 * Ideally an expr on SingleRow should be checked against the
+			 * joinrel's joinclauses, but that can't be done effectively for
+			 * now, so such an entry is simply assumed to be useful.
+			 */
+			Assert(IsA(entry, SingleRow));
+			continue;
+		}
+		if (!unique_ecs_useful_for_merging(root, joinrel, (List *) entry))
+			return false;
+	}
+	return true;
+}
+
 /*
  *	make_uniquekey
  */
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index b75e1679e6..6695f9f2a6 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -846,6 +846,8 @@ build_join_rel(PlannerInfo *root,
 	}
 
 	set_joinrel_notnull_attrs(joinrel, outer_rel, inner_rel, restrictlist, sjinfo);
+	populate_joinrel_uniquekeys(root, joinrel, outer_rel, inner_rel,
+								restrictlist, sjinfo->jointype);
 
 	return joinrel;
 }
diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h
index 68b8b40ca9..f233837e59 100644
--- a/src/include/optimizer/paths.h
+++ b/src/include/optimizer/paths.h
@@ -265,6 +265,9 @@ extern void add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
 
 extern void populate_baserel_uniquekeys(PlannerInfo *root,
 										RelOptInfo *baserel);
+extern void populate_joinrel_uniquekeys(PlannerInfo *root, RelOptInfo *joinrel,
+										RelOptInfo *outerrel, RelOptInfo *innerrel,
+										List *restrictlist, JoinType jointype);
 extern bool relation_is_distinct_for(PlannerInfo *root, RelOptInfo *rel,
 									 List *distinct_pathkey);
 #endif							/* PATHS_H */
diff --git a/src/test/regress/expected/uniquekey.out b/src/test/regress/expected/uniquekey.out
index d9a8634e80..c2bd1fa619 100644
--- a/src/test/regress/expected/uniquekey.out
+++ b/src/test/regress/expected/uniquekey.out
@@ -80,3 +80,330 @@ EXPLAIN (COSTS OFF) SELECT DISTINCT ON(pk) d FROM uqk1;
  Seq Scan on uqk1
 (1 row)
 
+------------------------------------------------------
+-- Test UniqueKey on one side still valid after join.
+-----------------------------------------------------
+-- uqk1(c, d) is the uniquekey with mutli nulls at single relation access.
+-- so distinct is not no-op.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.a = uqk2.pk;
+                 QUERY PLAN                  
+---------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.c, uqk1.d
+         ->  Hash Join
+               Hash Cond: (uqk1.a = uqk2.pk)
+               ->  Seq Scan on uqk1
+               ->  Hash
+                     ->  Seq Scan on uqk2
+(8 rows)
+
+-- Both uqk1 (c,d) are a valid uniquekey. 
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk;
+           QUERY PLAN            
+---------------------------------
+ Hash Join
+   Hash Cond: (uqk1.a = uqk2.pk)
+   ->  Seq Scan on uqk1
+         Filter: (c IS NOT NULL)
+   ->  Hash
+         ->  Seq Scan on uqk2
+(6 rows)
+
+-- uqk1.c is null at baserel, but the null values are removed after join.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE  uqk1.a = uqk2.pk and uqk1.c = uqk2.c;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Hash Join
+   Hash Cond: ((uqk1.a = uqk2.pk) AND (uqk1.c = uqk2.c))
+   ->  Seq Scan on uqk1
+   ->  Hash
+         ->  Seq Scan on uqk2
+(5 rows)
+
+-- uqk1.c is null at baserel, but the null values are removed after join
+-- but new null values are generated due to outer join again. so distinct
+-- is still needed.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 right join uqk2
+on uqk1.a = uqk2.pk and uqk1.c = uqk2.c;
+                             QUERY PLAN                              
+---------------------------------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.c, uqk1.d
+         ->  Hash Right Join
+               Hash Cond: ((uqk1.a = uqk2.pk) AND (uqk1.c = uqk2.c))
+               ->  Seq Scan on uqk1
+               ->  Hash
+                     ->  Seq Scan on uqk2
+(8 rows)
+
+------------------------------------------------------
+-- Test join: Composited UniqueKey
+-----------------------------------------------------
+-- both t1.pk and t1.pk is valid uniquekey.
+EXPLAIN SELECT DISTINCT t1.pk, t2.pk FROM uqk1 t1 cross join uqk2 t2;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Nested Loop  (cost=0.00..2.27 rows=15 width=8)
+   ->  Seq Scan on uqk1 t1  (cost=0.00..1.05 rows=5 width=4)
+   ->  Materialize  (cost=0.00..1.04 rows=3 width=4)
+         ->  Seq Scan on uqk2 t2  (cost=0.00..1.03 rows=3 width=4)
+(4 rows)
+
+SELECT DISTINCT t1.pk, t2.pk FROM uqk1 t1 cross join uqk2 t2 order by 1, 2;
+ pk | pk 
+----+----
+  1 |  1
+  1 |  4
+  1 |  5
+  2 |  1
+  2 |  4
+  2 |  5
+  3 |  1
+  3 |  4
+  3 |  5
+  4 |  1
+  4 |  4
+  4 |  5
+  5 |  1
+  5 |  4
+  5 |  5
+(15 rows)
+
+-- NOT OK, since t1.c includes multi nulls. 
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ HashAggregate  (cost=2.20..2.26 rows=6 width=12)
+   Group Key: t1.c, t1.d, t2.pk
+   ->  Nested Loop  (cost=0.00..2.16 rows=6 width=12)
+         ->  Seq Scan on uqk2 t2  (cost=0.00..1.03 rows=3 width=4)
+         ->  Materialize  (cost=0.00..1.06 rows=2 width=8)
+               ->  Seq Scan on uqk1 t1  (cost=0.00..1.05 rows=2 width=8)
+                     Filter: (c IS NULL)
+(7 rows)
+
+SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null order by 1, 2,3;
+ c | d | pk 
+---+---+----
+   | 4 |  1
+   | 4 |  4
+   | 4 |  5
+(3 rows)
+
+SELECT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null order by 1, 2,3;
+ c | d | pk 
+---+---+----
+   | 4 |  1
+   | 4 |  1
+   | 4 |  4
+   | 4 |  4
+   | 4 |  5
+   | 4 |  5
+(6 rows)
+
+-- let's remove the t1.c's multi null values
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null;
+                            QUERY PLAN                             
+-------------------------------------------------------------------
+ Nested Loop  (cost=0.00..2.20 rows=9 width=12)
+   ->  Seq Scan on uqk1 t1  (cost=0.00..1.05 rows=3 width=8)
+         Filter: (c IS NOT NULL)
+   ->  Materialize  (cost=0.00..1.04 rows=3 width=4)
+         ->  Seq Scan on uqk2 t2  (cost=0.00..1.03 rows=3 width=4)
+(5 rows)
+
+SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null order by 1, 2, 3 ;
+ c | d | pk 
+---+---+----
+ 1 | 1 |  1
+ 1 | 1 |  4
+ 1 | 1 |  5
+ 2 | 2 |  1
+ 2 | 2 |  4
+ 2 | 2 |  5
+ 3 | 3 |  1
+ 3 | 3 |  4
+ 3 | 3 |  5
+(9 rows)
+
+SELECT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null order by 1, 2, 3;
+ c | d | pk 
+---+---+----
+ 1 | 1 |  1
+ 1 | 1 |  4
+ 1 | 1 |  5
+ 2 | 2 |  1
+ 2 | 2 |  4
+ 2 | 2 |  5
+ 3 | 3 |  1
+ 3 | 3 |  4
+ 3 | 3 |  5
+(9 rows)
+
+-- test onerow case with composited cases.
+-- t2.c is onerow. OK
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+                         QUERY PLAN                          
+-------------------------------------------------------------
+ Nested Loop  (cost=0.00..2.12 rows=3 width=12)
+   ->  Seq Scan on uqk2 t2  (cost=0.00..1.04 rows=1 width=4)
+         Filter: (pk = 1)
+   ->  Seq Scan on uqk1 t1  (cost=0.00..1.05 rows=3 width=8)
+         Filter: (c IS NOT NULL)
+(5 rows)
+
+SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+ c | d | c 
+---+---+---
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 3 | 3 | 1
+(3 rows)
+
+SELECT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+ c | d | c 
+---+---+---
+ 1 | 1 | 1
+ 2 | 2 | 1
+ 3 | 3 | 1
+(3 rows)
+
+-- t2.c is onerow, but t1.c has multi-nulls, NOt OK.
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+                               QUERY PLAN                                
+-------------------------------------------------------------------------
+ Unique  (cost=2.12..2.14 rows=2 width=12)
+   ->  Sort  (cost=2.12..2.12 rows=2 width=12)
+         Sort Key: t1.c, t1.d, t2.c
+         ->  Nested Loop  (cost=0.00..2.11 rows=2 width=12)
+               ->  Seq Scan on uqk2 t2  (cost=0.00..1.04 rows=1 width=4)
+                     Filter: (pk = 1)
+               ->  Seq Scan on uqk1 t1  (cost=0.00..1.05 rows=2 width=8)
+                     Filter: (c IS NULL)
+(8 rows)
+
+SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+ c | d | c 
+---+---+---
+   | 4 | 1
+(1 row)
+
+SELECT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+ c | d | c 
+---+---+---
+   | 4 | 1
+   | 4 | 1
+(2 rows)
+
+-- Test Semi/Anti JOIN
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2);
+           QUERY PLAN           
+--------------------------------
+ Hash Semi Join
+   Hash Cond: (uqk1.d = uqk2.d)
+   ->  Seq Scan on uqk1
+   ->  Hash
+         ->  Seq Scan on uqk2
+(5 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2);
+             QUERY PLAN             
+------------------------------------
+ Seq Scan on uqk1
+   Filter: (NOT (hashed SubPlan 1))
+   SubPlan 1
+     ->  Seq Scan on uqk2
+(4 rows)
+
+-----------------------------------
+-- Test Join: Special OneRow case.
+-----------------------------------
+-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well.
+-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.pk;
+            QUERY PLAN             
+-----------------------------------
+ Nested Loop
+   Join Filter: (uqk1.c = uqk2.pk)
+   ->  Seq Scan on uqk1
+         Filter: (pk = 1)
+   ->  Seq Scan on uqk2
+(5 rows)
+
+SELECT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.pk order BY 1;
+ d 
+---
+ 1
+(1 row)
+
+-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1
+AND uqk2.pk = 1 AND uqk1.d = uqk2.d;
+            QUERY PLAN            
+----------------------------------
+ Nested Loop
+   Join Filter: (uqk1.d = uqk2.d)
+   ->  Seq Scan on uqk1
+         Filter: (pk = 1)
+   ->  Seq Scan on uqk2
+         Filter: (pk = 1)
+(6 rows)
+
+SELECT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk2.pk = 1
+AND uqk1.d = uqk2.d order BY 1;
+ d 
+---
+ 1
+(1 row)
+
+-- Both UniqueKey in targetList, so distinct is not needed.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1;
+        QUERY PLAN        
+--------------------------
+ Nested Loop
+   ->  Seq Scan on uqk1
+         Filter: (pk = 2)
+   ->  Seq Scan on uqk2
+         Filter: (pk = 1)
+(5 rows)
+
+SELECT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2;
+ c | c 
+---+---
+ 2 | 1
+(1 row)
+
+-----------------------------------------
+-- Test more non-unique cases after join.
+-----------------------------------------
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1, uqk2 WHERE uqk1.c = uqk2.c;
+                 QUERY PLAN                 
+--------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.pk
+         ->  Hash Join
+               Hash Cond: (uqk1.c = uqk2.c)
+               ->  Seq Scan on uqk1
+               ->  Hash
+                     ->  Seq Scan on uqk2
+(8 rows)
+
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.c;
+                  QUERY PLAN                  
+----------------------------------------------
+ Unique
+   ->  Sort
+         Sort Key: uqk1.d
+         ->  Nested Loop
+               Join Filter: (uqk1.c = uqk2.c)
+               ->  Seq Scan on uqk1
+                     Filter: (pk = 1)
+               ->  Seq Scan on uqk2
+(8 rows)
+
diff --git a/src/test/regress/sql/uniquekey.sql b/src/test/regress/sql/uniquekey.sql
index a1b538d1c1..3f93872246 100644
--- a/src/test/regress/sql/uniquekey.sql
+++ b/src/test/regress/sql/uniquekey.sql
@@ -27,3 +27,82 @@ EXPLAIN (COSTS OFF) SELECT DISTINCT a FROM uqk1 WHERE c = 1 and d = 1;
 
 -- Test Distinct ON
 EXPLAIN (COSTS OFF) SELECT DISTINCT ON(pk) d FROM uqk1;
+
+------------------------------------------------------
+-- Test UniqueKey on one side still valid after join.
+-----------------------------------------------------
+-- uqk1(c, d) is the uniquekey with mutli nulls at single relation access.
+-- so distinct is not no-op.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.a = uqk2.pk;
+
+-- Both uqk1 (c,d) are a valid uniquekey. 
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE uqk1.c is NOT NULL AND uqk1.a = uqk2.pk;
+
+-- uqk1.c is null at baserel, but the null values are removed after join.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1, uqk2
+WHERE  uqk1.a = uqk2.pk and uqk1.c = uqk2.c;
+
+-- uqk1.c is null at baserel, but the null values are removed after join
+-- but new null values are generated due to outer join again. so distinct
+-- is still needed.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk1.d FROM uqk1 right join uqk2
+on uqk1.a = uqk2.pk and uqk1.c = uqk2.c;
+
+
+------------------------------------------------------
+-- Test join: Composited UniqueKey
+-----------------------------------------------------
+-- both t1.pk and t1.pk is valid uniquekey.
+EXPLAIN SELECT DISTINCT t1.pk, t2.pk FROM uqk1 t1 cross join uqk2 t2;
+SELECT DISTINCT t1.pk, t2.pk FROM uqk1 t1 cross join uqk2 t2 order by 1, 2;
+
+-- NOT OK, since t1.c includes multi nulls. 
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null;
+SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null order by 1, 2,3;
+SELECT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is null order by 1, 2,3;
+
+-- let's remove the t1.c's multi null values
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null;
+SELECT DISTINCT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null order by 1, 2, 3 ;
+SELECT t1.c, t1.d, t2.pk FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null order by 1, 2, 3;
+
+-- test onerow case with composited cases.
+
+-- t2.c is onerow. OK
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+SELECT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is not null and t2.pk = 1;
+
+-- t2.c is onerow, but t1.c has multi-nulls, NOt OK.
+EXPLAIN SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+SELECT DISTINCT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+SELECT t1.c, t1.d, t2.c FROM uqk1 t1 cross join uqk2 t2 where t1.c is null and t2.pk = 1;
+
+
+-- Test Semi/Anti JOIN
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d in (SELECT d FROM uqk2);
+EXPLAIN (COSTS OFF) SELECT DISTINCT pk FROM uqk1 WHERE d NOT in (SELECT d FROM uqk2);
+
+-----------------------------------
+-- Test Join: Special OneRow case.
+-----------------------------------
+-- Test Unique Key FOR one-row case, DISTINCT is NOT needed as well.
+-- uqk1.d is the a uniquekey due to onerow rule. uqk2.pk is pk.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.pk;
+SELECT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.pk order BY 1;
+-- Both uqk1.d AND uqk2.c are the a uniquekey due to onerow rule
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1
+AND uqk2.pk = 1 AND uqk1.d = uqk2.d;
+SELECT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk2.pk = 1
+AND uqk1.d = uqk2.d order BY 1;
+-- Both UniqueKey in targetList, so distinct is not needed.
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1;
+SELECT uqk1.c, uqk2.c FROM uqk1, uqk2 WHERE uqk1.pk = 2 AND uqk2.pk = 1 order BY 1, 2;
+
+-----------------------------------------
+-- Test more non-unique cases after join.
+-----------------------------------------
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.pk FROM uqk1, uqk2 WHERE uqk1.c = uqk2.c;
+EXPLAIN (COSTS OFF) SELECT DISTINCT uqk1.d FROM uqk1, uqk2 WHERE uqk1.pk = 1 AND uqk1.c = uqk2.c;
-- 
2.21.0

