From a41c27b140000a3b72f1b277dd21b2a3e93f5517 Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 15 Jan 2026 11:43:17 +0900
Subject: [PATCH v2] Remove no-op PlaceHolderVars

When pulling up a subquery, we may need to wrap its targetlist items
in PlaceHolderVars if the subquery is under an outer join.  This
ensures that they are evaluated at the right place and are forced to
NULL when the outer join should do so.  However, if the outer join is
later reduced to an inner join, these PlaceHolderVars effectively
become no-ops regarding nullability.

It is desirable to remove these no-op PlaceHolderVars because they can
constrain optimization opportunities, such as blocking subexpression
folding or forcing join order.  Previously, however, we did not try to
remove them because PHVs are also used to enforce separate identity of
subexpressions, and we could not distinguish between the two cases.
Additionally, simply removing the PHV wrapper could expose the
contained expression in a way that violates various invariants
established by expression preprocessing, such as creating nested AND
clauses or stacked RelabelType nodes.

This patch removes no-op PlaceHolderVars by marking PHVs as preserved
if they are used to enforce subexpression identity.  Furthermore, to
ensure that stripping the PHV wrapper does not violate expression tree
invariants, we only do so when the contained expression is known to be
safe to expose to the surrounding expression structure.
---
 src/backend/optimizer/path/equivclass.c   |  4 +-
 src/backend/optimizer/path/pathkeys.c     |  2 +-
 src/backend/optimizer/plan/initsplan.c    |  6 +-
 src/backend/optimizer/plan/planner.c      |  4 +-
 src/backend/optimizer/plan/setrefs.c      |  4 +-
 src/backend/optimizer/prep/prepjointree.c | 41 +++++++++++---
 src/backend/optimizer/util/placeholder.c  | 11 ++--
 src/backend/optimizer/util/relnode.c      |  4 +-
 src/backend/optimizer/util/tlist.c        |  2 +-
 src/backend/rewrite/rewriteManip.c        | 69 +++++++++++++++++++++--
 src/backend/utils/adt/selfuncs.c          |  8 ++-
 src/include/nodes/pathnodes.h             |  8 +++
 src/include/rewrite/rewriteManip.h        |  3 +-
 13 files changed, 132 insertions(+), 34 deletions(-)

diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c
index a4fcfcc86c8..d1dd6ba725f 100644
--- a/src/backend/optimizer/path/equivclass.c
+++ b/src/backend/optimizer/path/equivclass.c
@@ -2471,8 +2471,8 @@ reconsider_full_join_clause(PlannerInfo *root, OuterJoinClauseInfo *ojcinfo)
 				 * representation for FULL JOIN USING output columns, this
 				 * wouldn't be needed?)
 				 */
-				cfirst = remove_nulling_relids(cfirst, fjrelids, NULL);
-				csecond = remove_nulling_relids(csecond, fjrelids, NULL);
+				cfirst = remove_nulling_relids(cfirst, fjrelids, NULL, false);
+				csecond = remove_nulling_relids(csecond, fjrelids, NULL, false);
 
 				if (equal(leftvar, cfirst) && equal(rightvar, csecond))
 				{
diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c
index 5eb71635d15..0b610e46df8 100644
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1408,7 +1408,7 @@ make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
 			sortkey = (Expr *)
 				remove_nulling_relids((Node *) sortkey,
 									  bms_make_singleton(root->group_rtindex),
-									  NULL);
+									  NULL, true);
 		}
 		pathkey = make_pathkey_from_sortop(root,
 										   sortkey,
diff --git a/src/backend/optimizer/plan/initsplan.c b/src/backend/optimizer/plan/initsplan.c
index 97ea95a4eb8..071b39df0d4 100644
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -2643,13 +2643,15 @@ deconstruct_distribute_oj_quals(PlannerInfo *root,
 		 *
 		 * We first strip out all the nullingrels bits corresponding to
 		 * commuting joins below this one, and then successively put them back
-		 * as we crawl up the join stack.
+		 * as we crawl up the join stack.  Note that we do not allow stripping
+		 * no-op PlaceHolderVars here; if a PHV were removed, we would be
+		 * unable to restore its nullingrels bits later.
 		 */
 		quals = jtitem->oj_joinclauses;
 		if (!bms_is_empty(joins_below))
 			quals = (List *) remove_nulling_relids((Node *) quals,
 												   joins_below,
-												   NULL);
+												   NULL, false);
 
 		/*
 		 * We'll need to mark the lower versions of the quals as not safe to
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 7a6b8b749f2..9b897721de6 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -5584,7 +5584,7 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 				expr = (Expr *)
 					remove_nulling_relids((Node *) expr,
 										  bms_make_singleton(root->group_rtindex),
-										  NULL);
+										  NULL, true);
 			}
 			add_column_to_pathtarget(input_target, expr, sgref);
 		}
@@ -5628,7 +5628,7 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 		non_group_vars = (List *)
 			remove_nulling_relids((Node *) non_group_vars,
 								  bms_make_singleton(root->group_rtindex),
-								  NULL);
+								  NULL, true);
 	}
 	add_new_columns_to_pathtarget(input_target, non_group_vars);
 
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index 16d200cfb46..e63d52f0d8c 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -2523,11 +2523,11 @@ set_upper_references(PlannerInfo *root, Plan *plan, int rtoffset)
 		plan->targetlist = (List *)
 			remove_nulling_relids((Node *) plan->targetlist,
 								  bms_make_singleton(root->group_rtindex),
-								  NULL);
+								  NULL, true);
 		plan->qual = (List *)
 			remove_nulling_relids((Node *) plan->qual,
 								  bms_make_singleton(root->group_rtindex),
-								  NULL);
+								  NULL, true);
 	}
 
 	output_targetlist = NIL;
diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index c80bfc88d82..9e93775d91c 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -2725,8 +2725,9 @@ pullup_replace_vars_callback(Var *var,
 	 * references to the same subquery output as being equal().  So it's worth
 	 * a bit of extra effort to avoid it.
 	 *
-	 * The cached items have phlevelsup = 0 and phnullingrels = NULL; we'll
-	 * copy them and adjust those values for this reference site below.
+	 * The cached items have phlevelsup = 0, phnullingrels = NULL, and
+	 * phpreserved = false.  We'll copy them and adjust phpreserved here; the
+	 * other values are adjusted for this reference site below.
 	 */
 	if (need_phv &&
 		varattno >= InvalidAttrNumber &&
@@ -2735,6 +2736,13 @@ pullup_replace_vars_callback(Var *var,
 	{
 		/* Just copy the entry and fall through to adjust phlevelsup etc */
 		newnode = copyObject(rcon->rv_cache[varattno]);
+
+		/*
+		 * Mark the PlaceHolderVar as preserved if it's used for
+		 * identification purposes.
+		 */
+		if (rcon->wrap_option != REPLACE_WRAP_NONE)
+			((PlaceHolderVar *) newnode)->phpreserved = true;
 	}
 	else
 	{
@@ -2920,6 +2928,13 @@ pullup_replace_vars_callback(Var *var,
 				if (varattno >= InvalidAttrNumber &&
 					varattno <= list_length(rcon->targetlist))
 					rcon->rv_cache[varattno] = copyObject(newnode);
+
+				/*
+				 * Mark the PlaceHolderVar as preserved if it's used for
+				 * identification purposes.
+				 */
+				if (rcon->wrap_option != REPLACE_WRAP_NONE)
+					((PlaceHolderVar *) newnode)->phpreserved = true;
 			}
 		}
 	}
@@ -3196,23 +3211,28 @@ reduce_outer_joins(PlannerInfo *root)
 	 * remove references to those joins as nulling rels.  This is handled as
 	 * an additional pass, for simplicity and because we can handle all
 	 * fully-reduced joins in a single pass over the parse tree.
+	 *
+	 * We allow stripping no-op PlaceHolderVars here, as this is a
+	 * tree-simplification pass and we want to remove useless wrappers.
 	 */
 	if (!bms_is_empty(state2.inner_reduced))
 	{
 		root->parse = (Query *)
 			remove_nulling_relids((Node *) root->parse,
 								  state2.inner_reduced,
-								  NULL);
+								  NULL, true);
 		/* There could be references in the append_rel_list, too */
 		root->append_rel_list = (List *)
 			remove_nulling_relids((Node *) root->append_rel_list,
 								  state2.inner_reduced,
-								  NULL);
+								  NULL, true);
 	}
 
 	/*
 	 * Partially-reduced full joins have to be done one at a time, since
 	 * they'll each need a different setting of except_relids.
+	 *
+	 * As above, we allow stripping no-op PlaceHolderVars.
 	 */
 	foreach(lc, state2.partial_reduced)
 	{
@@ -3222,11 +3242,13 @@ reduce_outer_joins(PlannerInfo *root)
 		root->parse = (Query *)
 			remove_nulling_relids((Node *) root->parse,
 								  full_join_relids,
-								  statep->unreduced_side);
+								  statep->unreduced_side,
+								  true);
 		root->append_rel_list = (List *)
 			remove_nulling_relids((Node *) root->append_rel_list,
 								  full_join_relids,
-								  statep->unreduced_side);
+								  statep->unreduced_side,
+								  true);
 	}
 }
 
@@ -3680,18 +3702,21 @@ remove_useless_result_rtes(PlannerInfo *root)
 	 * know that such an outer join wouldn't really have nulled anything.)  We
 	 * don't do this during the main recursion, for simplicity and because we
 	 * can handle all such joins in a single pass over the parse tree.
+	 *
+	 * We allow stripping no-op PlaceHolderVars here, as this is a
+	 * tree-simplification pass and we want to remove useless wrappers.
 	 */
 	if (!bms_is_empty(dropped_outer_joins))
 	{
 		root->parse = (Query *)
 			remove_nulling_relids((Node *) root->parse,
 								  dropped_outer_joins,
-								  NULL);
+								  NULL, true);
 		/* There could be references in the append_rel_list, too */
 		root->append_rel_list = (List *)
 			remove_nulling_relids((Node *) root->append_rel_list,
 								  dropped_outer_joins,
-								  NULL);
+								  NULL, true);
 	}
 
 	/*
diff --git a/src/backend/optimizer/util/placeholder.c b/src/backend/optimizer/util/placeholder.c
index e1706363c88..4279fd85117 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -44,11 +44,11 @@ static bool contain_placeholder_references_walker(Node *node,
  * phrels is the syntactic location (as a set of relids) to attribute
  * to the expression.
  *
- * The caller is responsible for adjusting phlevelsup and phnullingrels
- * as needed.  Because we do not know here which query level the PHV
- * will be associated with, it's important that this function touches
- * only root->glob; messing with other parts of PlannerInfo would be
- * likely to do the wrong thing.
+ * The caller is responsible for adjusting phlevelsup, phnullingrels, and
+ * phpreserved as needed.  Because we do not know here which query level
+ * the PHV will be associated with, it's important that this function
+ * touches only root->glob; messing with other parts of PlannerInfo
+ * would be likely to do the wrong thing.
  */
 PlaceHolderVar *
 make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
@@ -58,6 +58,7 @@ make_placeholder_expr(PlannerInfo *root, Expr *expr, Relids phrels)
 	phv->phexpr = expr;
 	phv->phrels = phrels;
 	phv->phnullingrels = NULL;	/* caller may change this later */
+	phv->phpreserved = false;	/* caller may change this later */
 	phv->phid = ++(root->glob->lastPHId);
 	phv->phlevelsup = 0;		/* caller may change this later */
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index f57631e876f..2d4574ce916 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -2264,11 +2264,11 @@ have_partkey_equi_join(PlannerInfo *root, RelOptInfo *joinrel,
 			if (bms_overlap(rel1->relids, root->outer_join_rels))
 				expr1 = (Expr *) remove_nulling_relids((Node *) expr1,
 													   root->outer_join_rels,
-													   NULL);
+													   NULL, false);
 			if (bms_overlap(rel2->relids, root->outer_join_rels))
 				expr2 = (Expr *) remove_nulling_relids((Node *) expr2,
 													   root->outer_join_rels,
-													   NULL);
+													   NULL, false);
 		}
 
 		/*
diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c
index 752ea9222f6..ca66b07009e 100644
--- a/src/backend/optimizer/util/tlist.c
+++ b/src/backend/optimizer/util/tlist.c
@@ -1161,7 +1161,7 @@ split_pathtarget_walker(Node *node, split_pathtarget_context *context)
 		sanitized_node =
 			remove_nulling_relids(node,
 								  bms_make_singleton(context->root->group_rtindex),
-								  NULL);
+								  NULL, true);
 	}
 
 	/*
diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c
index 6fa174412f2..16c295b2c14 100644
--- a/src/backend/rewrite/rewriteManip.c
+++ b/src/backend/rewrite/rewriteManip.c
@@ -53,6 +53,7 @@ typedef struct
 	const Bitmapset *removable_relids;
 	const Bitmapset *except_relids;
 	int			sublevels_up;
+	bool		remove_noop_phvs;
 } remove_nulling_relids_context;
 
 static bool contain_aggs_of_level_walker(Node *node,
@@ -68,6 +69,7 @@ static Node *add_nulling_relids_mutator(Node *node,
 										add_nulling_relids_context *context);
 static Node *remove_nulling_relids_mutator(Node *node,
 										   remove_nulling_relids_context *context);
+static bool is_safe_to_strip_payload(Node *node);
 
 
 /*
@@ -1323,17 +1325,22 @@ add_nulling_relids_mutator(Node *node,
  * in Var.varnullingrels and PlaceHolderVar.phnullingrels fields within
  * the given expression, except in nodes belonging to rels listed in
  * except_relids.
+ *
+ * If remove_noop_phvs is true, we also check if PlaceHolderVars become
+ * no-ops and strip them if so.
  */
 Node *
 remove_nulling_relids(Node *node,
 					  const Bitmapset *removable_relids,
-					  const Bitmapset *except_relids)
+					  const Bitmapset *except_relids,
+					  bool remove_noop_phvs)
 {
 	remove_nulling_relids_context context;
 
 	context.removable_relids = removable_relids;
 	context.except_relids = except_relids;
 	context.sublevels_up = 0;
+	context.remove_noop_phvs = remove_noop_phvs;
 	return query_or_expression_tree_mutator(node,
 											remove_nulling_relids_mutator,
 											&context,
@@ -1371,11 +1378,15 @@ remove_nulling_relids_mutator(Node *node,
 			!bms_overlap(phv->phrels, context->except_relids))
 		{
 			/*
-			 * Note: it might seem desirable to remove the PHV altogether if
-			 * phnullingrels goes to empty.  Currently we dare not do that
-			 * because we use PHVs in some cases to enforce separate identity
-			 * of subexpressions; see wrap_option usages in prepjointree.c.
+			 * If phnullingrels goes to empty, the PHV is no longer needed for
+			 * outer-join nulling.  We can remove it if we are allowed to
+			 * remove no-op PHVs, provided that it is not marked as preserved
+			 * (which indicates that it is needed to enforce separate identity
+			 * of subexpressions) and that the contained expression is safe to
+			 * pull up without breaking various invariants established by
+			 * expression preprocessing.
 			 */
+
 			/* Copy the PlaceHolderVar and mutate what's below ... */
 			phv = (PlaceHolderVar *)
 				expression_tree_mutator(node,
@@ -1388,6 +1399,14 @@ remove_nulling_relids_mutator(Node *node,
 			phv->phrels = bms_difference(phv->phrels,
 										 context->removable_relids);
 			Assert(!bms_is_empty(phv->phrels));
+
+			/* Strip the PHV if it's safe; see comment above */
+			if (context->remove_noop_phvs &&
+				!phv->phpreserved &&
+				bms_is_empty(phv->phnullingrels) &&
+				is_safe_to_strip_payload((Node *) phv->phexpr))
+				return (Node *) phv->phexpr;
+
 			return (Node *) phv;
 		}
 		/* Otherwise fall through to copy the PlaceHolderVar normally */
@@ -1408,6 +1427,46 @@ remove_nulling_relids_mutator(Node *node,
 	return expression_tree_mutator(node, remove_nulling_relids_mutator, context);
 }
 
+/*
+ * is_safe_to_strip_payload
+ *
+ * Check whether the given node is safe to pull up into the surrounding
+ * expression structure.
+ *
+ * This is used to remove a PlaceHolderVar.  Even if a PlaceHolderVar is no
+ * longer needed, we cannot simply remove it if doing so would expose the
+ * contained expression to a parent node in a way that breaks various
+ * invariants established by earlier expression preprocessing.
+ *
+ * Since we don't know what the parent node is, we need to be conservative.
+ */
+static bool
+is_safe_to_strip_payload(Node *node)
+{
+	if (node == NULL)
+		return false;
+
+	switch (nodeTag(node))
+	{
+		case T_Var:
+		case T_Param:
+		case T_Const:
+		case T_PlaceHolderVar:
+			/* Atomic nodes should be safe to expose */
+			return true;
+
+		case T_OpExpr:
+		case T_FuncExpr:
+		case T_CoalesceExpr:
+			return true;
+
+		default:
+			return false;
+	}
+
+	return false;
+}
+
 
 /*
  * replace_rte_variables() finds all Vars in an expression tree
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 29fec655593..fdf328c0fac 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -3655,7 +3655,7 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
 	 * extended statistics (see estimate_multivariate_ndistinct).  So strip
 	 * them out first.
 	 */
-	var = remove_nulling_relids(var, root->outer_join_rels, NULL);
+	var = remove_nulling_relids(var, root->outer_join_rels, NULL, false);
 
 	foreach(lc, varinfos)
 	{
@@ -4236,7 +4236,8 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
 				 * Clear nullingrels to correctly match hash keys.  See
 				 * add_unique_group_var()'s comment for details.
 				 */
-				expr = remove_nulling_relids(expr, root->outer_join_rels, NULL);
+				expr = remove_nulling_relids(expr, root->outer_join_rels,
+											 NULL, false);
 
 				/*
 				 * Detect and exclude exact duplicates from the list of hash
@@ -5762,7 +5763,8 @@ examine_variable(PlannerInfo *root, Node *node, int varRelid,
 		 * extended statistics.  So strip them out first.
 		 */
 		if (bms_overlap(varnos, root->outer_join_rels))
-			node = remove_nulling_relids(node, root->outer_join_rels, NULL);
+			node = remove_nulling_relids(node, root->outer_join_rels,
+										 NULL, false);
 
 		foreach(ilist, onerel->indexlist)
 		{
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 449885b9319..c4671ef362a 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -2985,6 +2985,11 @@ typedef struct MergeScanSelCache
  * level of a PlaceHolderVar might be a join rather than a base relation.
  * Likewise, phnullingrels corresponds to varnullingrels.
  *
+ * phpreserved indicates whether the PlaceHolderVar needs to be preserved
+ * when its phnullingrels becomes empty.  This is set true in cases where
+ * the PlaceHolderVar is used to enforce the separate identity of the
+ * contained expression.
+ *
  * Although the planner treats this as an expression node type, it is not
  * recognized by the parser or executor, so we declare it here rather than
  * in primnodes.h.
@@ -3018,6 +3023,9 @@ typedef struct PlaceHolderVar
 	/* RT indexes of outer joins that can null PHV's value */
 	Relids		phnullingrels;
 
+	/* true if PHV enforces separate identity */
+	bool		phpreserved pg_node_attr(equal_ignore);
+
 	/* ID for PHV (unique within planner run) */
 	Index		phid;
 
diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h
index f8216c22fb7..05245faa49c 100644
--- a/src/include/rewrite/rewriteManip.h
+++ b/src/include/rewrite/rewriteManip.h
@@ -89,7 +89,8 @@ extern Node *add_nulling_relids(Node *node,
 								const Bitmapset *added_relids);
 extern Node *remove_nulling_relids(Node *node,
 								   const Bitmapset *removable_relids,
-								   const Bitmapset *except_relids);
+								   const Bitmapset *except_relids,
+								   bool remove_noop_phvs);
 
 extern Node *replace_rte_variables(Node *node,
 								   int target_varno, int sublevels_up,
-- 
2.39.5 (Apple Git-154)

