From ae1ef6b8f5ad8b4bbbf28bf8565a088ff9b854de Mon Sep 17 00:00:00 2001
From: Richard Guo <guofenglinux@gmail.com>
Date: Thu, 26 Feb 2026 09:51:57 +0900
Subject: [PATCH v2] Convert ALL SubLinks to ANY SubLinks

PostgreSQL's planner is highly optimized for ANY SubLinks, offering
features like hashed SubPlans and the ability to flatten them into
semijoins or anti-semijoins.  ALL SubLinks, however, have historically
been excluded from these optimizations.

This patch teaches the planner to convert ALL SubLinks to ANY SubLinks
by applying operator negation and De Morgan's laws, provided that every
comparison operator involved has a negator.  Specifically:

  foo op ALL (sub-SELECT) => NOT (foo negator_op ANY (sub-SELECT))
  NOT (foo op ALL (sub-SELECT)) => foo negator_op ANY (sub-SELECT)

This unlocks a richer set of optimizations.  At a minimum, this makes
it possible for the executor to evaluate the unflattened sublink using
a hashed SubPlan.  Furthermore, it exposes the sublink to the standard
pull-up machinery, potentially flattening it into a semijoin or
anti-semijoin.

In the worst-case scenario where the transformed ANY sublink cannot be
pulled up and cannot be hashed, execution falls back to a nested-loop
SubPlan.  Performance in this scenario is effectively identical to the
legacy ALL SubPlan, ensuring no regressions.  Because the operator is
negated, the ANY subplan will short-circuit on the exact same inner
tuple that the ALL subplan would have short-circuited on.  The only
added overhead is a single boolean NOT inversion per outer tuple,
which is negligible compared to the cost of the nested-loop execution
itself.
---
 src/backend/optimizer/prep/prepjointree.c | 198 +++++++++++++++++++++-
 src/test/regress/expected/subselect.out   | 163 +++++++++++++++++-
 src/test/regress/sql/subselect.sql        |  70 +++++++-
 3 files changed, 422 insertions(+), 9 deletions(-)

diff --git a/src/backend/optimizer/prep/prepjointree.c b/src/backend/optimizer/prep/prepjointree.c
index d5e1041ffa3..0d2d3f121e1 100644
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -44,6 +44,7 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteHandler.h"
 #include "rewrite/rewriteManip.h"
+#include "utils/lsyscache.h"
 #include "utils/rel.h"
 
 
@@ -113,6 +114,7 @@ static Node *pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
 static Node *pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 										   Node **jtlink1, Relids available_rels1,
 										   Node **jtlink2, Relids available_rels2);
+static Node *negate_sublink_testexpr(Node *testexpr);
 static Node *pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
 										JoinExpr *lowest_outer_join,
 										AppendRelInfo *containing_appendrel);
@@ -622,7 +624,9 @@ replace_empty_jointree(Query *parse)
 /*
  * pull_up_sublinks
  *		Attempt to pull up ANY and EXISTS SubLinks to be treated as
- *		semijoins or anti-semijoins.
+ *		semijoins or anti-semijoins.  We also transform ALL SubLinks
+ *		to ANY SubLinks if possible to unlock hashed SubPlan execution
+ *		and enable potential pull-up.
  *
  * A clause "foo op ANY (sub-SELECT)" can be processed by pulling the
  * sub-SELECT up to become a rangetable entry and treating the implied
@@ -639,6 +643,20 @@ replace_empty_jointree(Query *parse)
  * Under similar conditions, EXISTS and NOT EXISTS clauses can be handled
  * by pulling up the sub-SELECT and creating a semijoin or anti-semijoin.
  *
+ * A negated clause "NOT (foo op ANY (sub-SELECT))" can be handled
+ * similarly by creating an anti-semijoin.  However, this transformation
+ * is much more restricted than a positive ANY pull-up: to safely bypass
+ * standard SQL's 3-valued logic, we must rigorously prove that both the
+ * outer test expression and the subquery's output expression are strictly
+ * non-nullable, and that the operator itself cannot return NULL for
+ * non-null inputs.
+ *
+ * A clause "foo op ALL (sub-SELECT)" can be logically rewritten into "NOT
+ * (foo negator_op ANY (sub-SELECT))".  This conversion makes it possible
+ * for the executor to evaluate the unflattened sublink using a hashed
+ * SubPlan.  Furthermore, it exposes the sublink to the standard pull-up
+ * machinery, potentially flattening it into a semijoin or anti-semijoin.
+ *
  * This routine searches for such clauses and does the necessary parsetree
  * transformations if any are found.
  *
@@ -844,7 +862,7 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 		JoinExpr   *j;
 		Relids		child_rels;
 
-		/* Is it a convertible ANY or EXISTS clause? */
+		/* Is it a convertible ANY, EXISTS or ALL clause? */
 		if (sublink->subLinkType == ANY_SUBLINK)
 		{
 			ScalarArrayOpExpr *saop;
@@ -966,12 +984,45 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 				return NULL;
 			}
 		}
+		else if (sublink->subLinkType == ALL_SUBLINK)
+		{
+			Node	   *negated_expr = negate_sublink_testexpr(sublink->testexpr);
+
+			if (negated_expr != NULL)
+			{
+				SubLink    *any_sublink;
+				Node	   *not_expr;
+
+				any_sublink = makeNode(SubLink);
+				any_sublink->subLinkType = ANY_SUBLINK;
+				any_sublink->subLinkId = 0;
+				any_sublink->testexpr = negated_expr;
+				any_sublink->operName = sublink->operName;
+				any_sublink->subselect = sublink->subselect;
+				any_sublink->location = sublink->location;
+				/* XXX should we update operName accordingly */
+
+				not_expr = (Node *) makeBoolExpr(NOT_EXPR,
+												 list_make1(any_sublink),
+												 any_sublink->location);
+
+				return pull_up_sublinks_qual_recurse(root,
+													 not_expr,
+													 jtlink1,
+													 available_rels1,
+													 jtlink2,
+													 available_rels2);
+			}
+		}
 		/* Else return it unmodified */
 		return node;
 	}
 	if (is_notclause(node))
 	{
-		/* If the immediate argument of NOT is ANY or EXISTS, try to convert */
+		/*
+		 * If the immediate argument of NOT is ANY, EXISTS or ALL, try to
+		 * convert.
+		 */
 		SubLink    *sublink = (SubLink *) get_notclausearg((Expr *) node);
 		JoinExpr   *j;
 		Relids		child_rels;
@@ -1086,6 +1137,31 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 					return NULL;
 				}
 			}
+			else if (sublink->subLinkType == ALL_SUBLINK)
+			{
+				Node	   *negated_expr = negate_sublink_testexpr(sublink->testexpr);
+
+				if (negated_expr != NULL)
+				{
+					SubLink    *any_sublink;
+
+					any_sublink = makeNode(SubLink);
+					any_sublink->subLinkType = ANY_SUBLINK;
+					any_sublink->subLinkId = 0;
+					any_sublink->testexpr = negated_expr;
+					any_sublink->operName = sublink->operName;
+					any_sublink->subselect = sublink->subselect;
+					any_sublink->location = sublink->location;
+					/* XXX should we update operName accordingly */
+
+					return pull_up_sublinks_qual_recurse(root,
+														 (Node *) any_sublink,
+														 jtlink1,
+														 available_rels1,
+														 jtlink2,
+														 available_rels2);
+				}
+			}
 		}
 		/* Else return it unmodified */
 		return node;
@@ -1122,6 +1198,122 @@ pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
 	return node;
 }
 
+/*
+ * negate_sublink_testexpr
+ *		Attempt to logically negate the testexpr of an ALL_SUBLINK.
+ *
+ * This helper is used to transform ALL sublinks into ANY sublinks.  It returns
+ * a newly allocated negated expression tree, or NULL if negation is not
+ * possible.  The input tree is left unmodified; its argument lists are shared
+ * with the result rather than copied.
+ */
+static Node *
+negate_sublink_testexpr(Node *testexpr)
+{
+	if (testexpr == NULL)
+		return NULL;
+	if (IsA(testexpr, OpExpr))
+	{
+		/* single-column comparison */
+		OpExpr	   *opexpr = (OpExpr *) testexpr;
+		Oid			negator = get_negator(opexpr->opno);
+
+		if (OidIsValid(negator))
+		{
+			OpExpr	   *newopexpr = makeNode(OpExpr);
+
+			newopexpr->opno = negator;
+			newopexpr->opfuncid = InvalidOid;	/* not looked up here */
+			newopexpr->opresulttype = opexpr->opresulttype;
+			newopexpr->opretset = opexpr->opretset;
+			newopexpr->opcollid = opexpr->opcollid;
+			newopexpr->inputcollid = opexpr->inputcollid;
+			newopexpr->args = opexpr->args;
+			newopexpr->location = opexpr->location;
+			return (Node *) newopexpr;
+		}
+	}
+	else if (is_andclause(testexpr) || is_orclause(testexpr))
+	{
+		/* multi-column equality or inequality checks */
+		BoolExpr   *bexpr = (BoolExpr *) testexpr;
+		List	   *nargs = NIL;
+
+		/*
+		 * Apply DeMorgan's Laws: swap AND for OR and negate each subclause,
+		 * giving up if any subclause cannot be negated.
+		 */
+		foreach_ptr(Node, arg, bexpr->args)
+		{
+			Node	   *negated_arg = negate_sublink_testexpr(arg);
+
+			if (negated_arg == NULL)
+				return NULL;
+			nargs = lappend(nargs, negated_arg);
+		}
+
+		return (bexpr->boolop == AND_EXPR) ?
+			(Node *) makeBoolExpr(OR_EXPR, nargs, bexpr->location) :
+			(Node *) makeBoolExpr(AND_EXPR, nargs, bexpr->location);
+	}
+	else if (IsA(testexpr, RowCompareExpr))
+	{
+		/* multi-column ordering checks */
+		RowCompareExpr *rcexpr = (RowCompareExpr *) testexpr;
+		RowCompareExpr *newrcexpr;
+		List	   *negated_opnos = NIL;
+		ListCell   *lc_opno;
+		ListCell   *lc_opfamily;
+
+		/*
+		 * Every column's operator needs a negator, and that negator must be
+		 * a member of the opfamily recorded for the column.  Otherwise the
+		 * new node's opnos and opfamilies lists would be inconsistent, so we
+		 * decline to convert.
+		 */
+		forboth(lc_opno, rcexpr->opnos, lc_opfamily, rcexpr->opfamilies)
+		{
+			Oid			negator = get_negator(lfirst_oid(lc_opno));
+
+			if (!OidIsValid(negator) ||
+				!op_in_opfamily(negator, lfirst_oid(lc_opfamily)))
+				return NULL;
+			negated_opnos = lappend_oid(negated_opnos, negator);
+		}
+
+		newrcexpr = makeNode(RowCompareExpr);
+		switch (rcexpr->cmptype)
+		{
+			case COMPARE_LT:
+				newrcexpr->cmptype = COMPARE_GE;
+				break;
+			case COMPARE_LE:
+				newrcexpr->cmptype = COMPARE_GT;
+				break;
+			case COMPARE_GE:
+				newrcexpr->cmptype = COMPARE_LT;
+				break;
+			case COMPARE_GT:
+				newrcexpr->cmptype = COMPARE_LE;
+				break;
+			default:
+				/* EQ and NE cases aren't allowed here */
+				elog(ERROR, "unrecognized compare type: %d",
+					 (int) rcexpr->cmptype);
+				break;
+		}
+		newrcexpr->opnos = negated_opnos;
+		newrcexpr->opfamilies = rcexpr->opfamilies;
+		newrcexpr->inputcollids = rcexpr->inputcollids;
+		newrcexpr->largs = rcexpr->largs;
+		newrcexpr->rargs = rcexpr->rargs;
+
+		return (Node *) newrcexpr;
+	}
+
+	return NULL;
+}
+
 /*
  * preprocess_function_rtes
  *		Constant-simplify any FUNCTION RTEs in the FROM clause, and then
diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out
index 200236a0a69..a46c68f9d1d 100644
--- a/src/test/regress/expected/subselect.out
+++ b/src/test/regress/expected/subselect.out
@@ -3324,13 +3324,16 @@ SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
 (1 row)
 
 --
--- Check NOT IN performs an ANTI JOIN when both the outer query's expressions
--- and the sub-select's output columns are provably non-nullable, and the
--- operator itself cannot return NULL for non-null inputs.
+-- Test cases for NOT ANY transformations and ALL transformations
 --
 BEGIN;
 CREATE TEMP TABLE not_null_tab (id int NOT NULL, val int NOT NULL);
 CREATE TEMP TABLE null_tab (id int, val int);
+--
+-- Check NOT IN performs an ANTI JOIN when both the outer query's expressions
+-- and the sub-select's output columns are provably non-nullable, and the
+-- operator itself cannot return NULL for non-null inputs.
+--
 -- ANTI JOIN: both sides are defined NOT NULL
 EXPLAIN (COSTS OFF)
 SELECT * FROM not_null_tab
@@ -3761,4 +3764,158 @@ WHERE NOT id ?= ANY (SELECT id FROM not_null_tab);
            ->  Seq Scan on not_null_tab not_null_tab_1
 (5 rows)
 
+--
+-- Check ALL SubLink is converted to ANY SubLink if negated testexpr exists
+--
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id <> ALL (SELECT id FROM null_tab);
+                        QUERY PLAN                        
+----------------------------------------------------------
+ Seq Scan on not_null_tab
+   Filter: (NOT (ANY (id = (hashed SubPlan any_1).col1)))
+   SubPlan any_1
+     ->  Seq Scan on null_tab
+(4 rows)
+
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) <> ALL (SELECT id, val FROM null_tab);
+                                             QUERY PLAN                                             
+----------------------------------------------------------------------------------------------------
+ Seq Scan on not_null_tab
+   Filter: (NOT (ANY ((id = (hashed SubPlan any_1).col1) AND (val = (hashed SubPlan any_1).col2))))
+   SubPlan any_1
+     ->  Seq Scan on null_tab
+(4 rows)
+
+-- Ensure we get an ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) > ALL (SELECT id, val FROM null_tab);
+                                       QUERY PLAN                                        
+-----------------------------------------------------------------------------------------
+ Seq Scan on not_null_tab
+   Filter: (NOT (ANY (ROW(id, val) <= ROW((SubPlan any_1).col1, (SubPlan any_1).col2))))
+   SubPlan any_1
+     ->  Materialize
+           ->  Seq Scan on null_tab
+(5 rows)
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id <> ALL (SELECT id FROM not_null_tab);
+                     QUERY PLAN                      
+-----------------------------------------------------
+ Hash Anti Join
+   Hash Cond: (not_null_tab.id = not_null_tab_1.id)
+   ->  Seq Scan on not_null_tab
+   ->  Hash
+         ->  Seq Scan on not_null_tab not_null_tab_1
+(5 rows)
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) <> ALL (SELECT id, val FROM not_null_tab);
+                                            QUERY PLAN                                             
+---------------------------------------------------------------------------------------------------
+ Merge Anti Join
+   Merge Cond: ((not_null_tab.id = not_null_tab_1.id) AND (not_null_tab.val = not_null_tab_1.val))
+   ->  Sort
+         Sort Key: not_null_tab.id, not_null_tab.val
+         ->  Seq Scan on not_null_tab
+   ->  Sort
+         Sort Key: not_null_tab_1.id, not_null_tab_1.val
+         ->  Seq Scan on not_null_tab not_null_tab_1
+(8 rows)
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) > ALL (SELECT id, val FROM not_null_tab);
+                                              QUERY PLAN                                               
+-------------------------------------------------------------------------------------------------------
+ Nested Loop Anti Join
+   Join Filter: (ROW(not_null_tab.id, not_null_tab.val) <= ROW(not_null_tab_1.id, not_null_tab_1.val))
+   ->  Seq Scan on not_null_tab
+   ->  Materialize
+         ->  Seq Scan on not_null_tab not_null_tab_1
+(5 rows)
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT id <> ALL (SELECT id FROM null_tab);
+                    QUERY PLAN                     
+---------------------------------------------------
+ Hash Join
+   Hash Cond: (null_tab.id = null_tab_1.id)
+   ->  Seq Scan on null_tab
+   ->  Hash
+         ->  HashAggregate
+               Group Key: null_tab_1.id
+               ->  Seq Scan on null_tab null_tab_1
+(7 rows)
+
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab t1
+LEFT JOIN null_tab t2
+ON NOT t1.id <> ALL (SELECT id FROM null_tab);
+                         QUERY PLAN                         
+------------------------------------------------------------
+ Nested Loop Left Join
+   Join Filter: (ANY (t1.id = (hashed SubPlan any_1).col1))
+   ->  Seq Scan on null_tab t1
+   ->  Materialize
+         ->  Seq Scan on null_tab t2
+   SubPlan any_1
+     ->  Seq Scan on null_tab
+(7 rows)
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT (id, val) <> ALL (SELECT id, val FROM null_tab);
+                                    QUERY PLAN                                    
+----------------------------------------------------------------------------------
+ Hash Join
+   Hash Cond: ((null_tab.id = null_tab_1.id) AND (null_tab.val = null_tab_1.val))
+   ->  Seq Scan on null_tab
+   ->  Hash
+         ->  HashAggregate
+               Group Key: null_tab_1.id, null_tab_1.val
+               ->  Seq Scan on null_tab null_tab_1
+(7 rows)
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT (id, val) > ALL (SELECT id, val FROM null_tab);
+                                      QUERY PLAN                                       
+---------------------------------------------------------------------------------------
+ Nested Loop Semi Join
+   Join Filter: (ROW(null_tab.id, null_tab.val) <= ROW(null_tab_1.id, null_tab_1.val))
+   ->  Seq Scan on null_tab
+   ->  Materialize
+         ->  Seq Scan on null_tab null_tab_1
+(5 rows)
+
+-- Ensure we get ALL SubPlan, as the operator has no negator
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id ?= ALL (SELECT id FROM not_null_tab);
+                      QUERY PLAN                       
+-------------------------------------------------------
+ Seq Scan on not_null_tab
+   Filter: (ALL (id ?= (SubPlan all_1).col1))
+   SubPlan all_1
+     ->  Materialize
+           ->  Seq Scan on not_null_tab not_null_tab_1
+(5 rows)
+
 ROLLBACK;
diff --git a/src/test/regress/sql/subselect.sql b/src/test/regress/sql/subselect.sql
index 4cd016f4ac3..58d993f0422 100644
--- a/src/test/regress/sql/subselect.sql
+++ b/src/test/regress/sql/subselect.sql
@@ -1450,9 +1450,7 @@ EXPLAIN (COSTS OFF)
 SELECT ten FROM onek t WHERE 1.0::integer IN ((VALUES (1), (3)));
 
 --
--- Check NOT IN performs an ANTI JOIN when both the outer query's expressions
--- and the sub-select's output columns are provably non-nullable, and the
--- operator itself cannot return NULL for non-null inputs.
+-- Test cases for NOT ANY transformations and ALL transformations
 --
 
 BEGIN;
@@ -1460,6 +1458,12 @@ BEGIN;
 CREATE TEMP TABLE not_null_tab (id int NOT NULL, val int NOT NULL);
 CREATE TEMP TABLE null_tab (id int, val int);
 
+--
+-- Check NOT IN performs an ANTI JOIN when both the outer query's expressions
+-- and the sub-select's output columns are provably non-nullable, and the
+-- operator itself cannot return NULL for non-null inputs.
+--
+
 -- ANTI JOIN: both sides are defined NOT NULL
 EXPLAIN (COSTS OFF)
 SELECT * FROM not_null_tab
@@ -1632,4 +1636,64 @@ EXPLAIN (COSTS OFF)
 SELECT * FROM not_null_tab
 WHERE NOT id ?= ANY (SELECT id FROM not_null_tab);
 
+--
+-- Check ALL SubLink is converted to ANY SubLink if negated testexpr exists
+--
+
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id <> ALL (SELECT id FROM null_tab);
+
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) <> ALL (SELECT id, val FROM null_tab);
+
+-- Ensure we get an ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) > ALL (SELECT id, val FROM null_tab);
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id <> ALL (SELECT id FROM not_null_tab);
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) <> ALL (SELECT id, val FROM not_null_tab);
+
+-- Ensure we get an anti-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE (id, val) > ALL (SELECT id, val FROM not_null_tab);
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT id <> ALL (SELECT id FROM null_tab);
+
+-- Ensure we get a hashed ANY SubPlan
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab t1
+LEFT JOIN null_tab t2
+ON NOT t1.id <> ALL (SELECT id FROM null_tab);
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT (id, val) <> ALL (SELECT id, val FROM null_tab);
+
+-- Ensure we get a semi-join
+EXPLAIN (COSTS OFF)
+SELECT * FROM null_tab
+WHERE NOT (id, val) > ALL (SELECT id, val FROM null_tab);
+
+-- Ensure we get ALL SubPlan, as the operator has no negator
+EXPLAIN (COSTS OFF)
+SELECT * FROM not_null_tab
+WHERE id ?= ALL (SELECT id FROM not_null_tab);
+
 ROLLBACK;
-- 
2.39.5 (Apple Git-154)