From e155fce0fbad1e56404a813151195f55d7cbd997 Mon Sep 17 00:00:00 2001
From: Alexander Korotkov <akorotkov@postgresql.org>
Date: Wed, 2 Apr 2025 13:10:21 +0300
Subject: [PATCH v1] Disallow removing placeholders during Self-Join
 Elimination.

fc069a3a6319 implements Self-Join Elimination (SJE), which can remove base
elations when appropriate.  However, regressions tests for SJE only cover
the case when placeholder variables (PHVs) are evaluated and needed only
in a single base rel.  If this baserel is removed due to SJE, its clauses,
including PHVs, will be transferred to the keeping relation.  Removing these
PHVs may trigger an error on plan creation - thanks to the b3ff6c742f6c for
detecting that.

Reported-by: Alexander Lakhin <exclusion@gmail.com>
Author: Andrei Lepikhov <lepihov@gmail.com>
Author: Alena Rybakina <a.rybakina@postgrespro.ru>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
---
 src/backend/optimizer/plan/analyzejoins.c | 12 +++++++--
 src/test/regress/expected/join.out        | 31 +++++++++++++++++++++++
 src/test/regress/sql/join.sql             | 20 +++++++++++++++
 3 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c
index 8a8d4a2af33..492ac17b928 100644
--- a/src/backend/optimizer/plan/analyzejoins.c
+++ b/src/backend/optimizer/plan/analyzejoins.c
@@ -420,10 +420,18 @@ remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel,
 		PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
 
 		Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral));
-		if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
+		if (sjinfo != NULL &&
+			bms_is_subset(phinfo->ph_needed, joinrelids) &&
 			bms_is_member(relid, phinfo->ph_eval_at) &&
-			(sjinfo == NULL || !bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at)))
+			!bms_is_member(sjinfo->ojrelid, phinfo->ph_eval_at))
 		{
+			/*
+			 * This code shouldn't be executed if one relation is substituted
+			 * with another: in this case, the placeholder may be employed in
+			 * a filter inside the scan node the SJE removes.
+			 */
+			Assert(subst < 0);
+
 			root->placeholder_list = foreach_delete_current(root->placeholder_list,
 															l);
 			root->placeholder_array[phinfo->phid] = NULL;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 14da5708451..d3232c75f2e 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -7200,6 +7200,37 @@ on true;
          Output: t3.id, t1.id
 (7 rows)
 
+-- This is a degenerate case of PHV usage: it is evaluated and needed inside
+-- a baserel scan operation that the SJE removes.
+-- The PHV in this test should be in the filter of parameterised Index Scan:
+-- replace_nestloop_params code will detect if the placeholder list doesn't have
+-- a reference to this parameter.
+--
+-- NOTE:  enable_hashjoin and enable_mergejoin must be disabled.
+CREATE TABLE tbl_phv(x int, y int PRIMARY KEY);
+CREATE INDEX tbl_phv_idx ON tbl_phv(x);
+INSERT INTO tbl_phv (x, y)
+  SELECT gs, gs FROM generate_series(1,100) AS gs;
+VACUUM ANALYZE tbl_phv;
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT 1 FROM tbl_phv t1 LEFT JOIN
+  (SELECT 1 extra, x, y FROM tbl_phv tl) t3 JOIN
+    (SELECT y FROM tbl_phv tr) t4
+  ON t4.y = t3.y
+ON true WHERE t3.extra IS NOT NULL AND t3.x=t1.x % 2;
+                       QUERY PLAN                        
+---------------------------------------------------------
+ Nested Loop
+   Output: 1
+   ->  Seq Scan on public.tbl_phv t1
+         Output: t1.x, t1.y
+   ->  Index Scan using tbl_phv_idx on public.tbl_phv tr
+         Output: tr.x, tr.y
+         Index Cond: (tr.x = (t1.x % 2))
+         Filter: (1 IS NOT NULL)
+(8 rows)
+
+DROP TABLE IF EXISTS tbl_phv;
 -- Check that SJE replaces join clauses involving the removed rel correctly
 explain (costs off)
 select * from emp1 t1
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index c29d13b9fed..a9cdd4fe7b9 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -2774,6 +2774,26 @@ select * from generate_series(1,10) t1(id) left join
     lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id)
 on true;
 
+-- This is a degenerate case of PHV usage: it is evaluated and needed inside
+-- a baserel scan operation that the SJE removes.
+-- The PHV in this test should be in the filter of parameterised Index Scan:
+-- replace_nestloop_params code will detect if the placeholder list doesn't have
+-- a reference to this parameter.
+--
+-- NOTE:  enable_hashjoin and enable_mergejoin must be disabled.
+CREATE TABLE tbl_phv(x int, y int PRIMARY KEY);
+CREATE INDEX tbl_phv_idx ON tbl_phv(x);
+INSERT INTO tbl_phv (x, y)
+  SELECT gs, gs FROM generate_series(1,100) AS gs;
+VACUUM ANALYZE tbl_phv;
+EXPLAIN (COSTS OFF, VERBOSE)
+SELECT 1 FROM tbl_phv t1 LEFT JOIN
+  (SELECT 1 extra, x, y FROM tbl_phv tl) t3 JOIN
+    (SELECT y FROM tbl_phv tr) t4
+  ON t4.y = t3.y
+ON true WHERE t3.extra IS NOT NULL AND t3.x=t1.x % 2;
+DROP TABLE IF EXISTS tbl_phv;
+
 -- Check that SJE replaces join clauses involving the removed rel correctly
 explain (costs off)
 select * from emp1 t1
-- 
2.39.5 (Apple Git-154)

