This is an automated email from the ASF dual-hosted git repository.
mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a7ad76ae570 [fix](be) Preserve null probe rows in mark anti join
(#63767)
a7ad76ae570 is described below
commit a7ad76ae5704536aa70f400d6d9bbad2be68bc50
Author: Jerry Hu <[email protected]>
AuthorDate: Fri May 29 18:42:36 2026 +0800
[fix](be) Preserve null probe rows in mark anti join (#63767)
### What problem does this PR solve?
Issue Number: None
Related PR: None
Problem Summary: Correlated `NOT IN` subqueries under disjunction can be
rewritten to a mark null-aware left anti join with additional join
conjuncts. When the probe join key is `NULL`, the hash table lookup
advanced the probe index before the caller could run the null-probe
handling path. As a result, the probe row was skipped before the mark
column was evaluated by the outer disjunction, producing incomplete
query results. This change keeps the probe index on the `NULL` row so
the null-aware join path can emit the correct mark value.
### Release note
Fix incorrect results for correlated `NOT IN` subqueries combined with
disjunctions.
### Check List (For Author)
- Test:
- Regression test: `doris-local-regression.sh --network 10.26.20.3/24
run -d correctness -s test_subquery_in_disjunction -forceGenOut`
- Regression test: `doris-local-regression.sh --network 10.26.20.3/24
run -d correctness -s test_subquery_in_disjunction`
- Manual test: verified the `NOT IN` + `OR` reproducer before and after
the fix on a local FE/BE cluster
- Build: `./build.sh --be`
- Behavior changed: Yes. Corrects query result semantics for affected
null-aware mark anti joins.
- Does this need documentation: No
---
be/src/exec/common/hash_table/join_hash_table.h | 5 +-
.../correctness/test_subquery_in_disjunction.out | 12 ++++
.../test_subquery_in_disjunction.groovy | 81 ++++++++++++++++++----
3 files changed, 83 insertions(+), 15 deletions(-)
diff --git a/be/src/exec/common/hash_table/join_hash_table.h
b/be/src/exec/common/hash_table/join_hash_table.h
index 9b63040330b..bc737352f41 100644
--- a/be/src/exec/common/hash_table/join_hash_table.h
+++ b/be/src/exec/common/hash_table/join_hash_table.h
@@ -480,7 +480,8 @@ private:
/// If the probe key is null
if constexpr (has_null_map) {
if (null_map[probe_idx]) {
- probe_idx++;
+ build_idx = 0;
+ picking_null_keys = false;
break;
}
}
@@ -512,4 +513,4 @@ private:
template <typename Key, typename Hash, bool DirectMapping>
using JoinHashMap = JoinHashTable<Key, Hash, DirectMapping>;
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/regression-test/data/correctness/test_subquery_in_disjunction.out
b/regression-test/data/correctness/test_subquery_in_disjunction.out
index 65dbd980472..7b253ee410d 100644
--- a/regression-test/data/correctness/test_subquery_in_disjunction.out
+++ b/regression-test/data/correctness/test_subquery_in_disjunction.out
@@ -66,6 +66,18 @@
1 2 3
10 20 30
+-- !not_in_nullable_mark_join --
+1 0
+11 \N
+
+-- !not_in_nullable_mark_join_in_disjunction --
+1 0
+11 \N
+
+-- !not_in_nullable_mark_join_in_disjunction_build_null --
+1 0
+11 \N
+
-- !mark_join_with_other_conjuncts1 --
1 2
1 3
diff --git
a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
index 2decf0583f2..384103d2c14 100644
--- a/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
+++ b/regression-test/suites/correctness/test_subquery_in_disjunction.groovy
@@ -97,22 +97,21 @@ suite("test_subquery_in_disjunction") {
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
"""
- // TODO: enable this after DORIS-7051 and DORIS-7052 is fixed
- // qt_hash_join_with_other_conjuncts5 """
- // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM
test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
- // """
+ qt_hash_join_with_other_conjuncts5 """
+ SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2
WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
+ """
- // qt_hash_join_with_other_conjuncts6 """
- // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM
test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
- // """
+ qt_hash_join_with_other_conjuncts6 """
+ SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 10 ORDER BY c1;
+ """
- // qt_hash_join_with_other_conjuncts7 """
- // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM
test_sq_dj2 WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
- // """
+ qt_hash_join_with_other_conjuncts7 """
+ SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2
WHERE test_sq_dj1.c1 > test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
+ """
- // qt_hash_join_with_other_conjuncts8 """
- // SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM
test_sq_dj2 WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
- // """
+ qt_hash_join_with_other_conjuncts8 """
+ SELECT * FROM test_sq_dj1 WHERE c1 NOT IN (SELECT c1 FROM test_sq_dj2
WHERE test_sq_dj1.c1 < test_sq_dj2.c2) OR c1 < 11 ORDER BY c1;
+ """
qt_same_subquery_in_conjuncts """
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR
c1 IN (SELECT c1 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
@@ -122,6 +121,62 @@ suite("test_subquery_in_disjunction") {
SELECT * FROM test_sq_dj1 WHERE c1 IN (SELECT c1 FROM test_sq_dj2) OR
c1 IN (SELECT c2 FROM test_sq_dj2) OR c1 < 10 ORDER BY c1;
"""
+ sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_outer """
+ sql """ DROP TABLE IF EXISTS test_sq_dj_nullable_inner """
+ sql """
+ CREATE TABLE `test_sq_dj_nullable_outer` (
+ `id` int(11) NULL,
+ `a` int(11) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """
+ CREATE TABLE `test_sq_dj_nullable_inner` (
+ `id` int(11) NULL,
+ `a` int(11) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_num" = "1"
+ );
+ """
+ sql """ INSERT INTO test_sq_dj_nullable_outer VALUES (1, 0), (11, NULL) """
+ sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (1, 10) """
+
+ order_qt_not_in_nullable_mark_join """
+ SELECT id, a FROM test_sq_dj_nullable_outer o
+ WHERE o.a NOT IN (
+ SELECT i.a FROM test_sq_dj_nullable_inner i
+ WHERE i.id > o.id AND i.a IS NOT NULL
+ )
+ ORDER BY id;
+ """
+
+ order_qt_not_in_nullable_mark_join_in_disjunction """
+ SELECT id, a FROM test_sq_dj_nullable_outer o
+ WHERE o.a NOT IN (
+ SELECT i.a FROM test_sq_dj_nullable_inner i
+ WHERE i.id > o.id AND i.a IS NOT NULL
+ ) OR o.id IN (1, 11)
+ ORDER BY id;
+ """
+
+ sql """ INSERT INTO test_sq_dj_nullable_inner VALUES (20, NULL) """
+
+ order_qt_not_in_nullable_mark_join_in_disjunction_build_null """
+ SELECT id, a FROM test_sq_dj_nullable_outer o
+ WHERE o.a NOT IN (
+ SELECT i.a FROM test_sq_dj_nullable_inner i
+ WHERE i.id > o.id
+ ) OR o.id IN (1, 11)
+ ORDER BY id;
+ """
+
// test mark join that one probe row matches multiple build rows
sql """drop table if exists sub_query_correlated_subquery1;"""
sql """create table if not exists sub_query_correlated_subquery1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]