This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 805520bfe4d [Bug](join) fix single null eq get wrong result (#44613)
805520bfe4d is described below

commit 805520bfe4d90965bbd844945bd302eff38dc8f3
Author: Pxl <x...@selectdb.com>
AuthorDate: Wed Nov 27 11:22:57 2024 +0800

    [Bug](join) fix single null eq get wrong result (#44613)
    
    introduced by https://github.com/apache/doris/pull/42398
    In order to make the null keys equal when using single null eq, all null
    keys need to be set to default value.
---
 be/src/pipeline/exec/hashjoin_build_sink.h          |  5 ++++-
 .../exec/join/process_hash_table_probe_impl.h       |  4 ++++
 .../data/nereids_p0/join/test_mark_join.out         |  7 +++++++
 regression-test/data/query_p0/join/test_join.out    |  6 ++++++
 .../suites/nereids_p0/join/test_mark_join.groovy    | 21 +++++++++++++++++++++
 .../suites/query_p0/join/test_join.groovy           | 13 ++++++++++++-
 6 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h 
b/be/src/pipeline/exec/hashjoin_build_sink.h
index d905afa2758..cc78e6a769f 100644
--- a/be/src/pipeline/exec/hashjoin_build_sink.h
+++ b/be/src/pipeline/exec/hashjoin_build_sink.h
@@ -197,7 +197,10 @@ struct ProcessHashTableBuild {
         SCOPED_TIMER(_parent->_build_table_insert_timer);
         hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, 
_batch_size,
                                                                       
*has_null_key);
-
+        // In order to make the null keys equal when using single null eq, all 
null keys need to be set to default value.
+        if (_build_raw_ptrs.size() == 1 && null_map) {
+            
_build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data());
+        }
         hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows,
                                             null_map ? null_map->data() : 
nullptr, true, true,
                                             
hash_table_ctx.hash_table->get_bucket_size());
diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h 
b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
index 05cd3d7d9e0..fc1153b3419 100644
--- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h
@@ -173,6 +173,10 @@ typename HashTableType::State 
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
     if (!_parent->_ready_probe) {
         _parent->_ready_probe = true;
         hash_table_ctx.reset();
+        // In order to make the null keys equal when using single null eq, all 
null keys need to be set to default value.
+        if (_parent->_probe_columns.size() == 1 && null_map) {
+            
_parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map);
+        }
         hash_table_ctx.init_serialized_keys(_parent->_probe_columns, 
probe_rows, null_map, true,
                                             false, 
hash_table_ctx.hash_table->get_bucket_size());
         hash_table_ctx.hash_table->pre_build_idxs(hash_table_ctx.bucket_nums,
diff --git a/regression-test/data/nereids_p0/join/test_mark_join.out 
b/regression-test/data/nereids_p0/join/test_mark_join.out
index 59fc7d651ad..a6035556674 100644
--- a/regression-test/data/nereids_p0/join/test_mark_join.out
+++ b/regression-test/data/nereids_p0/join/test_mark_join.out
@@ -53,3 +53,10 @@
 -- !mark_join_null_conjunct --
 \N
 
+-- !mark_join8 --
+1      1       1       false
+2      2       2       false
+3      \N      \N      \N
+3      \N      3       \N
+4      \N      4       true
+
diff --git a/regression-test/data/query_p0/join/test_join.out 
b/regression-test/data/query_p0/join/test_join.out
index 3814530e0c6..7cbaf4f7750 100644
--- a/regression-test/data/query_p0/join/test_join.out
+++ b/regression-test/data/query_p0/join/test_join.out
@@ -3243,3 +3243,9 @@ false     true    true    false   false
 -- !sql --
 4
 
+-- !sql --
+2      \N
+
+-- !sql --
+2      \N
+
diff --git a/regression-test/suites/nereids_p0/join/test_mark_join.groovy 
b/regression-test/suites/nereids_p0/join/test_mark_join.groovy
index 3b07c357b5d..034900f81ac 100644
--- a/regression-test/suites/nereids_p0/join/test_mark_join.groovy
+++ b/regression-test/suites/nereids_p0/join/test_mark_join.groovy
@@ -142,4 +142,25 @@ suite("test_mark_join", "nereids_p0") {
     """
 
     qt_mark_join_null_conjunct """select null in ( select k1 from 
test_mark_join_t1);"""
+
+    qt_mark_join8 """
+            select
+                k1,
+                k2,
+                k3,
+                k1 not in (
+                    select
+                        test_mark_join_t2.k2
+                    from
+                        test_mark_join_t2
+                    where
+                        test_mark_join_t2.k3 <=> test_mark_join_t1.k3
+                ) vv
+            from
+                test_mark_join_t1
+            order by
+                1,
+                2,
+                3;
+    """
 }
diff --git a/regression-test/suites/query_p0/join/test_join.groovy 
b/regression-test/suites/query_p0/join/test_join.groovy
index e75878cb032..fa83fddbd41 100644
--- a/regression-test/suites/query_p0/join/test_join.groovy
+++ b/regression-test/suites/query_p0/join/test_join.groovy
@@ -720,7 +720,7 @@ suite("test_join", "query,p0") {
     }
 
     qt_left_anti_join_with_other_pred "select b.k1 from baseall b left anti 
join test t on b.k1 = t.k1 and 1 = 2 order by b.k1"
-
+    // null not in (1,2,3,null) = true
     qt_left_anti_join_null_1 "select b.k1 from baseall b left anti join test t 
on b.k1 = t.k1 order by b.k1"
 
     qt_left_anti_join_null_2 "select b.k1 from baseall b left anti join 
test_join_empty_view t on b.k1 = t.k1 order by b.k1"
@@ -931,6 +931,7 @@ suite("test_join", "query,p0") {
     // https://github.com/apache/doris/issues/4210
     qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from 
test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2)
            order by k1"""
+    // null not in (1,2,3) = false
     qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 
from baseall where k1 is not null )"""
 
 
@@ -1330,4 +1331,14 @@ suite("test_join", "query,p0") {
     qt_sql """ select /*+SET_VAR(batch_size=1, disable_join_reorder=true)*/ 
count(DISTINCT dcqewrt.engineer)  as active_person_count from tbl1 dcqewrt left 
join [broadcast] tbl2 dd on dd.data_dt = dcqewrt.data_dt; """
     sql """ DROP TABLE IF EXISTS tbl2; """
     sql """ DROP TABLE IF EXISTS tbl1; """
+
+
+    sql "drop table if exists t01;"
+    sql "drop table if exists t02;"
+    sql"""create table t01 (id int, a varchar(10)) properties 
("replication_num" = "1");"""
+    sql"""create table t02 (id int, b varchar(10)) properties 
("replication_num" = "1");"""
+    sql"insert into t01 values (1, 'a'), (2, null), (3, 'c');"
+    sql"insert into t02 values (1, 'b');"
+    qt_sql"select * from t01 where (not like (a, 'a%')) <=> 'b';"
+    qt_sql"select * from t01 where (not like (a, 'a%')) <=> (select max(b) 
from t02); "
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to