This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 805520bfe4d [Bug](join) fix single null eq get wrong result (#44613) 805520bfe4d is described below commit 805520bfe4d90965bbd844945bd302eff38dc8f3 Author: Pxl <x...@selectdb.com> AuthorDate: Wed Nov 27 11:22:57 2024 +0800 [Bug](join) fix single null eq get wrong result (#44613) introduced by https://github.com/apache/doris/pull/42398 In order to make the null keys equal when using single null eq, all null keys need to be set to default value. --- be/src/pipeline/exec/hashjoin_build_sink.h | 5 ++++- .../exec/join/process_hash_table_probe_impl.h | 4 ++++ .../data/nereids_p0/join/test_mark_join.out | 7 +++++++ regression-test/data/query_p0/join/test_join.out | 6 ++++++ .../suites/nereids_p0/join/test_mark_join.groovy | 21 +++++++++++++++++++++ .../suites/query_p0/join/test_join.groovy | 13 ++++++++++++- 6 files changed, 54 insertions(+), 2 deletions(-) diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h b/be/src/pipeline/exec/hashjoin_build_sink.h index d905afa2758..cc78e6a769f 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.h +++ b/be/src/pipeline/exec/hashjoin_build_sink.h @@ -197,7 +197,10 @@ struct ProcessHashTableBuild { SCOPED_TIMER(_parent->_build_table_insert_timer); hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, _batch_size, *has_null_key); - + // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. + if (_build_raw_ptrs.size() == 1 && null_map) { + _build_raw_ptrs[0]->assume_mutable()->replace_column_null_data(null_map->data()); + } hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, null_map ? null_map->data() : nullptr, true, true, hash_table_ctx.hash_table->get_bucket_size()); diff --git a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h index 05cd3d7d9e0..fc1153b3419 100644 --- a/be/src/pipeline/exec/join/process_hash_table_probe_impl.h +++ b/be/src/pipeline/exec/join/process_hash_table_probe_impl.h @@ -173,6 +173,10 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType>::_init_probe_sid if (!_parent->_ready_probe) { _parent->_ready_probe = true; hash_table_ctx.reset(); + // In order to make the null keys equal when using single null eq, all null keys need to be set to default value. + if (_parent->_probe_columns.size() == 1 && null_map) { + _parent->_probe_columns[0]->assume_mutable()->replace_column_null_data(null_map); + } hash_table_ctx.init_serialized_keys(_parent->_probe_columns, probe_rows, null_map, true, false, hash_table_ctx.hash_table->get_bucket_size()); hash_table_ctx.hash_table->pre_build_idxs(hash_table_ctx.bucket_nums, diff --git a/regression-test/data/nereids_p0/join/test_mark_join.out b/regression-test/data/nereids_p0/join/test_mark_join.out index 59fc7d651ad..a6035556674 100644 --- a/regression-test/data/nereids_p0/join/test_mark_join.out +++ b/regression-test/data/nereids_p0/join/test_mark_join.out @@ -53,3 +53,10 @@ -- !mark_join_null_conjunct -- \N +-- !mark_join8 -- +1 1 1 false +2 2 2 false +3 \N \N \N +3 \N 3 \N +4 \N 4 true + diff --git a/regression-test/data/query_p0/join/test_join.out b/regression-test/data/query_p0/join/test_join.out index 3814530e0c6..7cbaf4f7750 100644 --- a/regression-test/data/query_p0/join/test_join.out +++ b/regression-test/data/query_p0/join/test_join.out @@ -3243,3 +3243,9 @@ false true true false false -- !sql -- 4 +-- !sql -- +2 \N + +-- !sql -- +2 \N + diff --git a/regression-test/suites/nereids_p0/join/test_mark_join.groovy b/regression-test/suites/nereids_p0/join/test_mark_join.groovy index 3b07c357b5d..034900f81ac 100644 --- a/regression-test/suites/nereids_p0/join/test_mark_join.groovy +++ b/regression-test/suites/nereids_p0/join/test_mark_join.groovy @@ -142,4 +142,25 @@ suite("test_mark_join", "nereids_p0") { """ qt_mark_join_null_conjunct """select null in ( select k1 from test_mark_join_t1);""" + + qt_mark_join8 """ + select + k1, + k2, + k3, + k1 not in ( + select + test_mark_join_t2.k2 + from + test_mark_join_t2 + where + test_mark_join_t2.k3 <=> test_mark_join_t1.k3 + ) vv + from + test_mark_join_t1 + order by + 1, + 2, + 3; + """ } diff --git a/regression-test/suites/query_p0/join/test_join.groovy b/regression-test/suites/query_p0/join/test_join.groovy index e75878cb032..fa83fddbd41 100644 --- a/regression-test/suites/query_p0/join/test_join.groovy +++ b/regression-test/suites/query_p0/join/test_join.groovy @@ -720,7 +720,7 @@ suite("test_join", "query,p0") { } qt_left_anti_join_with_other_pred "select b.k1 from baseall b left anti join test t on b.k1 = t.k1 and 1 = 2 order by b.k1" - + // null not in (1,2,3,null) = true qt_left_anti_join_null_1 "select b.k1 from baseall b left anti join test t on b.k1 = t.k1 order by b.k1" qt_left_anti_join_null_2 "select b.k1 from baseall b left anti join test_join_empty_view t on b.k1 = t.k1 order by b.k1" @@ -931,6 +931,7 @@ suite("test_join", "query,p0") { // https://github.com/apache/doris/issues/4210 qt_join_bug3"""select * from baseall t1 where k1 = (select min(k1) from test t2 where t2.k1 = t1.k1 and t2.k2=t1.k2) order by k1""" + // null not in (1,2,3) = false qt_join_bug4"""select b.k1 from baseall b where b.k1 not in( select k1 from baseall where k1 is not null )""" @@ -1330,4 +1331,14 @@ suite("test_join", "query,p0") { qt_sql """ select /*+SET_VAR(batch_size=1, disable_join_reorder=true)*/ count(DISTINCT dcqewrt.engineer) as active_person_count from tbl1 dcqewrt left join [broadcast] tbl2 dd on dd.data_dt = dcqewrt.data_dt; """ sql """ DROP TABLE IF EXISTS tbl2; """ sql """ DROP TABLE IF EXISTS tbl1; """ + + + sql "drop table if exists t01;" + sql "drop table if exists t02;" + sql"""create table t01 (id int, a varchar(10)) properties ("replication_num" = "1");""" + sql"""create table t02 (id int, b varchar(10)) properties ("replication_num" = "1");""" + sql"insert into t01 values (1, 'a'), (2, null), (3, 'c');" + sql"insert into t02 values (1, 'b');" + qt_sql"select * from t01 where (not like (a, 'a%')) <=> 'b';" + qt_sql"select * from t01 where (not like (a, 'a%')) <=> (select max(b) from t02); " } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org