This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 09501d0d65a [fix](join) incorrect result of right anti join with nullable (#28258) 09501d0d65a is described below commit 09501d0d65a71687c038aa4d567b231f587445ae Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Wed Dec 13 15:51:26 2023 +0800 [fix](join) incorrect result of right anti join with nullable (#28258) If the probe column on the left side is not nullable and the build column on the right is nullable, the anti join's result is incorrect. --- be/src/vec/exec/join/vhash_join_node.cpp | 11 + be/src/vec/exec/join/vhash_join_node.h | 4 + .../join/test_half_join_nullable_build_side.out | 128 ++++++++++ .../join/test_half_join_nullable_build_side.groovy | 280 +++++++++++++++++++++ 4 files changed, 423 insertions(+) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 37515671660..d0ac9c1ca5e 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -840,6 +840,7 @@ void HashJoinNode::_prepare_probe_block() { column_type.column = remove_nullable(column_type.column); column_type.type = remove_nullable(column_type.type); } + _temp_probe_nullable_columns.clear(); release_block_memory(_probe_block); } @@ -1057,6 +1058,7 @@ Status HashJoinNode::_extract_join_column(Block& block, ColumnUInt8::MutablePtr& ColumnRawPtrs& raw_ptrs, const std::vector<int>& res_col_ids) { DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); + _temp_probe_nullable_columns.clear(); for (size_t i = 0; i < _build_expr_ctxs.size(); ++i) { if (_is_null_safe_eq_join[i]) { raw_ptrs[i] = block.get_by_position(res_col_ids[i]).column.get(); @@ -1080,6 +1082,15 @@ Status HashJoinNode::_extract_join_column(Block& block, ColumnUInt8::MutablePtr& raw_ptrs[i] = &col_nested; } } else { + if constexpr (!BuildSide) { + if (_join_op == TJoinOp::RIGHT_ANTI_JOIN && + _build_expr_ctxs[i]->root()->is_nullable()) { + _temp_probe_nullable_columns.emplace_back(make_nullable( + block.get_by_position(res_col_ids[i]).column->assume_mutable())); + raw_ptrs[i] = _temp_probe_nullable_columns.back().get(); + continue; + } + } raw_ptrs[i] = column; } } diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index b77f8fb68e8..aa45dc17e49 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -293,6 +293,10 @@ private: // mark the build hash table whether it needs to store null value std::vector<bool> _store_null_in_hash_table; + // In right anti join, if the probe side is not nullable and the build side is nullable, + // we need to convert the probe column to nullable. + std::vector<ColumnPtr> _temp_probe_nullable_columns; + std::vector<uint16_t> _probe_column_disguise_null; std::vector<uint16_t> _probe_column_convert_to_null; diff --git a/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out new file mode 100644 index 00000000000..6f7d74acd37 --- /dev/null +++ b/regression-test/data/query_p0/join/test_half_join_nullable_build_side.out @@ -0,0 +1,128 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql1 -- +1 11 11 \N \N \N +2 111 111 2 111 111 +3 1111 1111 3 1111 1111 +4 111 111 2 111 111 + +-- !sql2 -- +1 11 11 \N \N \N +2 111 111 2 111 111 +3 1111 1111 3 1111 1111 +4 111 111 2 111 111 + +-- !sql3 -- +1 11 11 \N \N \N +2 111 111 2 111 111 +3 1111 1111 3 1111 1111 +4 111 111 2 111 111 + +-- !anti_sql4 -- +1 11 11 + +-- !sql5 -- +2 111 111 +3 1111 1111 +4 111 111 + +-- !anti_sql6 -- +1 11 11 + +-- !sql7 -- +2 111 111 +3 1111 1111 +4 111 111 + +-- !anti_sql8 -- +1 11 11 + +-- !sql9 -- +2 111 111 +3 1111 1111 +4 111 111 + +-- !anti_sql10 -- +1 11 11 + +-- !sql11 -- +2 111 111 +3 1111 1111 +4 111 111 + +-- !anti_sql12 -- +1 11 11 + +-- !sql13 -- +2 111 111 +3 1111 1111 +4 111 111 + +-- !anti_sql14 -- +1 \N \N +4 \N \N + +-- !sql15 -- +2 111 111 +3 1111 1111 + +-- !anti_sql16 -- +1 \N \N +4 \N \N + +-- !sql17 -- +2 111 111 +3 1111 1111 + +-- !anti_sql18 -- +1 \N \N +4 \N \N + +-- !sql19 -- +2 111 111 +3 1111 1111 + +-- !anti_sql20 -- +1 \N \N +4 \N \N + +-- !sql21 -- +2 111 111 +3 1111 1111 + +-- !anti_sql22 -- +1 \N \N +4 \N \N + +-- !sql23 -- +2 111 111 +3 1111 1111 + +-- !anti_sql24 -- +1 11 11 + +-- !sql25 -- +2 111 111 +3 1111 1111 +4 \N \N +5 1111 1111 + +-- !anti_sql26 -- + +-- !sql27 -- +1 \N \N +2 111 111 +3 1111 1111 +4 \N \N + +-- !anti_sql28 -- + +-- !sql29 -- +2 111 111 +3 1111 1111 + +-- !anti_sql28 -- + +-- !sql29 -- +2 111 111 +3 1111 1111 + diff --git a/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy new file mode 100644 index 00000000000..bddccb26ab1 --- /dev/null +++ b/regression-test/suites/query_p0/join/test_half_join_nullable_build_side.groovy @@ -0,0 +1,280 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_half_join_nullable_build_side", "query,p0") { + sql " set disable_join_reorder = 1; " + sql " drop table if exists test_half_join_nullable_build_side_l; "; + sql " drop table if exists test_half_join_nullable_build_side_l2; "; + sql " drop table if exists test_half_join_nullable_build_side_r; "; + sql " drop table if exists test_half_join_nullable_build_side_r2; "; + sql """ + create table test_half_join_nullable_build_side_l ( + k1 int, + v1 string not null, + v2 int not null + ) distributed by hash(k1) buckets 1 + properties("replication_num" = "1"); + """ + sql """ + create table test_half_join_nullable_build_side_l2 ( + k1 int, + v1 string null, + v2 int null + ) distributed by hash(k1) buckets 1 + properties("replication_num" = "1"); + """ + sql """ + create table test_half_join_nullable_build_side_r ( + k1 int, + v1 string null, + v2 string null + ) distributed by hash(k1) buckets 1 + properties("replication_num" = "1"); + """ + sql """ + create table test_half_join_nullable_build_side_r2 ( + k1 int, + v1 string not null, + v2 string not null + ) distributed by hash(k1) buckets 1 + properties("replication_num" = "1"); + """ + + sql """ insert into test_half_join_nullable_build_side_l values (1, 11, "11"), (2, 111, "111"), (3, 1111, "1111"), (4, 111, "111") """ + sql """ insert into test_half_join_nullable_build_side_l2 values (1, 11, "11"), (2, 111, "111"), (3, 1111, "1111"), (4, null, null), (5, 1111, "1111") """ + sql """ insert into test_half_join_nullable_build_side_r values (1, null, null), (2, 111, "111"), (3, 1111, "1111"), (4, null, null) """ + sql """ insert into test_half_join_nullable_build_side_r2 values (2, 111, "111"), (3, 1111, "1111") """ + + qt_sql1 """ + select * + from + test_half_join_nullable_build_side_l l left join test_half_join_nullable_build_side_r r on l.v1 = r.v1 + order by 1, 2, 3, 4, 5, 6; + """ + + qt_sql2 """ + select * + from + test_half_join_nullable_build_side_l l left join test_half_join_nullable_build_side_r r on l.v2 = r.v2 + order by 1, 2, 3, 4, 5, 6; + """ + + qt_sql3 """ + select * + from + test_half_join_nullable_build_side_l l left join test_half_join_nullable_build_side_r r on l.v1 = r.v1 and l.v2 = r.v2 + order by 1, 2, 3, 4, 5, 6; + """ + + qt_anti_sql4 """ + select * + from + test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v1 = r.v1 + order by 1, 2, 3; + """ + + qt_sql5 """ + select * + from + test_half_join_nullable_build_side_l l left semi join test_half_join_nullable_build_side_r r on l.v1 = r.v1 + order by 1, 2, 3; + """ + + qt_anti_sql6 """ + select * + from + test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql7 """ + select * + from + test_half_join_nullable_build_side_l l left semi join test_half_join_nullable_build_side_r r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql8 """ + select * + from + test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v1 = r.v1 and l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql9 """ + select * + from + test_half_join_nullable_build_side_l l left semi join test_half_join_nullable_build_side_r r on l.v1 = r.v1 and l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql10 """ + select * + from + test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v1 <=> r.v1 + order by 1, 2, 3; + """ + + qt_sql11 """ + select * + from + test_half_join_nullable_build_side_l l left semi join test_half_join_nullable_build_side_r r on l.v1 <=> r.v1 + order by 1, 2, 3; + """ + + qt_anti_sql12 """ + select * + from + test_half_join_nullable_build_side_l l left anti join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_sql13 """ + select * + from + test_half_join_nullable_build_side_l l left semi join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + + + qt_anti_sql14 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r r on l.v1 = r.v1 + order by 1, 2, 3; + """ + + qt_sql15 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r r on l.v1 = r.v1 + order by 1, 2, 3; + """ + + qt_anti_sql16 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql17 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql18 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r r on l.v1 = r.v1 and l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql19 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r r on l.v1 = r.v1 and l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql20 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r r on l.v1 <=> r.v1 + order by 1, 2, 3; + """ + + qt_sql21 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r r on l.v1 <=> r.v1 + order by 1, 2, 3; + """ + + qt_anti_sql22 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_sql23 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql24 """ + select * + from + test_half_join_nullable_build_side_l2 l left anti join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_sql25 """ + select * + from + test_half_join_nullable_build_side_l2 l left semi join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql26 """ + select * + from + test_half_join_nullable_build_side_l2 l right anti join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_sql27 """ + select * + from + test_half_join_nullable_build_side_l2 l right semi join test_half_join_nullable_build_side_r r on l.v2 <=> r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql28 """ + select * + from + test_half_join_nullable_build_side_l l right anti join test_half_join_nullable_build_side_r2 r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql29 """ + select * + from + test_half_join_nullable_build_side_l l right semi join test_half_join_nullable_build_side_r2 r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_anti_sql28 """ + select * + from + test_half_join_nullable_build_side_l2 l right anti join test_half_join_nullable_build_side_r2 r on l.v2 = r.v2 + order by 1, 2, 3; + """ + + qt_sql29 """ + select * + from + test_half_join_nullable_build_side_l2 l right semi join test_half_join_nullable_build_side_r2 r on l.v2 = r.v2 + order by 1, 2, 3; + """ +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org