This is an automated email from the ASF dual-hosted git repository. panxiaolei pushed a commit to branch new_join2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/new_join2 by this push: new 05d723dab44 fix 05d723dab44 is described below commit 05d723dab449f68ece332c7880ec76549bbe23a8 Author: BiteTheDDDDt <pxl...@qq.com> AuthorDate: Fri Nov 24 19:06:08 2023 +0800 fix --- be/src/vec/common/hash_table/hash_map.h | 12 +++++++++-- be/src/vec/exec/join/process_hash_table_probe.h | 2 +- .../vec/exec/join/process_hash_table_probe_impl.h | 23 ++++++++++------------ be/src/vec/exec/join/vhash_join_node.h | 3 ++- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h index 08ca7628a03..39872e205b3 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/hash_map.h @@ -224,7 +224,8 @@ public: } template <int JoinOpType> - void prepare_build(size_t num_elem, int batch_size) { + void prepare_build(size_t num_elem, int batch_size, bool has_null_key) { + _has_null_key = has_null_key; max_batch_size = batch_size; bucket_size = calc_bucket_size(num_elem + 1); first.resize(bucket_size + 1); @@ -314,6 +315,8 @@ public: return iter_idx >= elem_num; } + bool has_null_key() { return _has_null_key; } + private: // only LEFT_ANTI_JOIN/LEFT_SEMI_JOIN/NULL_AWARE_LEFT_ANTI_JOIN/CROSS_JOIN support mark join template <int JoinOpType, bool with_other_conjuncts> @@ -338,7 +341,11 @@ private: } else { bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0 : build_idx == 0; - mark_column->insert_value(matched); + if (!matched && _has_null_key) { + mark_column->insert_null(); + } else { + mark_column->insert_value(matched); + } } } @@ -515,6 +522,7 @@ private: mutable uint32_t iter_idx = 1; Cell cell; doris::vectorized::Arena* pool; + bool _has_null_key = false; }; template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>, diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h index e1e893ce190..995c3992245 100644 --- a/be/src/vec/exec/join/process_hash_table_probe.h +++ b/be/src/vec/exec/join/process_hash_table_probe.h @@ -68,7 +68,7 @@ struct ProcessHashTableProbe { // and output block may be different // The output result is determined by the other join conjunct result and same_to_prev struct Status do_other_join_conjuncts(Block* output_block, bool is_mark_join, - bool is_the_last_sub_block, std::vector<uint8_t>& visited); + std::vector<uint8_t>& visited, bool has_null_in_build_side); template <typename HashTableType> typename HashTableType::State _init_probe_side(HashTableType& hash_table_ctx, size_t probe_rows, diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h index a93736b4371..d38189087c7 100644 --- a/be/src/vec/exec/join/process_hash_table_probe_impl.h +++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h @@ -164,9 +164,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash bool all_match_one = false; size_t probe_size = 0; - // Is the last sub block of splitted block - bool is_the_last_sub_block = false; - std::unique_ptr<ColumnFilterHelper> mark_column; if (is_mark_join) { mark_column = std::make_unique<ColumnFilterHelper>(*mcol[mcol.size() - 1]); @@ -199,8 +196,9 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash output_block->swap(mutable_block.to_block()); if constexpr (with_other_conjuncts) { - return do_other_join_conjuncts(output_block, is_mark_join, is_the_last_sub_block, - hash_table_ctx.hash_table->get_visited()); + return do_other_join_conjuncts(output_block, is_mark_join, + hash_table_ctx.hash_table->get_visited(), + hash_table_ctx.hash_table->has_null_key()); } return Status::OK(); @@ -208,8 +206,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash template <int JoinOpType, typename Parent> Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( - Block* output_block, bool is_mark_join, bool is_the_last_sub_block, - std::vector<uint8_t>& visited) { + Block* output_block, bool is_mark_join, std::vector<uint8_t>& visited, + bool has_null_in_build_side) { // dispose the other join conjunct exec auto row_count = output_block->rows(); if (!row_count) { @@ -280,13 +278,12 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts( for (size_t i = 0; i < row_count; ++i) { filter_map[i] = true; - if constexpr (JoinOpType != TJoinOp::LEFT_SEMI_JOIN) { - if (!_build_indexs[i]) { - helper.insert_null(); - continue; - } + if (has_null_in_build_side && + (_build_indexs[i] != 0) ^ (JoinOpType == TJoinOp::LEFT_SEMI_JOIN)) { + helper.insert_null(); + } else { + helper.insert_value(filter_column_ptr[i]); } - helper.insert_value(filter_column_ptr[i]); } } else { if constexpr (JoinOpType == TJoinOp::LEFT_SEMI_JOIN) { diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index aecd49c8dbc..7f66fa85905 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -133,7 +133,8 @@ struct ProcessHashTableBuild { } SCOPED_TIMER(_parent->_build_table_insert_timer); - hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, _batch_size); + hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, _batch_size, + *has_null_key); hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows, null_map ? null_map->data() : nullptr, true, true, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org