This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch new_join2
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/new_join2 by this push:
     new 05d723dab44 fix
05d723dab44 is described below

commit 05d723dab449f68ece332c7880ec76549bbe23a8
Author: BiteTheDDDDt <pxl...@qq.com>
AuthorDate: Fri Nov 24 19:06:08 2023 +0800

    fix
---
 be/src/vec/common/hash_table/hash_map.h            | 12 +++++++++--
 be/src/vec/exec/join/process_hash_table_probe.h    |  2 +-
 .../vec/exec/join/process_hash_table_probe_impl.h  | 23 ++++++++++------------
 be/src/vec/exec/join/vhash_join_node.h             |  3 ++-
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index 08ca7628a03..39872e205b3 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -224,7 +224,8 @@ public:
     }
 
     template <int JoinOpType>
-    void prepare_build(size_t num_elem, int batch_size) {
+    void prepare_build(size_t num_elem, int batch_size, bool has_null_key) {
+        _has_null_key = has_null_key;
         max_batch_size = batch_size;
         bucket_size = calc_bucket_size(num_elem + 1);
         first.resize(bucket_size + 1);
@@ -314,6 +315,8 @@ public:
         return iter_idx >= elem_num;
     }
 
+    bool has_null_key() { return _has_null_key; }
+
 private:
     // only LEFT_ANTI_JOIN/LEFT_SEMI_JOIN/NULL_AWARE_LEFT_ANTI_JOIN/CROSS_JOIN support mark join
     template <int JoinOpType, bool with_other_conjuncts>
@@ -338,7 +341,11 @@ private:
                 } else {
                     bool matched = JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx != 0
                                                                                 : build_idx == 0;
-                    mark_column->insert_value(matched);
+                    if (!matched && _has_null_key) {
+                        mark_column->insert_null();
+                    } else {
+                        mark_column->insert_value(matched);
+                    }
                 }
             }
 
@@ -515,6 +522,7 @@ private:
     mutable uint32_t iter_idx = 1;
     Cell cell;
     doris::vectorized::Arena* pool;
+    bool _has_null_key = false;
 };
 
 template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h
index e1e893ce190..995c3992245 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -68,7 +68,7 @@ struct ProcessHashTableProbe {
     // and output block may be different
     // The output result is determined by the other join conjunct result and same_to_prev struct
     Status do_other_join_conjuncts(Block* output_block, bool is_mark_join,
-                                   bool is_the_last_sub_block, std::vector<uint8_t>& visited);
+                                   std::vector<uint8_t>& visited, bool has_null_in_build_side);
 
     template <typename HashTableType>
     typename HashTableType::State _init_probe_side(HashTableType& hash_table_ctx, size_t probe_rows,
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index a93736b4371..d38189087c7 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -164,9 +164,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
     bool all_match_one = false;
     size_t probe_size = 0;
 
-    // Is the last sub block of splitted block
-    bool is_the_last_sub_block = false;
-
     std::unique_ptr<ColumnFilterHelper> mark_column;
     if (is_mark_join) {
         mark_column = std::make_unique<ColumnFilterHelper>(*mcol[mcol.size() - 1]);
@@ -199,8 +196,9 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
     output_block->swap(mutable_block.to_block());
 
     if constexpr (with_other_conjuncts) {
-        return do_other_join_conjuncts(output_block, is_mark_join, is_the_last_sub_block,
-                                       hash_table_ctx.hash_table->get_visited());
+        return do_other_join_conjuncts(output_block, is_mark_join,
+                                       hash_table_ctx.hash_table->get_visited(),
+                                       hash_table_ctx.hash_table->has_null_key());
     }
 
     return Status::OK();
@@ -208,8 +206,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
 
 template <int JoinOpType, typename Parent>
 Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
-        Block* output_block, bool is_mark_join, bool is_the_last_sub_block,
-        std::vector<uint8_t>& visited) {
+        Block* output_block, bool is_mark_join, std::vector<uint8_t>& visited,
+        bool has_null_in_build_side) {
     // dispose the other join conjunct exec
     auto row_count = output_block->rows();
     if (!row_count) {
@@ -280,13 +278,12 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
 
             for (size_t i = 0; i < row_count; ++i) {
                 filter_map[i] = true;
-                if constexpr (JoinOpType != TJoinOp::LEFT_SEMI_JOIN) {
-                    if (!_build_indexs[i]) {
-                        helper.insert_null();
-                        continue;
-                    }
+                if (has_null_in_build_side &&
+                    (_build_indexs[i] != 0) ^ (JoinOpType == TJoinOp::LEFT_SEMI_JOIN)) {
+                    helper.insert_null();
+                } else {
+                    helper.insert_value(filter_column_ptr[i]);
                 }
-                helper.insert_value(filter_column_ptr[i]);
             }
         } else {
             if constexpr (JoinOpType == TJoinOp::LEFT_SEMI_JOIN) {
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index aecd49c8dbc..7f66fa85905 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -133,7 +133,8 @@ struct ProcessHashTableBuild {
         }
 
         SCOPED_TIMER(_parent->_build_table_insert_timer);
-        hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, _batch_size);
+        hash_table_ctx.hash_table->template prepare_build<JoinOpType>(_rows, _batch_size,
+                                                                      *has_null_key);
 
         hash_table_ctx.init_serialized_keys(_build_raw_ptrs, _rows,
                                             null_map ? null_map->data() : nullptr, true, true,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to