github-actions[bot] commented on code in PR #26138:
URL: https://github.com/apache/doris/pull/26138#discussion_r1377180624


##########
be/src/pipeline/exec/exchange_sink_operator.cpp:
##########
@@ -235,6 +235,14 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) {
     return Status::OK();
 }
 
+std::string ExchangeSinkLocalState::id_name() {

Review Comment:
   warning: method 'id_name' can be made static [readability-convert-member-functions-to-static]
   
   be/src/pipeline/exec/exchange_sink_operator.h:161:
   ```diff
   -     std::string id_name() override;
   +     static std::string id_name() override;
   ```
   



##########
be/src/service/internal_service.cpp:
##########
@@ -355,7 +355,7 @@ void 
PInternalServiceImpl::exec_plan_fragment_start(google::protobuf::RpcControl
     }
 }
 
-void PInternalServiceImpl::open_stream_sink(google::protobuf::RpcController* 
controller,
+void PInternalServiceImpl::open_load_stream(google::protobuf::RpcController* 
controller,

Review Comment:
   warning: method 'open_load_stream' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static void PInternalServiceImpl::open_load_stream(google::protobuf::RpcController* controller,
   ```
   



##########
be/src/vec/columns/column_string.cpp:
##########
@@ -161,6 +161,43 @@
     }
 }
 
+void ColumnString::insert_indices_from_join(const IColumn& src, const 
uint32_t* indices_begin,
+                                            const uint32_t* indices_end) {
+    const ColumnString& src_str = assert_cast<const ColumnString&>(src);
+    auto src_offset_data = src_str.offsets.data();
+
+    auto old_char_size = chars.size();
+    size_t total_chars_size = old_char_size;
+
+    auto dst_offsets_pos = offsets.size();
+    offsets.resize(offsets.size() + indices_end - indices_begin);
+    auto* dst_offsets_data = offsets.data();
+
+    for (auto x = indices_begin; x != indices_end; ++x) {
+        if (*x != 0) {
+            total_chars_size += src_offset_data[*x] - src_offset_data[*x - 1];
+        }
+        dst_offsets_data[dst_offsets_pos++] = total_chars_size;
+    }
+    check_chars_length(total_chars_size, offsets.size());
+
+    chars.resize(total_chars_size);
+
+    auto* src_data_ptr = src_str.chars.data();
+    auto* dst_data_ptr = chars.data();
+
+    size_t dst_chars_pos = old_char_size;
+    for (auto x = indices_begin; x != indices_end; ++x) {

Review Comment:
   warning: 'auto x' can be declared as 'const auto *x' [readability-qualified-auto]
   
   ```suggestion
       for (const auto *x = indices_begin; x != indices_end; ++x) {
   ```
   



##########
be/src/vec/common/hash_table/hash_map.h:
##########
@@ -193,10 +197,214 @@
     bool has_null_key_data() const { return false; }
 };
 
+template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
+class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
+public:
+    using Self = JoinHashMapTable;
+    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
+
+    using key_type = Key;
+    using value_type = typename Cell::value_type;
+    using mapped_type = typename Cell::Mapped;
+
+    using LookupResult = typename Base::LookupResult;
+
+    using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
+
+    static uint32_t calc_bucket_size(size_t num_elem) {
+        size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem 
- 1) / 7;
+        return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
+    }
+
+    template <int JoinOpType>
+    void prepare_build(size_t num_elem, int batch_size) {
+        max_batch_size = batch_size;
+        bucket_size = calc_bucket_size(num_elem + 1);
+        first.resize(bucket_size, 0);
+        next.resize(num_elem);
+
+        if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            visited.resize(num_elem, 0);
+        }
+    }
+
+    uint32_t get_bucket_size() const { return bucket_size; }
+
+    size_t size() const { return next.size(); }
+
+    void build(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+               size_t num_elem) {
+        build_keys = keys;
+        for (size_t i = 1; i < num_elem; i++) {
+            uint32_t bucket_num = bucket_nums[i];
+            next[i] = first[bucket_num];
+            first[bucket_num] = i;
+        }
+    }
+
+    template <int JoinOpType>
+    auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+                    int probe_idx, uint32_t build_idx, int probe_rows,
+                    uint32_t* __restrict probe_idxs, uint32_t* __restrict 
build_idxs) {
+        if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
+                      JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
+            return _find_batch_inner_outer_join<JoinOpType>(keys, bucket_nums, 
probe_idx, build_idx,
+                                                            probe_rows, 
probe_idxs, build_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN) {
+            return _find_batch_left_semi_anti<JoinOpType>(keys, bucket_nums, 
probe_idx, probe_rows,
+                                                          probe_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            return _find_batch_right_semi_anti(keys, bucket_nums, probe_idx, 
probe_rows);
+        }
+        return std::tuple {0, 0u, 0};

Review Comment:
   warning: integer literal has suffix 'u', which is not uppercase [readability-uppercase-literal-suffix]
   
   ```suggestion
           return std::tuple {0, 0U, 0};
   ```
   



##########
be/src/vec/common/hash_table/hash_map.h:
##########
@@ -193,10 +197,214 @@
     bool has_null_key_data() const { return false; }
 };
 
+template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
+class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
+public:
+    using Self = JoinHashMapTable;
+    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
+
+    using key_type = Key;
+    using value_type = typename Cell::value_type;
+    using mapped_type = typename Cell::Mapped;
+
+    using LookupResult = typename Base::LookupResult;
+
+    using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
+
+    static uint32_t calc_bucket_size(size_t num_elem) {
+        size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem 
- 1) / 7;
+        return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
+    }
+
+    template <int JoinOpType>
+    void prepare_build(size_t num_elem, int batch_size) {
+        max_batch_size = batch_size;
+        bucket_size = calc_bucket_size(num_elem + 1);
+        first.resize(bucket_size, 0);
+        next.resize(num_elem);
+
+        if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            visited.resize(num_elem, 0);
+        }
+    }
+
+    uint32_t get_bucket_size() const { return bucket_size; }
+
+    size_t size() const { return next.size(); }
+
+    void build(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+               size_t num_elem) {
+        build_keys = keys;
+        for (size_t i = 1; i < num_elem; i++) {
+            uint32_t bucket_num = bucket_nums[i];
+            next[i] = first[bucket_num];
+            first[bucket_num] = i;
+        }
+    }
+
+    template <int JoinOpType>
+    auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+                    int probe_idx, uint32_t build_idx, int probe_rows,
+                    uint32_t* __restrict probe_idxs, uint32_t* __restrict 
build_idxs) {
+        if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
+                      JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
+            return _find_batch_inner_outer_join<JoinOpType>(keys, bucket_nums, 
probe_idx, build_idx,
+                                                            probe_rows, 
probe_idxs, build_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN) {
+            return _find_batch_left_semi_anti<JoinOpType>(keys, bucket_nums, 
probe_idx, probe_rows,
+                                                          probe_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            return _find_batch_right_semi_anti(keys, bucket_nums, probe_idx, 
probe_rows);
+        }
+        return std::tuple {0, 0u, 0};
+    }
+
+    template <int JoinOpType>
+    bool iterate_map(std::vector<uint32_t>& build_idxs) const {
+        const auto batch_size = max_batch_size;
+        const auto elem_num = visited.size();
+        int count = 0;
+        build_idxs.resize(batch_size);
+
+        while (count < batch_size && iter_idx < elem_num) {
+            const auto matched = visited[iter_idx];
+            build_idxs[count] = iter_idx;
+            if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN) {
+                count += !matched;
+            } else {
+                count += matched;
+            }
+            iter_idx++;
+        }
+
+        build_idxs.resize(count);
+        return iter_idx >= elem_num;
+    }
+
+private:
+    auto _find_batch_right_semi_anti(const Key* __restrict keys,
+                                     const uint32_t* __restrict bucket_nums, 
int probe_idx,
+                                     int probe_rows) {
+        while (probe_idx < probe_rows) {
+            auto build_idx = first[bucket_nums[probe_idx]];
+
+            while (build_idx) {
+                if (keys[probe_idx] == build_keys[build_idx]) {
+                    visited[build_idx] = 1;
+                }
+                build_idx = next[build_idx];
+            }
+            probe_idx++;
+        }
+        return std::tuple {probe_idx, 0u, 0};
+    }
+
+    template <int JoinOpType>
+    auto _find_batch_left_semi_anti(const Key* __restrict keys,
+                                    const uint32_t* __restrict bucket_nums, 
int probe_idx,
+                                    int probe_rows, uint32_t* __restrict 
probe_idxs) {
+        auto matched_cnt = 0;
+        const auto batch_size = max_batch_size;
+
+        while (probe_idx < probe_rows && matched_cnt < batch_size) {
+            uint32_t bucket_num = bucket_nums[probe_idx];
+            auto build_idx = first[bucket_num];
+
+            while (build_idx) {
+                if (keys[probe_idx] == build_keys[build_idx]) {
+                    break;
+                }
+                build_idx = next[build_idx];
+            }
+            bool matched =
+                    JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN ? build_idx 
!= 0 : build_idx == 0;
+            matched_cnt += matched;
+            probe_idxs[matched_cnt - matched] = probe_idx++;
+        }
+        return std::tuple {probe_idx, 0u, matched_cnt};

Review Comment:
   warning: integer literal has suffix 'u', which is not uppercase [readability-uppercase-literal-suffix]
   
   ```suggestion
           return std::tuple {probe_idx, 0U, matched_cnt};
   ```
   



##########
be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp:
##########
@@ -140,47 +143,37 @@ Status 
DataTypeDateTimeV2SerDe::write_column_to_mysql(const IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeDateTimeV2SerDe::write_column_to_orc(const IColumn& column, 
const NullMap* null_map,
+Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& 
timezone,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone,
   ```
   
   be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp:149:
   ```diff
   -                                                     std::vector<StringRef>& buffer_list) const {
   +                                                     std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_quantilestate_serde.h:
##########
@@ -121,8 +121,9 @@ class DataTypeQuantileStateSerDe : public DataTypeSerDe {
         return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
     }
 
-    Status write_column_to_orc(const IColumn& column, const NullMap* null_map,
-                               orc::ColumnVectorBatch* orc_col_batch, int 
start, int end,
+    Status write_column_to_orc(const std::string& timezone, const IColumn& 
column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static Status write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_quantilestate_serde.h:126:
   ```diff
   -                                std::vector<StringRef>& buffer_list) const override {
   +                                std::vector<StringRef>& buffer_list) override {
   ```
   



##########
be/src/vec/data_types/serde/data_type_string_serde.cpp:
##########
@@ -239,7 +239,8 @@ Status DataTypeStringSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeStringSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeStringSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeStringSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_string_serde.cpp:245:
   ```diff
   -                                                 std::vector<StringRef>& buffer_list) const {
   +                                                 std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/pipeline/exec/scan_operator.h:
##########
@@ -219,7 +220,8 @@ class ScanLocalState : public ScanLocalStateBase {
     }
 
     Status clone_conjunct_ctxs(vectorized::VExprContextSPtrs& conjuncts) 
override;
-    virtual void set_scan_ranges(const std::vector<TScanRangeParams>& 
scan_ranges) override {}
+    virtual void set_scan_ranges(RuntimeState* state,

Review Comment:
   warning: 'virtual' is redundant since the function is already declared 'override' [modernize-use-override]
   
   ```suggestion
       void set_scan_ranges(RuntimeState* state,
   ```
   



##########
be/src/vec/columns/column_string.cpp:
##########
@@ -161,6 +161,43 @@ void ColumnString::insert_indices_from(const IColumn& src, 
const int* indices_be
     }
 }
 
+void ColumnString::insert_indices_from_join(const IColumn& src, const 
uint32_t* indices_begin,

Review Comment:
   warning: method 'insert_indices_from_join' can be made static [readability-convert-member-functions-to-static]
   
   be/src/vec/columns/column_string.h:477:
   ```diff
   -     void insert_indices_from_join(const IColumn& src, const uint32_t* indices_begin,
   +     static void insert_indices_from_join(const IColumn& src, const uint32_t* indices_begin,
   ```
   



##########
be/src/vec/common/hash_table/hash_map.h:
##########
@@ -20,6 +20,10 @@
 
 #pragma once
 
+#include <gen_cpp/PlanNodes_types.h>

Review Comment:
   warning: 'gen_cpp/PlanNodes_types.h' file not found [clang-diagnostic-error]
   ```cpp
   #include <gen_cpp/PlanNodes_types.h>
            ^
   ```
   



##########
be/src/exprs/runtime_filter.cpp:
##########
@@ -508,24 +476,33 @@ class RuntimePredicateWrapper {
         }
     }
 
-    void insert_batch(const vectorized::ColumnPtr column, const 
std::vector<int>& rows) {
+    void insert_batch(const vectorized::ColumnPtr& column, size_t start) {
         if (get_real_type() == RuntimeFilterType::BITMAP_FILTER) {
-            bitmap_filter_insert_batch(column, rows);
-        } else if (IRuntimeFilter::enable_use_batch(_be_exec_version > 0, 
_column_return_type)) {
-            insert_fixed_len(column->get_raw_data().data, rows.data(), 
rows.size());
+            bitmap_filter_insert_batch(column, start);
         } else {
-            for (int index : rows) {
-                insert(column->get_data_at(index));
-            }
+            insert_fixed_len(column, start);
         }
     }
 
-    void bitmap_filter_insert_batch(const vectorized::ColumnPtr column,
-                                    const std::vector<int>& rows) {
+    void bitmap_filter_insert_batch(const vectorized::ColumnPtr column, size_t 
start) {

Review Comment:
   warning: method 'bitmap_filter_insert_batch' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static void bitmap_filter_insert_batch(const vectorized::ColumnPtr column, size_t start) {
   ```
   



##########
be/src/vec/common/hash_table/hash_map.h:
##########
@@ -193,10 +197,214 @@
     bool has_null_key_data() const { return false; }
 };
 
+template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
+class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
+public:
+    using Self = JoinHashMapTable;
+    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
+
+    using key_type = Key;
+    using value_type = typename Cell::value_type;
+    using mapped_type = typename Cell::Mapped;
+
+    using LookupResult = typename Base::LookupResult;
+
+    using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
+
+    static uint32_t calc_bucket_size(size_t num_elem) {
+        size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem 
- 1) / 7;

Review Comment:
   warning: 7 is a magic number; consider replacing it with a named constant [readability-magic-numbers]
   ```cpp
           size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem - 1) / 7;
                                                                                        ^
   ```
   



##########
be/src/vec/data_types/serde/data_type_date64_serde.cpp:
##########
@@ -271,7 +271,8 @@ Status DataTypeDate64SerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeDate64SerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeDate64SerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeDate64SerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_date64_serde.cpp:277:
   ```diff
   -                                                 std::vector<StringRef>& buffer_list) const {
   +                                                 std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/columns/column_string.cpp:
##########
@@ -161,6 +161,43 @@
     }
 }
 
+void ColumnString::insert_indices_from_join(const IColumn& src, const 
uint32_t* indices_begin,
+                                            const uint32_t* indices_end) {
+    const ColumnString& src_str = assert_cast<const ColumnString&>(src);
+    auto src_offset_data = src_str.offsets.data();
+
+    auto old_char_size = chars.size();
+    size_t total_chars_size = old_char_size;
+
+    auto dst_offsets_pos = offsets.size();
+    offsets.resize(offsets.size() + indices_end - indices_begin);
+    auto* dst_offsets_data = offsets.data();
+
+    for (auto x = indices_begin; x != indices_end; ++x) {

Review Comment:
   warning: 'auto x' can be declared as 'const auto *x' [readability-qualified-auto]
   
   ```suggestion
       for (const auto *x = indices_begin; x != indices_end; ++x) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_array_serde.cpp:
##########
@@ -326,7 +326,8 @@ Status DataTypeArraySerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeArraySerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeArraySerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeArraySerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_array_serde.cpp:331:
   ```diff
   -                                                int end, std::vector<StringRef>& buffer_list) const {
   +                                                int end, std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_hll_serde.cpp:
##########
@@ -186,7 +186,8 @@ Status DataTypeHLLSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeHLLSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeHLLSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeHLLSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_hll_serde.cpp:191:
   ```diff
   -                                              int end, std::vector<StringRef>& buffer_list) const {
   +                                              int end, std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/common/hash_table/hash_map.h:
##########
@@ -193,10 +197,214 @@
     bool has_null_key_data() const { return false; }
 };
 
+template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = 
HashTableAllocator>
+class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, 
Allocator> {
+public:
+    using Self = JoinHashMapTable;
+    using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>;
+
+    using key_type = Key;
+    using value_type = typename Cell::value_type;
+    using mapped_type = typename Cell::Mapped;
+
+    using LookupResult = typename Base::LookupResult;
+
+    using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable;
+
+    static uint32_t calc_bucket_size(size_t num_elem) {
+        size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem 
- 1) / 7;
+        return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1;
+    }
+
+    template <int JoinOpType>
+    void prepare_build(size_t num_elem, int batch_size) {
+        max_batch_size = batch_size;
+        bucket_size = calc_bucket_size(num_elem + 1);
+        first.resize(bucket_size, 0);
+        next.resize(num_elem);
+
+        if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            visited.resize(num_elem, 0);
+        }
+    }
+
+    uint32_t get_bucket_size() const { return bucket_size; }
+
+    size_t size() const { return next.size(); }
+
+    void build(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+               size_t num_elem) {
+        build_keys = keys;
+        for (size_t i = 1; i < num_elem; i++) {
+            uint32_t bucket_num = bucket_nums[i];
+            next[i] = first[bucket_num];
+            first[bucket_num] = i;
+        }
+    }
+
+    template <int JoinOpType>
+    auto find_batch(const Key* __restrict keys, const uint32_t* __restrict 
bucket_nums,
+                    int probe_idx, uint32_t build_idx, int probe_rows,
+                    uint32_t* __restrict probe_idxs, uint32_t* __restrict 
build_idxs) {
+        if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN ||
+                      JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
+            return _find_batch_inner_outer_join<JoinOpType>(keys, bucket_nums, 
probe_idx, build_idx,
+                                                            probe_rows, 
probe_idxs, build_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN) {
+            return _find_batch_left_semi_anti<JoinOpType>(keys, bucket_nums, 
probe_idx, probe_rows,
+                                                          probe_idxs);
+        }
+        if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
+                      JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
+            return _find_batch_right_semi_anti(keys, bucket_nums, probe_idx, 
probe_rows);
+        }
+        return std::tuple {0, 0u, 0};
+    }
+
+    template <int JoinOpType>
+    bool iterate_map(std::vector<uint32_t>& build_idxs) const {
+        const auto batch_size = max_batch_size;
+        const auto elem_num = visited.size();
+        int count = 0;
+        build_idxs.resize(batch_size);
+
+        while (count < batch_size && iter_idx < elem_num) {
+            const auto matched = visited[iter_idx];
+            build_idxs[count] = iter_idx;
+            if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN) {
+                count += !matched;
+            } else {
+                count += matched;
+            }
+            iter_idx++;
+        }
+
+        build_idxs.resize(count);
+        return iter_idx >= elem_num;
+    }
+
+private:
+    auto _find_batch_right_semi_anti(const Key* __restrict keys,
+                                     const uint32_t* __restrict bucket_nums, 
int probe_idx,
+                                     int probe_rows) {
+        while (probe_idx < probe_rows) {
+            auto build_idx = first[bucket_nums[probe_idx]];
+
+            while (build_idx) {
+                if (keys[probe_idx] == build_keys[build_idx]) {
+                    visited[build_idx] = 1;
+                }
+                build_idx = next[build_idx];
+            }
+            probe_idx++;
+        }
+        return std::tuple {probe_idx, 0u, 0};

Review Comment:
   warning: integer literal has suffix 'u', which is not uppercase [readability-uppercase-literal-suffix]
   
   ```suggestion
           return std::tuple {probe_idx, 0U, 0};
   ```
   



##########
be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h:
##########
@@ -100,8 +100,9 @@ class DataTypeFixedLengthObjectSerDe : public DataTypeSerDe 
{
         return Status::NotSupported("write_column_to_pb with type " + 
column.get_name());
     }
 
-    Status write_column_to_orc(const IColumn& column, const NullMap* null_map,
-                               orc::ColumnVectorBatch* orc_col_batch, int 
start, int end,
+    Status write_column_to_orc(const std::string& timezone, const IColumn& 
column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
       static Status write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_fixedlengthobject_serde.h:105:
   ```diff
   -                                std::vector<StringRef>& buffer_list) const override {
   +                                std::vector<StringRef>& buffer_list) override {
   ```
   



##########
be/src/vec/data_types/serde/data_type_bitmap_serde.cpp:
##########
@@ -139,7 +139,8 @@ Status DataTypeBitMapSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeBitMapSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeBitMapSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeBitMapSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_bitmap_serde.cpp:145:
   ```diff
   -                                                 std::vector<StringRef>& buffer_list) const {
   +                                                 std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_datev2_serde.cpp:
##########
@@ -144,7 +144,8 @@ Status DataTypeDateV2SerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeDateV2SerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_datev2_serde.cpp:150:
   ```diff
   -                                                 std::vector<StringRef>& buffer_list) const {
   +                                                 std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_map_serde.cpp:
##########
@@ -476,7 +476,8 @@ Status DataTypeMapSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeMapSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeMapSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeMapSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_map_serde.cpp:481:
   ```diff
   -                                              int end, std::vector<StringRef>& buffer_list) const {
   +                                              int end, std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:
##########
@@ -115,7 +115,8 @@ void DataTypeJsonbSerDe::write_column_to_arrow(const 
IColumn& column, const Null
     }
 }
 
-Status DataTypeJsonbSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeJsonbSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeJsonbSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_jsonb_serde.cpp:120:
   ```diff
   -                                                int end, std::vector<StringRef>& buffer_list) const {
   +                                                int end, std::vector<StringRef>& buffer_list) {
   ```
   



##########
be/src/vec/data_types/serde/data_type_nullable_serde.cpp:
##########
@@ -317,7 +317,8 @@ Status DataTypeNullableSerDe::write_column_to_mysql(const 
IColumn& column,
     return _write_column_to_mysql(column, row_buffer, row_idx, col_const);
 }
 
-Status DataTypeNullableSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeNullableSerDe::write_column_to_orc(const std::string& timezone,
   ```
   
   be/src/vec/data_types/serde/data_type_nullable_serde.cpp:323:
   ```diff
   - ,
   - {
   + ,
   + {
   ```
   



##########
be/src/vec/data_types/serde/data_type_object_serde.cpp:
##########
@@ -21,7 +21,8 @@
 namespace doris {
 
 namespace vectorized {
-Status DataTypeObjectSerDe::write_column_to_orc(const IColumn& column, const 
NullMap* null_map,
+Status DataTypeObjectSerDe::write_column_to_orc(const std::string& timezone, 
const IColumn& column,

Review Comment:
   warning: method 'write_column_to_orc' can be made static [readability-convert-member-functions-to-static]
   
   ```suggestion
   static Status DataTypeObjectSerDe::write_column_to_orc(const std::string& timezone, const IColumn& column,
   ```
   
   be/src/vec/data_types/serde/data_type_object_serde.cpp:27:
   ```diff
   -                                                 std::vector<StringRef>& buffer_list) const {
   +                                                 std::vector<StringRef>& buffer_list) {
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org


Reply via email to