github-actions[bot] commented on code in PR #26089: URL: https://github.com/apache/doris/pull/26089#discussion_r1379870107
########## be/src/vec/common/hash_table/hash_map.h: ########## @@ -193,10 +197,229 @@ class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> { bool has_null_key_data() const { return false; } }; +template <typename Key, typename Cell, typename Hash = DefaultHash<Key>, + typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator> +class JoinHashMapTable : public HashMapTable<Key, Cell, Hash, Grower, Allocator> { +public: + using Self = JoinHashMapTable; + using Base = HashMapTable<Key, Cell, Hash, Grower, Allocator>; + + using key_type = Key; + using value_type = typename Cell::value_type; + using mapped_type = typename Cell::Mapped; + + using LookupResult = typename Base::LookupResult; + + using HashMapTable<Key, Cell, Hash, Grower, Allocator>::HashMapTable; + + static uint32_t calc_bucket_size(size_t num_elem) { + size_t expect_bucket_size = static_cast<size_t>(num_elem) + (num_elem - 1) / 7; + return phmap::priv::NormalizeCapacity(expect_bucket_size) + 1; + } + + template <int JoinOpType> + void prepare_build(size_t num_elem, int batch_size) { + max_batch_size = batch_size; + bucket_size = calc_bucket_size(num_elem + 1); + first.resize(bucket_size, 0); + next.resize(num_elem); + + if constexpr (JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || + JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN || + JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { + visited.resize(num_elem, 0); + } + } + + uint32_t get_bucket_size() const { return bucket_size; } + + size_t size() const { return next.size(); } + + std::vector<uint8_t>& get_visited() { return visited; } + + void build(const Key* __restrict keys, const uint32_t* __restrict bucket_nums, + size_t num_elem) { + build_keys = keys; + for (size_t i = 1; i < num_elem; i++) { + uint32_t bucket_num = bucket_nums[i]; + next[i] = first[bucket_num]; + first[bucket_num] = i; + } + } + + template <int JoinOpType, bool with_other_conjuncts> + auto 
find_batch(const Key* __restrict keys, const uint32_t* __restrict bucket_nums, + int probe_idx, uint32_t build_idx, int probe_rows, + uint32_t* __restrict probe_idxs, uint32_t* __restrict build_idxs) { + if constexpr (JoinOpType == doris::TJoinOp::INNER_JOIN || + JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN || + JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN || + JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) { + return _find_batch_inner_outer_join<JoinOpType, with_other_conjuncts>( + keys, bucket_nums, probe_idx, build_idx, probe_rows, probe_idxs, build_idxs); + } + if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN || + JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN) { + return _find_batch_left_semi_anti<JoinOpType>(keys, bucket_nums, probe_idx, probe_rows, + probe_idxs); + } + if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) { + return _find_batch_right_semi_anti<with_other_conjuncts>( + keys, bucket_nums, probe_idx, probe_rows, probe_idxs, build_idxs); + } + return std::tuple {0, 0u, 0}; + } + + template <int JoinOpType> + bool iterate_map(std::vector<uint32_t>& build_idxs) const { + const auto batch_size = max_batch_size; + const auto elem_num = visited.size(); + int count = 0; + build_idxs.resize(batch_size); + + while (count < batch_size && iter_idx < elem_num) { + const auto matched = visited[iter_idx]; + build_idxs[count] = iter_idx; + if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN) { + count += !matched; + } else { + count += matched; + } + iter_idx++; + } + + build_idxs.resize(count); + return iter_idx >= elem_num; + } + +private: + template <bool with_other_conjuncts> + auto _find_batch_right_semi_anti(const Key* __restrict keys, + const uint32_t* __restrict bucket_nums, int probe_idx, + int probe_rows, uint32_t* __restrict probe_idxs, + uint32_t* __restrict build_idxs) { + auto matched_cnt = 0; + while (probe_idx < probe_rows) { + auto build_idx = 
first[bucket_nums[probe_idx]]; + + while (build_idx) { + if (keys[probe_idx] == build_keys[build_idx]) { + if constexpr (with_other_conjuncts) { + build_idxs[matched_cnt] = build_idx; + probe_idxs[matched_cnt] = probe_idx; + matched_cnt++; + } else { + visited[build_idx] = 1; + } + } + build_idx = next[build_idx]; + } + probe_idx++; + } + return std::tuple {probe_idx, 0u, matched_cnt};

Review Comment:
warning: integer literal has suffix 'u', which is not uppercase [readability-uppercase-literal-suffix]

```suggestion
return std::tuple {probe_idx, 0U, matched_cnt};
```

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at: us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org