This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new baf2689610 [Improvement](join) compute hash values by vectorized way (#13335)
baf2689610 is described below

commit baf2689610f09577c2552983bf0d09266e5623e8
Author: Gabriel <gabrielleeb...@gmail.com>
AuthorDate: Thu Oct 13 16:04:58 2022 +0800

    [Improvement](join) compute hash values by vectorized way (#13335)
---
 be/src/vec/common/columns_hashing.h       |  6 ++---
 be/src/vec/common/columns_hashing_impl.h  |  5 ++++
 be/src/vec/common/hash_table/hash_table.h | 15 +++++++++++
 be/src/vec/exec/join/vhash_join_node.cpp  | 42 +++++++++++++++++++++++++------
 be/src/vec/exec/join/vhash_join_node.h    | 12 +++++++++
 5 files changed, 70 insertions(+), 10 deletions(-)
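Note for readers skimming the archive: the core of this patch is a two-pass build loop, so the hash computation runs in one tight, vectorization-friendly pass and the insert pass can prefetch buckets by precomputed hash. Below is a minimal standalone sketch of that pattern with a hypothetical toy table (ToyTable, build_side and friends are illustrative stand-ins, not the Doris HashTable/KeyGetter API); the real patch additionally skips null rows and dispatches on the key-getter type when hashing, as the hunks further down show.

// Minimal standalone sketch of the build-side pattern in this patch (hypothetical
// ToyTable type, not the Doris HashTable/KeyGetter API): pass 1 computes all hash
// values in a tight loop; pass 2 inserts while prefetching buckets by precomputed hash.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <vector>

static constexpr size_t PREFETCH_STEP = 64;

struct Bucket {
    uint64_t key = 0;
    uint32_t row = 0;
    bool occupied = false;
};

// Toy open-addressing table; bucket count must be a power of two and
// larger than the number of inserted keys for this sketch to terminate.
struct ToyTable {
    std::vector<Bucket> buf;
    explicit ToyTable(size_t size) : buf(size) {}

    size_t hash(uint64_t key) const { return std::hash<uint64_t>{}(key); }
    size_t place(size_t hash_value) const { return hash_value & (buf.size() - 1); }

    // Mirrors the spirit of prefetch_by_hash<false> below: rw=1 (write), locality=1.
    void prefetch_by_hash(size_t hash_value) const {
        __builtin_prefetch(&buf[place(hash_value)], 1, 1);
    }

    // Insert using a hash value that was computed up front.
    void emplace(uint64_t key, uint32_t row, size_t hash_value) {
        size_t pos = place(hash_value);
        while (buf[pos].occupied && buf[pos].key != key) {
            pos = (pos + 1) & (buf.size() - 1); // linear probing
        }
        buf[pos] = Bucket{key, row, true};
    }
};

void build_side(ToyTable& table, const std::vector<uint64_t>& keys) {
    const size_t rows = keys.size();

    // Pass 1: hash-only loop (the span covered by the new BuildSideHashComputingTime timer).
    std::vector<size_t> hash_values(rows);
    for (size_t k = 0; k < rows; ++k) {
        hash_values[k] = table.hash(keys[k]);
    }

    // Pass 2: insert, prefetching the bucket PREFETCH_STEP rows ahead
    // from the precomputed hash instead of re-hashing the key.
    for (size_t k = 0; k < rows; ++k) {
        if (k + PREFETCH_STEP < rows) {
            table.prefetch_by_hash(hash_values[k + PREFETCH_STEP]);
        }
        table.emplace(keys[k], static_cast<uint32_t>(k), hash_values[k]);
    }
}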
diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h
index cc3e634d67..dfecead77e 100644
--- a/be/src/vec/common/columns_hashing.h
+++ b/be/src/vec/common/columns_hashing.h
@@ -131,9 +131,6 @@ struct HashMethodSerialized
 
     void set_serialized_keys(const StringRef* keys_) { keys = keys_; }
 
-protected:
-    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
-
     ALWAYS_INLINE KeyHolderType get_key_holder(size_t row, Arena& pool) const {
         if constexpr (keys_pre_serialized) {
             return KeyHolderType {keys[row], pool};
@@ -142,6 +139,9 @@ protected:
                     serialize_keys_to_pool_contiguous(row, keys_size, key_columns, pool), pool};
         }
     }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
 };
 
 template <typename HashMethod>
diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h
index e2f03f26c6..13cc375c82 100644
--- a/be/src/vec/common/columns_hashing_impl.h
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -186,6 +186,11 @@ public:
         data.template prefetch<READ>(key_holder);
     }
 
+    template <bool READ, typename Data>
+    ALWAYS_INLINE void prefetch_by_hash(Data& data, size_t hash_value) {
+        data.template prefetch_by_hash<READ>(hash_value);
+    }
+
 protected:
     Cache cache;
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index e588ed1b8a..8a10634304 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -908,6 +908,15 @@ public:
         __builtin_prefetch(&buf[place_value]);
     }
 
+    template <bool READ>
+    void ALWAYS_INLINE prefetch_by_hash(size_t hash_value) {
+        // Two optional arguments:
+        // 'rw': 1 means the memory access is write
+        // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality.
+        auto place_value = grower.place(hash_value);
+        __builtin_prefetch(&buf[place_value], READ ? 0 : 1, 1);
+    }
+
     template <bool READ, typename KeyHolder>
     void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
         // Two optional arguments:
@@ -967,6 +976,12 @@ public:
         emplace_non_zero(key_holder, it, inserted, hash_value);
     }
 
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, size_t hash_value,
+                               bool& inserted) {
+        emplace(key_holder, it, inserted, hash_value);
+    }
+
     template <typename KeyHolder, typename Func>
     void ALWAYS_INLINE lazy_emplace(KeyHolder&& key_holder, LookupResult& it, Func&& f) {
         const auto& key = key_holder_get_key(key_holder);
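The join-node changes that follow rely on a small trait to decide, at compile time, whether the holder returned by get_key_holder() carries a .key member (serialized keys) or is the key value itself. A rough standalone sketch of that dispatch, with hypothetical stand-in types rather than the actual Doris classes, looks like this:

// Standalone sketch (stand-in types, not the Doris classes) of the trait-based
// dispatch used in the hashing loop of ProcessHashTableBuild below.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>

struct SerializedKeyHolder {
    std::string key; // serialized row key; in Doris this is materialized into an arena
};

struct SerializedKeyGetter {   // stand-in for ColumnsHashing::HashMethodSerialized<...>
    SerializedKeyHolder get_key_holder(size_t /*row*/) const { return {"serialized-bytes"}; }
};

struct PrimitiveKeyGetter {    // stand-in for a fixed-width key getter
    uint64_t get_key_holder(size_t row) const { return row * 37; }
};

// False by default, specialized to true for the serialized key getter.
template <typename KeyGetter>
struct IsSerializedTraits {
    constexpr static bool value = false;
};

template <>
struct IsSerializedTraits<SerializedKeyGetter> {
    constexpr static bool value = true;
};

template <typename KeyGetter>
size_t hash_row(const KeyGetter& key_getter, size_t row) {
    if constexpr (IsSerializedTraits<KeyGetter>::value) {
        // Serialized holder: hash the materialized key bytes.
        return std::hash<std::string>{}(key_getter.get_key_holder(row).key);
    } else {
        // Primitive holder: the holder is the key value itself.
        return std::hash<uint64_t>{}(key_getter.get_key_holder(row));
    }
}

In the actual patch the trait is IsSerializedHashTableContextTraits, specialized on ColumnsHashing::HashMethodSerialized, and the hash comes from hash_table_ctx.hash_table.hash().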
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index dd77bc2a18..7134ec464d 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -45,7 +45,8 @@ struct ProcessHashTableBuild {
               _build_raw_ptrs(build_raw_ptrs),
               _join_node(join_node),
               _batch_size(batch_size),
-              _offset(offset) {}
+              _offset(offset),
+              _build_side_compute_hash_timer(join_node->_build_side_compute_hash_timer) {}
 
     template <bool ignore_null, bool build_unique, bool has_runtime_filter>
     void run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map) {
@@ -75,6 +76,26 @@ struct ProcessHashTableBuild {
             inserted_rows.reserve(_batch_size);
         }
 
+        _build_side_hash_values.resize(_rows);
+        auto& arena = _join_node->_arena;
+        {
+            SCOPED_TIMER(_build_side_compute_hash_timer);
+            for (size_t k = 0; k < _rows; ++k) {
+                if constexpr (ignore_null) {
+                    if ((*null_map)[k]) {
+                        continue;
+                    }
+                }
+                if constexpr (IsSerializedHashTableContextTraits<KeyGetter>::value) {
+                    _build_side_hash_values[k] =
+                            hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena).key);
+                } else {
+                    _build_side_hash_values[k] =
+                            hash_table_ctx.hash_table.hash(key_getter.get_key_holder(k, arena));
+                }
+            }
+        }
+
         for (size_t k = 0; k < _rows; ++k) {
             if constexpr (ignore_null) {
                 if ((*null_map)[k]) {
@@ -82,11 +103,11 @@ struct ProcessHashTableBuild {
                 }
             }
 
-            auto emplace_result =
-                    key_getter.emplace_key(hash_table_ctx.hash_table, k, _join_node->_arena);
+            auto emplace_result = key_getter.emplace_key(hash_table_ctx.hash_table,
+                                                         _build_side_hash_values[k], k, arena);
             if (k + PREFETCH_STEP < _rows) {
-                key_getter.template prefetch<false>(hash_table_ctx.hash_table, k + PREFETCH_STEP,
-                                                    _join_node->_arena);
+                key_getter.template prefetch_by_hash<false>(
+                        hash_table_ctx.hash_table, _build_side_hash_values[k + PREFETCH_STEP]);
             }
 
             if (emplace_result.is_inserted()) {
@@ -128,6 +149,9 @@ private:
     HashJoinNode* _join_node;
     int _batch_size;
    uint8_t _offset;
+
+    ProfileCounter* _build_side_compute_hash_timer;
+    std::vector<size_t> _build_side_hash_values;
 };
 
 template <class HashTableContext>
@@ -326,7 +350,6 @@ struct ProcessHashTableProbe {
                                                               _arena)) {nullptr, false}
                             : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
 
-            // prefetch is more useful while matching to multiple rows
             if (_probe_index + PREFETCH_STEP < _probe_rows)
                 key_getter.template prefetch<true>(hash_table_ctx.hash_table,
                                                    _probe_index + PREFETCH_STEP, _arena);
@@ -445,7 +468,9 @@ struct ProcessHashTableProbe {
                             ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
                                                            _arena)) {nullptr, false}
                             : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
-
+            if (_probe_index + PREFETCH_STEP < _probe_rows)
+                key_getter.template prefetch<true>(hash_table_ctx.hash_table,
+                                                   _probe_index + PREFETCH_STEP, _arena);
             if (find_result.is_found()) {
                 auto& mapped = find_result.get_mapped();
                 auto origin_offset = current_offset;
@@ -867,10 +892,12 @@ Status HashJoinNode::prepare(RuntimeState* state) {
     runtime_profile()->add_child(build_phase_profile, false, nullptr);
     _build_timer = ADD_TIMER(build_phase_profile, "BuildTime");
     _build_table_timer = ADD_TIMER(build_phase_profile, "BuildTableTime");
+    _build_side_merge_block_timer = ADD_TIMER(build_phase_profile, "BuildSideMergeBlockTime");
     _build_table_insert_timer = ADD_TIMER(build_phase_profile, "BuildTableInsertTime");
     _build_expr_call_timer = ADD_TIMER(build_phase_profile, "BuildExprCallTime");
     _build_table_expanse_timer = ADD_TIMER(build_phase_profile, "BuildTableExpanseTime");
     _build_rows_counter = ADD_COUNTER(build_phase_profile, "BuildRows", TUnit::UNIT);
+    _build_side_compute_hash_timer = ADD_TIMER(build_phase_profile, "BuildSideHashComputingTime");
 
     // Probe phase
     auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
@@ -1146,6 +1173,7 @@ Status HashJoinNode::_hash_table_build(RuntimeState* state) {
         _mem_used += block.allocated_bytes();
 
         if (block.rows() != 0) {
+            SCOPED_TIMER(_build_side_merge_block_timer);
             mutable_block.merge(block);
         }
 
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 48cb54e67a..923999626d 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -50,6 +50,16 @@ struct SerializedHashTableContext {
     }
 };
 
+template <typename HashMethod>
+struct IsSerializedHashTableContextTraits {
+    constexpr static bool value = false;
+};
+
+template <typename Value, typename Mapped>
+struct IsSerializedHashTableContextTraits<ColumnsHashing::HashMethodSerialized<Value, Mapped>> {
+    constexpr static bool value = true;
+};
+
 // T should be UInt32 UInt64 UInt128
 template <class T>
 struct PrimaryTypeHashTableContext {
@@ -203,6 +213,8 @@ private:
     RuntimeProfile::Counter* _search_hashtable_timer;
     RuntimeProfile::Counter* _build_side_output_timer;
     RuntimeProfile::Counter* _probe_side_output_timer;
+    RuntimeProfile::Counter* _build_side_compute_hash_timer;
+    RuntimeProfile::Counter* _build_side_merge_block_timer;
     RuntimeProfile::Counter* _join_filter_timer;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org