This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new dfe308f501 [Improvement](join) refine prefetch strategy (#13286) dfe308f501 is described below commit dfe308f501fe7acb927b00ee2b9c1096f49019d9 Author: Gabriel <gabrielleeb...@gmail.com> AuthorDate: Wed Oct 12 19:02:06 2022 +0800 [Improvement](join) refine prefetch strategy (#13286) --- be/src/vec/common/columns_hashing_impl.h | 6 ++++++ be/src/vec/common/hash_table/hash_table.h | 11 +++++++++++ be/src/vec/exec/join/vhash_join_node.cpp | 18 +++++++++++------- 3 files changed, 28 insertions(+), 7 deletions(-) diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h index 7bf4fd3132..e2f03f26c6 100644 --- a/be/src/vec/common/columns_hashing_impl.h +++ b/be/src/vec/common/columns_hashing_impl.h @@ -180,6 +180,12 @@ public: data.prefetch(key_holder); } + template <bool READ, typename Data> + ALWAYS_INLINE void prefetch(Data& data, size_t row, Arena& pool) { + auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool); + data.template prefetch<READ>(key_holder); + } + protected: Cache cache; diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index a59cf972ff..e588ed1b8a 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -908,6 +908,17 @@ public: __builtin_prefetch(&buf[place_value]); } + template <bool READ, typename KeyHolder> + void ALWAYS_INLINE prefetch(KeyHolder& key_holder) { + // Two optional arguments: + // 'rw': 1 means the memory access is write + // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality. + const auto& key = key_holder_get_key(key_holder); + auto hash_value = hash(key); + auto place_value = grower.place(hash_value); + __builtin_prefetch(&buf[place_value], READ ? 
0 : 1, 1); + } + /// Reinsert node pointed to by iterator void ALWAYS_INLINE reinsert(iterator& it, size_t hash_value) { reinsert(*it.get_ptr(), hash_value); diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 05040fc2b6..dd77bc2a18 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -30,6 +30,10 @@ namespace doris::vectorized { +// TODO: Best prefetch step is decided by machine. We should also provide a +// SQL hint to allow users to tune by hand. +static constexpr int PREFETCH_STEP = 64; + using ProfileCounter = RuntimeProfile::Counter; template <class HashTableContext> struct ProcessHashTableBuild { @@ -80,8 +84,9 @@ struct ProcessHashTableBuild { auto emplace_result = key_getter.emplace_key(hash_table_ctx.hash_table, k, _join_node->_arena); - if (k + 1 < _rows) { - key_getter.prefetch(hash_table_ctx.hash_table, k + 1, _join_node->_arena); + if (k + PREFETCH_STEP < _rows) { + key_getter.template prefetch<false>(hash_table_ctx.hash_table, k + PREFETCH_STEP, + _join_node->_arena); } if (emplace_result.is_inserted()) { @@ -321,6 +326,10 @@ struct ProcessHashTableProbe { _arena)) {nullptr, false} : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena); + // prefetch is more useful while matching to multiple rows + if (_probe_index + PREFETCH_STEP < _probe_rows) + key_getter.template prefetch<true>(hash_table_ctx.hash_table, + _probe_index + PREFETCH_STEP, _arena); if constexpr (JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) { if (!find_result.is_found()) { @@ -344,11 +353,6 @@ struct ProcessHashTableProbe { ++current_offset; } } else { - // prefetch is more useful while matching to multiple rows - if (_probe_index + 2 < _probe_rows) - key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, - _arena); - for (auto it = mapped.begin(); it.ok(); ++it) { if constexpr (!is_right_semi_anti_join) { if (current_offset < _batch_size) { 
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org