This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new dfe308f501 [Improvement](join) refine prefetch strategy (#13286)
dfe308f501 is described below

commit dfe308f501fe7acb927b00ee2b9c1096f49019d9
Author: Gabriel <gabrielleeb...@gmail.com>
AuthorDate: Wed Oct 12 19:02:06 2022 +0800

    [Improvement](join) refine prefetch strategy (#13286)
---
 be/src/vec/common/columns_hashing_impl.h  |  6 ++++++
 be/src/vec/common/hash_table/hash_table.h | 11 +++++++++++
 be/src/vec/exec/join/vhash_join_node.cpp  | 18 +++++++++++-------
 3 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h
index 7bf4fd3132..e2f03f26c6 100644
--- a/be/src/vec/common/columns_hashing_impl.h
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -180,6 +180,12 @@ public:
         data.prefetch(key_holder);
     }
 
+    template <bool READ, typename Data>
+    ALWAYS_INLINE void prefetch(Data& data, size_t row, Arena& pool) {
+        auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
+        data.template prefetch<READ>(key_holder);
+    }
+
 protected:
     Cache cache;
 
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index a59cf972ff..e588ed1b8a 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -908,6 +908,17 @@ public:
         __builtin_prefetch(&buf[place_value]);
     }
 
+    template <bool READ, typename KeyHolder>
+    void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
+        // Two optional arguments:
+        // 'rw': 1 means the memory access is write
+        // 'locality': 0-3. 0 means no temporal locality. 3 means high temporal locality.
+        const auto& key = key_holder_get_key(key_holder);
+        auto hash_value = hash(key);
+        auto place_value = grower.place(hash_value);
+        __builtin_prefetch(&buf[place_value], READ ? 0 : 1, 1);
+    }
+
     /// Reinsert node pointed to by iterator
     void ALWAYS_INLINE reinsert(iterator& it, size_t hash_value) {
         reinsert(*it.get_ptr(), hash_value);
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 05040fc2b6..dd77bc2a18 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -30,6 +30,10 @@
 
 namespace doris::vectorized {
 
+// TODO: Best prefetch step is decided by machine. We should also provide a
+//  SQL hint to allow users to tune by hand.
+static constexpr int PREFETCH_STEP = 64;
+
 using ProfileCounter = RuntimeProfile::Counter;
 template <class HashTableContext>
 struct ProcessHashTableBuild {
@@ -80,8 +84,9 @@ struct ProcessHashTableBuild {
 
             auto emplace_result =
                     key_getter.emplace_key(hash_table_ctx.hash_table, k, _join_node->_arena);
-            if (k + 1 < _rows) {
-                key_getter.prefetch(hash_table_ctx.hash_table, k + 1, _join_node->_arena);
+            if (k + PREFETCH_STEP < _rows) {
+                key_getter.template prefetch<false>(hash_table_ctx.hash_table, k + PREFETCH_STEP,
+                                                    _join_node->_arena);
             }
 
             if (emplace_result.is_inserted()) {
@@ -321,6 +326,10 @@ struct ProcessHashTableProbe {
                                                                           
_arena)) {nullptr, false}
                                            : 
key_getter.find_key(hash_table_ctx.hash_table,
                                                                  _probe_index, 
_arena);
+                // prefetch is more useful while matching to multiple rows
+                if (_probe_index + PREFETCH_STEP < _probe_rows)
+                    key_getter.template prefetch<true>(hash_table_ctx.hash_table,
+                                                       _probe_index + PREFETCH_STEP, _arena);
 
                 if constexpr (JoinOpType::value == TJoinOp::LEFT_ANTI_JOIN) {
                     if (!find_result.is_found()) {
@@ -344,11 +353,6 @@ struct ProcessHashTableProbe {
                                 ++current_offset;
                             }
                         } else {
-                            // prefetch is more useful while matching to multiple rows
-                            if (_probe_index + 2 < _probe_rows)
-                                key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2,
-                                                    _arena);
-
                             for (auto it = mapped.begin(); it.ok(); ++it) {
                                 if constexpr (!is_right_semi_anti_join) {
                                     if (current_offset < _batch_size) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to