This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit a5ca8833d7ad2ecbcee1962cc6e36f00bdfcffdc
Author: Pxl <pxl...@qq.com>
AuthorDate: Thu Jan 18 16:47:03 2024 +0800

    [Improvement](aggregate) optimize for small string aggregate (#29919)
---
 be/src/pipeline/exec/aggregation_sink_operator.cpp |   4 +-
 .../pipeline/exec/aggregation_source_operator.cpp  |   9 +-
 .../pipeline/exec/partition_sort_sink_operator.cpp |   4 +-
 be/src/vec/common/hash_table/hash_map.h            |   6 -
 be/src/vec/common/hash_table/hash_map_context.h    |  20 +-
 be/src/vec/common/hash_table/hash_table.h          |  28 ---
 .../vec/common/hash_table/partitioned_hash_table.h |  11 --
 be/src/vec/common/hash_table/ph_hash_map.h         |   8 -
 be/src/vec/common/hash_table/string_hash_map.h     |  66 +++----
 be/src/vec/common/hash_table/string_hash_table.h   | 219 +++++++++++----------
 be/src/vec/common/memcpy_small.h                   |  32 +++
 be/src/vec/exec/distinct_vaggregation_node.cpp     |   4 +-
 be/src/vec/exec/vaggregation_node.cpp              |  30 +--
 be/src/vec/exec/vpartition_sort_node.cpp           |   2 +-
 14 files changed, 202 insertions(+), 241 deletions(-)

diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp 
b/be/src/pipeline/exec/aggregation_sink_operator.cpp
index 3ba4dd05dc7..8d3a70a3a3a 100644
--- a/be/src/pipeline/exec/aggregation_sink_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp
@@ -536,7 +536,7 @@ void AggSinkLocalState<DependencyType, 
Derived>::_emplace_into_hash_table(
                 agg_method.init_serialized_keys(key_columns, num_rows);
 
                 auto creator = [this](const auto& ctor, auto& key, auto& 
origin) {
-                    HashMethodType::try_presis_key(key, origin, 
*_agg_arena_pool);
+                    HashMethodType::try_presis_key_and_origin(key, origin, 
*_agg_arena_pool);
                     auto mapped =
                             
Base::_shared_state->aggregate_data_container->append_data(origin);
                     auto st = _create_agg_status(mapped);
@@ -686,7 +686,7 @@ Status AggSinkLocalState<DependencyType, 
Derived>::_reset_hash_table() {
                         ((ss.total_size_of_aggregate_states + 
ss.align_aggregate_states - 1) /
                          ss.align_aggregate_states) *
                                 ss.align_aggregate_states));
-                hash_table = HashTableType();
+                agg_method.hash_table.reset(new HashTableType());
                 ss.agg_arena_pool.reset(new vectorized::Arena);
                 return Status::OK();
             },
diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp 
b/be/src/pipeline/exec/aggregation_source_operator.cpp
index dc476cf63a0..7e2fca9bc67 100644
--- a/be/src/pipeline/exec/aggregation_source_operator.cpp
+++ b/be/src/pipeline/exec/aggregation_source_operator.cpp
@@ -17,6 +17,7 @@
 
 #include "aggregation_source_operator.h"
 
+#include <memory>
 #include <string>
 
 #include "common/exception.h"
@@ -141,13 +142,13 @@ Status AggLocalState::_reset_hash_table() {
                     }
                 });
 
-                ss.aggregate_data_container.reset(new 
vectorized::AggregateDataContainer(
+                ss.aggregate_data_container = 
std::make_unique<vectorized::AggregateDataContainer>(
                         sizeof(typename HashTableType::key_type),
                         ((ss.total_size_of_aggregate_states + 
ss.align_aggregate_states - 1) /
                          ss.align_aggregate_states) *
-                                ss.align_aggregate_states));
-                hash_table = HashTableType();
-                ss.agg_arena_pool.reset(new vectorized::Arena);
+                                ss.align_aggregate_states);
+                agg_method.hash_table.reset(new HashTableType());
+                ss.agg_arena_pool = std::make_unique<vectorized::Arena>();
                 return Status::OK();
             },
             ss.agg_data->method_variant);
diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp 
b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
index 478a3eedb20..14d917d62e3 100644
--- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp
+++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp
@@ -189,7 +189,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
 
                 auto creator = [&](const auto& ctor, auto& key, auto& origin) {
                     HashMethodType::try_presis_key(key, origin, 
*local_state._agg_arena_pool);
-                    auto aggregate_data = _pool->add(new 
vectorized::PartitionBlocks());
+                    auto* aggregate_data = _pool->add(new 
vectorized::PartitionBlocks());
                     local_state._value_places.push_back(aggregate_data);
                     ctor(key, aggregate_data);
                     local_state._num_partition++;
@@ -206,7 +206,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table(
                             agg_method.lazy_emplace(state, row, creator, 
creator_for_null_key);
                     mapped->add_row_idx(row);
                 }
-                for (auto place : local_state._value_places) {
+                for (auto* place : local_state._value_places) {
                     SCOPED_TIMER(local_state._selector_block_timer);
                     place->append_block_by_selector(input_block, 
_child_x->row_desc(),
                                                     _has_global_limit, 
_partition_inner_limit,
diff --git a/be/src/vec/common/hash_table/hash_map.h 
b/be/src/vec/common/hash_table/hash_map.h
index bd7c413dd97..382f46acb74 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -155,12 +155,6 @@ public:
 
     using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable;
 
-    /// Call func(const Key &, Mapped &) for each hash map element.
-    template <typename Func>
-    void for_each_value(Func&& func) {
-        for (auto& v : *this) func(v.get_first(), v.get_second());
-    }
-
     /// Call func(Mapped &) for each hash map element.
     template <typename Func>
     void for_each_mapped(Func&& func) {
diff --git a/be/src/vec/common/hash_table/hash_map_context.h 
b/be/src/vec/common/hash_table/hash_map_context.h
index 3784379902b..d96aa2d7c65 100644
--- a/be/src/vec/common/hash_table/hash_map_context.h
+++ b/be/src/vec/common/hash_table/hash_map_context.h
@@ -128,20 +128,22 @@ struct MethodBase {
 
     template <typename State>
     ALWAYS_INLINE auto find(State& state, size_t i) {
-        prefetch<true>(i);
+        if constexpr (!is_string_hash_map()) {
+            prefetch<true>(i);
+        }
         return state.find_key_with_hash(*hash_table, hash_values[i], keys[i]);
     }
 
     template <typename State, typename F, typename FF>
     ALWAYS_INLINE auto& lazy_emplace(State& state, size_t i, F&& creator,
                                      FF&& creator_for_null_key) {
-        prefetch<false>(i);
+        if constexpr (!is_string_hash_map()) {
+            prefetch<false>(i);
+        }
         return state.lazy_emplace_key(*hash_table, i, keys[i], hash_values[i], 
creator,
                                       creator_for_null_key);
     }
 
-    static constexpr bool need_presis() { return std::is_same_v<Key, 
StringRef>; }
-
     static constexpr bool is_string_hash_map() {
         return std::is_same_v<StringHashMap<Mapped>, HashMap> ||
                std::is_same_v<DataWithNullKey<StringHashMap<Mapped>>, HashMap>;
@@ -149,7 +151,14 @@ struct MethodBase {
 
     template <typename Key, typename Origin>
     static void try_presis_key(Key& key, Origin& origin, Arena& arena) {
-        if constexpr (need_presis()) {
+        if constexpr (std::is_same_v<Key, StringRef>) {
+            key.data = arena.insert(key.data, key.size);
+        }
+    }
+
+    template <typename Key, typename Origin>
+    static void try_presis_key_and_origin(Key& key, Origin& origin, Arena& 
arena) {
+        if constexpr (std::is_same_v<Origin, StringRef>) {
             origin.data = arena.insert(origin.data, origin.size);
             if constexpr (!is_string_hash_map()) {
                 key = origin;
@@ -303,7 +312,6 @@ struct MethodOneNumber : public MethodBase<TData> {
                                                     ->get_raw_data()
                                                     .data
                                           : 
key_columns[0]->get_raw_data().data);
-        std::string name = key_columns[0]->get_name();
         if (is_join) {
             Base::init_join_bucket_num(num_rows, bucket_size, null_map);
         } else {
diff --git a/be/src/vec/common/hash_table/hash_table.h 
b/be/src/vec/common/hash_table/hash_table.h
index fcc682a49cb..7e669f275e2 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -353,29 +353,6 @@ public:
     }
 };
 
-static_assert(sizeof(HashTableGrowerWithPrecalculation<>) == 64);
-
-/** When used as a Grower, it turns a hash table into something like a lookup 
table.
-  * It remains non-optimal - the cells store the keys.
-  * Also, the compiler can not completely remove the code of passing through 
the collision resolution chain, although it is not needed.
-  * TODO Make a proper lookup table.
-  */
-template <size_t key_bits>
-struct HashTableFixedGrower {
-    size_t buf_size() const { return 1ULL << key_bits; }
-    size_t place(size_t x) const { return x; }
-    /// You could write __builtin_unreachable(), but the compiler does not 
optimize everything, and it turns out less efficiently.
-    size_t next(size_t pos) const { return pos + 1; }
-    bool overflow(size_t /*elems*/) const { return false; }
-
-    void increase_size() {
-        LOG(FATAL) << "__builtin_unreachable";
-        __builtin_unreachable();
-    }
-    void set(size_t /*num_elems*/) {}
-    void set_buf_size(size_t /*buf_size_*/) {}
-};
-
 /** If you want to store the zero key separately - a place to store it. */
 template <bool need_zero_value_storage, typename Cell>
 struct ZeroValueStorage;
@@ -573,11 +550,6 @@ protected:
         auto get_ptr() const { return ptr; }
         size_t get_hash() const { return ptr->get_hash(*container); }
 
-        size_t get_collision_chain_length() const {
-            return container->grower.place((ptr - container->buf) -
-                                           
container->grower.place(get_hash()));
-        }
-
         /**
           * A hack for HashedDictionary.
           *
diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h 
b/be/src/vec/common/hash_table/partitioned_hash_table.h
index c7626f1fc84..c6a19b36d3a 100644
--- a/be/src/vec/common/hash_table/partitioned_hash_table.h
+++ b/be/src/vec/common/hash_table/partitioned_hash_table.h
@@ -97,17 +97,6 @@ public:
         }
     }
 
-    template <typename Func>
-    void ALWAYS_INLINE for_each_value(Func&& func) {
-        if (_is_partitioned) {
-            for (auto i = 0u; i < NUM_LEVEL1_SUB_TABLES; ++i) {
-                level1_sub_tables[i].for_each_value(func);
-            }
-        } else {
-            level0_sub_table.for_each_value(func);
-        }
-    }
-
     size_t size() {
         size_t count = 0;
         if (_is_partitioned) {
diff --git a/be/src/vec/common/hash_table/ph_hash_map.h 
b/be/src/vec/common/hash_table/ph_hash_map.h
index f1db30f41a5..8f61ee966c5 100644
--- a/be/src/vec/common/hash_table/ph_hash_map.h
+++ b/be/src/vec/common/hash_table/ph_hash_map.h
@@ -102,8 +102,6 @@ public:
 
         auto get_ptr() const { return this; }
         size_t get_hash() const { return base_iterator->get_hash(); }
-
-        size_t get_collision_chain_length() const { return 0; }
     };
 
     class iterator : public iterator_base<iterator, false> {
@@ -202,12 +200,6 @@ public:
         _hash_map.prefetch_hash(hash_value);
     }
 
-    /// Call func(const Key &, Mapped &) for each hash map element.
-    template <typename Func>
-    void for_each_value(Func&& func) {
-        for (auto& v : *this) func(v.get_first(), v.get_second());
-    }
-
     /// Call func(Mapped &) for each hash map element.
     template <typename Func>
     void for_each_mapped(Func&& func) {
diff --git a/be/src/vec/common/hash_table/string_hash_map.h 
b/be/src/vec/common/hash_table/string_hash_map.h
index 97f63553f43..f1efd0fab12 100644
--- a/be/src/vec/common/hash_table/string_hash_map.h
+++ b/be/src/vec/common/hash_table/string_hash_map.h
@@ -55,26 +55,6 @@ struct StringHashMapCell<StringKey16, TMapped>
     static const StringKey16& get_key(const value_type& value_) { return 
value_.first; }
 };
 
-template <typename TMapped>
-struct StringHashMapCell<StringKey24, TMapped>
-        : public HashMapCell<StringKey24, TMapped, StringHashTableHash, 
HashTableNoState> {
-    using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, 
HashTableNoState>;
-    using value_type = typename Base::value_type;
-    using Base::Base;
-    static constexpr bool need_zero_value_storage = false;
-    bool is_zero(const HashTableNoState& state) const { return 
is_zero(this->value.first, state); }
-
-    // Zero means unoccupied cells in hash table. Use key with last word = 0 as
-    // zero keys, because such keys are unrepresentable (no way to encode 
length).
-    static bool is_zero(const StringKey24& key, const HashTableNoState&) { 
return key.c == 0; }
-    void set_zero() { this->value.first.c = 0; }
-
-    // external
-    const doris::StringRef get_key() const { return 
to_string_ref(this->value.first); } /// NOLINT
-    // internal
-    static const StringKey24& get_key(const value_type& value_) { return 
value_.first; }
-};
-
 template <typename TMapped>
 struct StringHashMapCell<doris::StringRef, TMapped>
         : public HashMapCellWithSavedHash<doris::StringRef, TMapped, 
StringHashTableHash,
@@ -108,11 +88,17 @@ struct StringHashMapCell<doris::StringRef, TMapped>
 template <typename TMapped, typename Allocator>
 struct StringHashMapSubMaps {
     using T0 = StringHashTableEmpty<StringHashMapCell<doris::StringRef, 
TMapped>>;
-    using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, 
TMapped>, StringHashTableHash,
-                            StringHashTableGrower<>, Allocator>;
-    using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, 
TMapped>,
+    using T1 = HashMapTable<StringHashMapSubKeys::T1,
+                            StringHashMapCell<StringHashMapSubKeys::T1, 
TMapped>,
+                            StringHashTableHash, StringHashTableGrower<4>, 
Allocator>;
+    using T2 = HashMapTable<StringHashMapSubKeys::T2,
+                            StringHashMapCell<StringHashMapSubKeys::T2, 
TMapped>,
+                            StringHashTableHash, StringHashTableGrower<>, 
Allocator>;
+    using T3 = HashMapTable<StringHashMapSubKeys::T3,
+                            StringHashMapCell<StringHashMapSubKeys::T3, 
TMapped>,
                             StringHashTableHash, StringHashTableGrower<>, 
Allocator>;
-    using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, 
TMapped>,
+    using T4 = HashMapTable<StringHashMapSubKeys::T4,
+                            StringHashMapCell<StringHashMapSubKeys::T4, 
TMapped>,
                             StringHashTableHash, StringHashTableGrower<>, 
Allocator>;
     using Ts = HashMapTable<doris::StringRef, 
StringHashMapCell<doris::StringRef, TMapped>,
                             StringHashTableHash, StringHashTableGrower<>, 
Allocator>;
@@ -132,42 +118,34 @@ public:
         LookupResult it;
         bool inserted;
         this->emplace(x, it, inserted);
-        if (inserted) new (&it->get_mapped()) TMapped();
+        if (inserted) {
+            new (&it->get_mapped()) TMapped();
+        }
 
         return it->get_mapped();
     }
 
     template <typename Func>
-    void ALWAYS_INLINE for_each_value(Func&& func) {
+    void ALWAYS_INLINE for_each_mapped(Func&& func) {
         if (this->m0.size()) {
-            func(doris::StringRef {}, this->m0.zero_value()->get_second());
+            func(this->m0.zero_value()->get_second());
         }
-
         for (auto& v : this->m1) {
-            func(v.get_key(), v.get_second());
+            func(v.get_second());
         }
-
         for (auto& v : this->m2) {
-            func(v.get_key(), v.get_second());
+            func(v.get_second());
         }
-
         for (auto& v : this->m3) {
-            func(v.get_key(), v.get_second());
+            func(v.get_second());
+        }
+        for (auto& v : this->m4) {
+            func(v.get_second());
         }
-
         for (auto& v : this->ms) {
-            func(v.get_key(), v.get_second());
+            func(v.get_second());
         }
     }
-
-    template <typename Func>
-    void ALWAYS_INLINE for_each_mapped(Func&& func) {
-        if (this->m0.size()) func(this->m0.zero_value()->get_second());
-        for (auto& v : this->m1) func(v.get_second());
-        for (auto& v : this->m2) func(v.get_second());
-        for (auto& v : this->m3) func(v.get_second());
-        for (auto& v : this->ms) func(v.get_second());
-    }
     template <typename MappedType>
     char* get_null_key_data() {
         return nullptr;
diff --git a/be/src/vec/common/hash_table/string_hash_table.h 
b/be/src/vec/common/hash_table/string_hash_table.h
index 09e326761c4..5d98ff0b720 100644
--- a/be/src/vec/common/hash_table/string_hash_table.h
+++ b/be/src/vec/common/hash_table/string_hash_table.h
@@ -24,41 +24,43 @@
 #include <variant>
 
 #include "vec/common/hash_table/hash.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/common/memcpy_small.h"
 
+using StringKey2 = doris::vectorized::UInt16;
+using StringKey4 = doris::vectorized::UInt32;
 using StringKey8 = doris::vectorized::UInt64;
 using StringKey16 = doris::vectorized::UInt128;
-struct StringKey24 {
-    doris::vectorized::UInt64 a;
-    doris::vectorized::UInt64 b;
-    doris::vectorized::UInt64 c;
 
-    bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == 
rhs.b && c == rhs.c; }
+struct StringHashMapSubKeys {
+    using T1 = StringKey2;
+    using T2 = StringKey4;
+    using T3 = StringKey8;
+    using T4 = StringKey16;
 };
 
 template <typename StringKey>
-StringKey toStringKey(const doris::StringRef& key) {
+StringKey to_string_key(const doris::StringRef& key) {
     DCHECK_LE(key.size, sizeof(StringKey));
     StringKey string_key {};
-    memcpy((char*)&string_key, key.data, key.size);
+    memcpy_small<sizeof(StringKey)>((char*)&string_key, key.data, key.size);
     return string_key;
 }
 
-inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) {
+template <typename T>
+inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) {
     assert(n != 0);
-    return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 
3)};
+    return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n) 
>> 3)};
 }
 inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) {
     assert(n.high != 0);
     return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) 
>> 3)};
 }
-inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) {
-    assert(n.c != 0);
-    return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 
3)};
-}
 
 struct StringHashTableHash {
 #if defined(__SSE4_2__) || defined(__aarch64__)
-    size_t ALWAYS_INLINE operator()(StringKey8 key) const {
+    template <typename T>
+    size_t ALWAYS_INLINE operator()(T key) const {
         size_t res = -1ULL;
         res = _mm_crc32_u64(res, key);
         return res;
@@ -69,31 +71,23 @@ struct StringHashTableHash {
         res = _mm_crc32_u64(res, key.high);
         return res;
     }
-    size_t ALWAYS_INLINE operator()(StringKey24 key) const {
-        size_t res = -1ULL;
-        res = _mm_crc32_u64(res, key.a);
-        res = _mm_crc32_u64(res, key.b);
-        res = _mm_crc32_u64(res, key.c);
-        return res;
-    }
 #else
-    size_t ALWAYS_INLINE operator()(StringKey8 key) const {
-        return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 8);
-    }
-    size_t ALWAYS_INLINE operator()(StringKey16 key) const {
-        return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 16);
-    }
-    size_t ALWAYS_INLINE operator()(StringKey24 key) const {
-        return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24);
+    template <typename T>
+    size_t ALWAYS_INLINE operator()(T key) const {
+        return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 
sizeof(T));
     }
 #endif
     size_t ALWAYS_INLINE operator()(doris::StringRef key) const {
-        if (key.size <= 8) {
-            return StringHashTableHash()(toStringKey<StringKey8>(key));
-        } else if (key.size <= 16) {
-            return StringHashTableHash()(toStringKey<StringKey16>(key));
-        } else if (key.size <= 24) {
-            return StringHashTableHash()(toStringKey<StringKey24>(key));
+        if (key.size == 0) {
+            return 0;
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
+            return 
StringHashTableHash()(to_string_key<StringHashMapSubKeys::T1>(key));
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
+            return 
StringHashTableHash()(to_string_key<StringHashMapSubKeys::T2>(key));
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
+            return 
StringHashTableHash()(to_string_key<StringHashMapSubKeys::T3>(key));
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
+            return 
StringHashTableHash()(to_string_key<StringHashMapSubKeys::T4>(key));
         }
         return doris::StringRefHash()(key);
     }
@@ -194,10 +188,9 @@ struct StringHashTableGrower : public 
HashTableGrowerWithPrecalculation<initial_
 template <typename Mapped>
 struct StringHashTableLookupResult {
     Mapped* mapped_ptr;
-    StringHashTableLookupResult() : mapped_ptr(nullptr) {}                     
   /// NOLINT
-    StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) 
{} /// NOLINT
-    StringHashTableLookupResult(std::nullptr_t) {}                             
   /// NOLINT
-    const VoidKey getKey() const { return {}; }                                
   /// NOLINT
+    StringHashTableLookupResult() : mapped_ptr(nullptr) {}
+    StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) 
{}
+    StringHashTableLookupResult(std::nullptr_t) {}
     auto& get_mapped() { return *mapped_ptr; }
     auto& operator*() { return *this; }
     auto& operator*() const { return *this; }
@@ -226,7 +219,6 @@ ALWAYS_INLINE inline auto 
lookup_result_get_mapped(StringHashTableLookupResult<M
 template <typename SubMaps>
 class StringHashTable : private boost::noncopyable {
 protected:
-    static constexpr size_t NUM_MAPS = 5;
     // Map for storing empty string
     using T0 = typename SubMaps::T0;
 
@@ -234,18 +226,17 @@ protected:
     using T1 = typename SubMaps::T1;
     using T2 = typename SubMaps::T2;
     using T3 = typename SubMaps::T3;
+    using T4 = typename SubMaps::T4;
 
     // Long strings are stored as doris::StringRef along with saved hash
     using Ts = typename SubMaps::Ts;
     using Self = StringHashTable;
 
-    template <typename, typename, size_t>
-    friend class TwoLevelStringHashTable;
-
     T0 m0;
     T1 m1;
     T2 m2;
     T3 m3;
+    T4 m4;
     Ts ms;
 
     using Cell = typename Ts::cell_type;
@@ -259,7 +250,8 @@ protected:
         typename T1::iterator iterator1;
         typename T2::iterator iterator2;
         typename T3::iterator iterator3;
-        typename Ts::iterator iterator4;
+        typename T4::iterator iterator4;
+        typename Ts::iterator iterator5;
 
         typename Ts::cell_type cell;
 
@@ -269,39 +261,52 @@ protected:
         iterator_base() = default;
         iterator_base(Container* container_, bool end = false) : 
container(container_) {
             if (end) {
-                sub_table_index = 4;
-                iterator4 = container->ms.end();
+                sub_table_index = 5;
+                iterator5 = container->ms.end();
             } else {
                 sub_table_index = 0;
-                if (container->m0.size() == 0)
+                if (container->m0.size() == 0) {
                     sub_table_index++;
-                else
+                } else {
                     return;
+                }
 
                 iterator1 = container->m1.begin();
-                if (iterator1 == container->m1.end())
+                if (iterator1 == container->m1.end()) {
                     sub_table_index++;
-                else
+                } else {
                     return;
+                }
 
                 iterator2 = container->m2.begin();
-                if (iterator2 == container->m2.end())
+                if (iterator2 == container->m2.end()) {
                     sub_table_index++;
-                else
+                } else {
                     return;
+                }
 
                 iterator3 = container->m3.begin();
-                if (iterator3 == container->m3.end())
+                if (iterator3 == container->m3.end()) {
+                    sub_table_index++;
+                } else {
+                    return;
+                }
+
+                iterator4 = container->m4.begin();
+                if (iterator4 == container->m4.end()) {
                     sub_table_index++;
-                else
+                } else {
                     return;
+                }
 
-                iterator4 = container->ms.begin();
+                iterator5 = container->ms.begin();
             }
         }
 
         bool operator==(const iterator_base& rhs) const {
-            if (sub_table_index != rhs.sub_table_index) return false;
+            if (sub_table_index != rhs.sub_table_index) {
+                return false;
+            }
             switch (sub_table_index) {
             case 0: {
                 return true;
@@ -318,6 +323,9 @@ protected:
             case 4: {
                 return iterator4 == rhs.iterator4;
             }
+            case 5: {
+                return iterator5 == rhs.iterator5;
+            }
             }
             LOG(FATAL) << "__builtin_unreachable";
             __builtin_unreachable();
@@ -355,6 +363,13 @@ protected:
             }
             case 4: {
                 ++iterator4;
+                if (iterator4 == container->m4.end()) {
+                    need_switch_to_next = true;
+                }
+                break;
+            }
+            case 5: {
+                ++iterator5;
                 break;
             }
             }
@@ -385,7 +400,14 @@ protected:
                     break;
                 }
                 case 4: {
-                    iterator4 = container->ms.begin();
+                    iterator4 = container->m4.begin();
+                    if (iterator4 == container->m4.end()) {
+                        need_switch_to_next = true;
+                    }
+                    break;
+                }
+                case 5: {
+                    iterator5 = container->ms.begin();
                     break;
                 }
                 }
@@ -416,6 +438,10 @@ protected:
                 const_cast<iterator_base*>(this)->cell = *iterator4;
                 break;
             }
+            case 5: {
+                const_cast<iterator_base*>(this)->cell = *iterator5;
+                break;
+            }
             }
             return cell;
         }
@@ -438,13 +464,14 @@ protected:
                 return iterator3->get_hash(container->m3);
             }
             case 4: {
-                return iterator4->get_hash(container->ms);
+                return iterator4->get_hash(container->m4);
+            }
+            case 5: {
+                return iterator5->get_hash(container->ms);
             }
             }
         }
 
-        size_t get_collision_chain_length() const { return 0; }
-
         /**
           * A hack for HashedDictionary.
           *
@@ -476,25 +503,11 @@ public:
     StringHashTable() = default;
 
     explicit StringHashTable(size_t reserve_for_num_elements)
-            : m1 {reserve_for_num_elements / 4},
-              m2 {reserve_for_num_elements / 4},
-              m3 {reserve_for_num_elements / 4},
-              ms {reserve_for_num_elements / 4} {}
-
-    StringHashTable(StringHashTable&& rhs) noexcept
-            : m1(std::move(rhs.m1)),
-              m2(std::move(rhs.m2)),
-              m3(std::move(rhs.m3)),
-              ms(std::move(rhs.ms)) {}
-
-    StringHashTable& operator=(StringHashTable&& other) {
-        std::swap(m0, other.m0);
-        std::swap(m1, other.m1);
-        std::swap(m2, other.m2);
-        std::swap(m3, other.m3);
-        std::swap(ms, other.ms);
-        return *this;
-    }
+            : m1 {reserve_for_num_elements / 5},
+              m2 {reserve_for_num_elements / 5},
+              m3 {reserve_for_num_elements / 5},
+              m4 {reserve_for_num_elements / 5},
+              ms {reserve_for_num_elements / 5} {}
 
     ~StringHashTable() = default;
 
@@ -524,24 +537,20 @@ public:
             return func(self.ms, std::forward<KeyHolder>(key), key, 
hash_value);
         }
 
-        switch ((sz - 1) >> 3) {
-        case 0: // 1..8 bytes
-        {
-            return func(self.m1, toStringKey<StringKey8>(key), key, 
hash_value);
-        }
-        case 1: // 9..16 bytes
-        {
-            return func(self.m2, toStringKey<StringKey16>(key), key, 
hash_value);
+        if (sz <= sizeof(StringHashMapSubKeys::T1)) {
+            return func(self.m1, to_string_key<StringHashMapSubKeys::T1>(key), 
key, hash_value);
         }
-        case 2: // 17..24 bytes
-        {
-            return func(self.m3, toStringKey<StringKey24>(key), key, 
hash_value);
+        if (sz <= sizeof(StringHashMapSubKeys::T2)) {
+            return func(self.m2, to_string_key<StringHashMapSubKeys::T2>(key), 
key, hash_value);
         }
-        default: // >= 25 bytes
-        {
-            return func(self.ms, std::forward<KeyHolder>(key), key, 
hash_value);
+        if (sz <= sizeof(StringHashMapSubKeys::T3)) {
+            return func(self.m3, to_string_key<StringHashMapSubKeys::T3>(key), 
key, hash_value);
         }
+        if (sz <= sizeof(StringHashMapSubKeys::T4)) {
+            return func(self.m4, to_string_key<StringHashMapSubKeys::T4>(key), 
key, hash_value);
         }
+
+        return func(self.ms, std::forward<KeyHolder>(key), key, hash_value);
     }
 
     struct EmplaceCallable {
@@ -594,12 +603,14 @@ public:
         if (!key.size) {
             return;
         }
-        if (key.size <= 8) {
+        if (key.size <= sizeof(StringHashMapSubKeys::T1)) {
             m1.template prefetch<read>(hash_value);
-        } else if (key.size <= 16) {
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T2)) {
             m2.template prefetch<read>(hash_value);
-        } else if (key.size <= 24) {
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T3)) {
             m3.template prefetch<read>(hash_value);
+        } else if (key.size <= sizeof(StringHashMapSubKeys::T4)) {
+            m4.template prefetch<read>(hash_value);
         } else {
             ms.template prefetch<read>(hash_value);
         }
@@ -613,10 +624,11 @@ public:
         auto ALWAYS_INLINE operator()(Submap& map, const SubmapKey& key, const 
Origin& origin,
                                       size_t hash) {
             auto it = map.find(key, hash);
-            if (!it)
+            if (!it) {
                 return decltype(&it->get_mapped()) {};
-            else
+            } else {
                 return &it->get_mapped();
+            }
         }
     };
 
@@ -628,14 +640,12 @@ public:
         return dispatch(*this, x, hash_value, FindCallable {});
     }
 
-    bool ALWAYS_INLINE has(const Key& x, size_t = 0) const {
-        return dispatch(*this, x, FindCallable {}) != nullptr;
+    size_t size() const {
+        return m0.size() + m1.size() + m2.size() + m3.size() + m4.size() + 
ms.size();
     }
 
-    size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() 
+ ms.size(); }
-
     bool empty() const {
-        return m0.empty() && m1.empty() && m2.empty() && m3.empty() && 
ms.empty();
+        return m0.empty() && m1.empty() && m2.empty() && m3.empty() && 
m4.empty() && ms.empty();
     }
 
     size_t get_buffer_size_in_bytes() const {
@@ -666,6 +676,7 @@ public:
 
     bool add_elem_size_overflow(size_t add_size) const {
         return m1.add_elem_size_overflow(add_size) || 
m2.add_elem_size_overflow(add_size) ||
-               m3.add_elem_size_overflow(add_size) || 
ms.add_elem_size_overflow(add_size);
+               m3.add_elem_size_overflow(add_size) || 
m4.add_elem_size_overflow(add_size) ||
+               ms.add_elem_size_overflow(add_size);
     }
 };
diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h
index 47390066318..62a093e8b62 100644
--- a/be/src/vec/common/memcpy_small.h
+++ b/be/src/vec/common/memcpy_small.h
@@ -20,8 +20,11 @@
 
 #pragma once
 
+#include <glog/logging.h>
 #include <string.h>
 
+#include <cstdint>
+
 #if defined(__SSE2__) || defined(__aarch64__)
 #include "util/sse_util.hpp"
 
@@ -91,3 +94,32 @@ void memcpy_fixed(char* lhs, const char* rhs) {
         memcpy(lhs, rhs, sizeof(T));
     }
 }
+
+template <int max_size>
+inline void memcpy_small(char* lhs, const char* rhs, size_t n) {
+    DCHECK_NE(n, 0);
+    if constexpr (max_size >= 4) {
+        if (n >= 4) {
+            memcpy_fixed<uint32_t>(lhs, rhs);
+            lhs += 4;
+            rhs += 4;
+            n -= 4;
+        }
+    }
+    while (n >= 1) {
+        memcpy_fixed<uint8_t>(lhs, rhs);
+        lhs++;
+        rhs++;
+        n--;
+    }
+}
+
+template <>
+inline void memcpy_small<2>(char* lhs, const char* rhs, size_t n) {
+    DCHECK_NE(n, 0);
+    if (n == 2) {
+        memcpy_fixed<uint16_t>(lhs, rhs);
+    } else {
+        memcpy_fixed<uint8_t>(lhs, rhs);
+    }
+}
\ No newline at end of file
diff --git a/be/src/vec/exec/distinct_vaggregation_node.cpp 
b/be/src/vec/exec/distinct_vaggregation_node.cpp
index a5c57792ba3..09439010327 100644
--- a/be/src/vec/exec/distinct_vaggregation_node.cpp
+++ b/be/src/vec/exec/distinct_vaggregation_node.cpp
@@ -95,12 +95,12 @@ void 
DistinctAggregationNode::_emplace_into_hash_table_to_distinct(IColumn::Sele
                 auto creator = [&](const auto& ctor, auto& key, auto& origin) {
                     HashMethodType::try_presis_key(key, origin, 
*_agg_arena_pool);
                     ctor(key, dummy_mapped_data);
-                    distinct_row.push_back(row);
+                    distinct_row.push_back_without_reserve(row);
                 };
 
                 auto creator_for_null_key = [&](auto& mapped) {
                     mapped = dummy_mapped_data;
-                    distinct_row.push_back(row);
+                    distinct_row.push_back_without_reserve(row);
                 };
 
                 SCOPED_TIMER(_hash_table_emplace_timer);
diff --git a/be/src/vec/exec/vaggregation_node.cpp 
b/be/src/vec/exec/vaggregation_node.cpp
index 020763ecff7..5e0f9762668 100644
--- a/be/src/vec/exec/vaggregation_node.cpp
+++ b/be/src/vec/exec/vaggregation_node.cpp
@@ -102,26 +102,10 @@ static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE =
 AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode,
                                  const DescriptorTbl& descs)
         : ExecNode(pool, tnode, descs),
-          _hash_table_compute_timer(nullptr),
-          _hash_table_input_counter(nullptr),
-          _expr_timer(nullptr),
           _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id),
-          _intermediate_tuple_desc(nullptr),
           _output_tuple_id(tnode.agg_node.output_tuple_id),
-          _output_tuple_desc(nullptr),
           _needs_finalize(tnode.agg_node.need_finalize),
-          _is_merge(false),
-          _serialize_key_timer(nullptr),
-          _merge_timer(nullptr),
-          _get_results_timer(nullptr),
-          _serialize_data_timer(nullptr),
-          _serialize_result_timer(nullptr),
-          _deserialize_data_timer(nullptr),
-          _hash_table_iterate_timer(nullptr),
-          _insert_keys_to_column_timer(nullptr),
-          _streaming_agg_timer(nullptr),
-          _hash_table_size_counter(nullptr),
-          _max_row_size_counter(nullptr) {
+          _is_merge(false) {
     if (tnode.agg_node.__isset.use_streaming_preaggregation) {
         _is_streaming_preagg = tnode.agg_node.use_streaming_preaggregation;
         if (_is_streaming_preagg) {
@@ -593,7 +577,7 @@ Status 
AggregationNode::_get_without_key_result(RuntimeState* state, Block* bloc
     }
 
     for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
-        auto column = columns[i].get();
+        auto* column = columns[i].get();
         _aggregate_evaluators[i]->insert_result_info(
                 _agg_data->without_key + _offsets_of_aggregate_states[i], 
column);
     }
@@ -816,13 +800,13 @@ Status AggregationNode::_reset_hash_table() {
                     }
                 });
 
-                _aggregate_data_container.reset(new AggregateDataContainer(
+                _aggregate_data_container = 
std::make_unique<AggregateDataContainer>(
                         sizeof(typename HashTableType::key_type),
                         ((_total_size_of_aggregate_states + 
_align_aggregate_states - 1) /
                          _align_aggregate_states) *
-                                _align_aggregate_states));
-                hash_table = HashTableType();
-                _agg_arena_pool.reset(new Arena);
+                                _align_aggregate_states);
+                agg_method.hash_table.reset(new HashTableType());
+                _agg_arena_pool = std::make_unique<Arena>();
                 return Status::OK();
             },
             _agg_data->method_variant);
@@ -845,7 +829,7 @@ void 
AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnR
 
                 auto creator = [this](const auto& ctor, auto& key, auto& 
origin) {
                     try {
-                        HashMethodType::try_presis_key(key, origin, 
*_agg_arena_pool);
+                        HashMethodType::try_presis_key_and_origin(key, origin, 
*_agg_arena_pool);
                         auto mapped = 
_aggregate_data_container->append_data(origin);
                         auto st = _create_agg_status(mapped);
                         if (!st) {
diff --git a/be/src/vec/exec/vpartition_sort_node.cpp 
b/be/src/vec/exec/vpartition_sort_node.cpp
index 6db90763fa7..6a7ebcffd1e 100644
--- a/be/src/vec/exec/vpartition_sort_node.cpp
+++ b/be/src/vec/exec/vpartition_sort_node.cpp
@@ -116,7 +116,7 @@ void VPartitionSortNode::_emplace_into_hash_table(const 
ColumnRawPtrs& key_colum
 
                 auto creator = [&](const auto& ctor, auto& key, auto& origin) {
                     HashMethodType::try_presis_key(key, origin, 
*_agg_arena_pool);
-                    auto aggregate_data = _pool->add(new PartitionBlocks());
+                    auto* aggregate_data = _pool->add(new PartitionBlocks());
                     _value_places.push_back(aggregate_data);
                     ctor(key, aggregate_data);
                     _num_partition++;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to