This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
commit a5ca8833d7ad2ecbcee1962cc6e36f00bdfcffdc Author: Pxl <pxl...@qq.com> AuthorDate: Thu Jan 18 16:47:03 2024 +0800 [Improvement](aggregate) optimize for small string aggregate (#29919) --- be/src/pipeline/exec/aggregation_sink_operator.cpp | 4 +- .../pipeline/exec/aggregation_source_operator.cpp | 9 +- .../pipeline/exec/partition_sort_sink_operator.cpp | 4 +- be/src/vec/common/hash_table/hash_map.h | 6 - be/src/vec/common/hash_table/hash_map_context.h | 20 +- be/src/vec/common/hash_table/hash_table.h | 28 --- .../vec/common/hash_table/partitioned_hash_table.h | 11 -- be/src/vec/common/hash_table/ph_hash_map.h | 8 - be/src/vec/common/hash_table/string_hash_map.h | 66 +++---- be/src/vec/common/hash_table/string_hash_table.h | 219 +++++++++++---------- be/src/vec/common/memcpy_small.h | 32 +++ be/src/vec/exec/distinct_vaggregation_node.cpp | 4 +- be/src/vec/exec/vaggregation_node.cpp | 30 +-- be/src/vec/exec/vpartition_sort_node.cpp | 2 +- 14 files changed, 202 insertions(+), 241 deletions(-) diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 3ba4dd05dc7..8d3a70a3a3a 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -536,7 +536,7 @@ void AggSinkLocalState<DependencyType, Derived>::_emplace_into_hash_table( agg_method.init_serialized_keys(key_columns, num_rows); auto creator = [this](const auto& ctor, auto& key, auto& origin) { - HashMethodType::try_presis_key(key, origin, *_agg_arena_pool); + HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool); auto mapped = Base::_shared_state->aggregate_data_container->append_data(origin); auto st = _create_agg_status(mapped); @@ -686,7 +686,7 @@ Status AggSinkLocalState<DependencyType, Derived>::_reset_hash_table() { ((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) / ss.align_aggregate_states) * ss.align_aggregate_states)); - hash_table = HashTableType(); + agg_method.hash_table.reset(new HashTableType()); ss.agg_arena_pool.reset(new vectorized::Arena); return Status::OK(); }, diff --git a/be/src/pipeline/exec/aggregation_source_operator.cpp b/be/src/pipeline/exec/aggregation_source_operator.cpp index dc476cf63a0..7e2fca9bc67 100644 --- a/be/src/pipeline/exec/aggregation_source_operator.cpp +++ b/be/src/pipeline/exec/aggregation_source_operator.cpp @@ -17,6 +17,7 @@ #include "aggregation_source_operator.h" +#include <memory> #include <string> #include "common/exception.h" @@ -141,13 +142,13 @@ Status AggLocalState::_reset_hash_table() { } }); - ss.aggregate_data_container.reset(new vectorized::AggregateDataContainer( + ss.aggregate_data_container = std::make_unique<vectorized::AggregateDataContainer>( sizeof(typename HashTableType::key_type), ((ss.total_size_of_aggregate_states + ss.align_aggregate_states - 1) / ss.align_aggregate_states) * - ss.align_aggregate_states)); - hash_table = HashTableType(); - ss.agg_arena_pool.reset(new vectorized::Arena); + ss.align_aggregate_states); + agg_method.hash_table.reset(new HashTableType()); + ss.agg_arena_pool = std::make_unique<vectorized::Arena>(); return Status::OK(); }, ss.agg_data->method_variant); diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp b/be/src/pipeline/exec/partition_sort_sink_operator.cpp index 478a3eedb20..14d917d62e3 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp @@ -189,7 +189,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table( auto creator = [&](const auto& ctor, auto& key, auto& origin) { HashMethodType::try_presis_key(key, origin, *local_state._agg_arena_pool); - auto aggregate_data = _pool->add(new vectorized::PartitionBlocks()); + auto* aggregate_data = _pool->add(new vectorized::PartitionBlocks()); local_state._value_places.push_back(aggregate_data); ctor(key, aggregate_data); local_state._num_partition++; @@ -206,7 +206,7 @@ void PartitionSortSinkOperatorX::_emplace_into_hash_table( agg_method.lazy_emplace(state, row, creator, creator_for_null_key); mapped->add_row_idx(row); } - for (auto place : local_state._value_places) { + for (auto* place : local_state._value_places) { SCOPED_TIMER(local_state._selector_block_timer); place->append_block_by_selector(input_block, _child_x->row_desc(), _has_global_limit, _partition_inner_limit, diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h index bd7c413dd97..382f46acb74 100644 --- a/be/src/vec/common/hash_table/hash_map.h +++ b/be/src/vec/common/hash_table/hash_map.h @@ -155,12 +155,6 @@ public: using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable; - /// Call func(const Key &, Mapped &) for each hash map element. - template <typename Func> - void for_each_value(Func&& func) { - for (auto& v : *this) func(v.get_first(), v.get_second()); - } - /// Call func(Mapped &) for each hash map element. template <typename Func> void for_each_mapped(Func&& func) { diff --git a/be/src/vec/common/hash_table/hash_map_context.h b/be/src/vec/common/hash_table/hash_map_context.h index 3784379902b..d96aa2d7c65 100644 --- a/be/src/vec/common/hash_table/hash_map_context.h +++ b/be/src/vec/common/hash_table/hash_map_context.h @@ -128,20 +128,22 @@ struct MethodBase { template <typename State> ALWAYS_INLINE auto find(State& state, size_t i) { - prefetch<true>(i); + if constexpr (!is_string_hash_map()) { + prefetch<true>(i); + } return state.find_key_with_hash(*hash_table, hash_values[i], keys[i]); } template <typename State, typename F, typename FF> ALWAYS_INLINE auto& lazy_emplace(State& state, size_t i, F&& creator, FF&& creator_for_null_key) { - prefetch<false>(i); + if constexpr (!is_string_hash_map()) { + prefetch<false>(i); + } return state.lazy_emplace_key(*hash_table, i, keys[i], hash_values[i], creator, creator_for_null_key); } - static constexpr bool need_presis() { return std::is_same_v<Key, StringRef>; } - static constexpr bool is_string_hash_map() { return std::is_same_v<StringHashMap<Mapped>, HashMap> || std::is_same_v<DataWithNullKey<StringHashMap<Mapped>>, HashMap>; @@ -149,7 +151,14 @@ struct MethodBase { template <typename Key, typename Origin> static void try_presis_key(Key& key, Origin& origin, Arena& arena) { - if constexpr (need_presis()) { + if constexpr (std::is_same_v<Key, StringRef>) { + key.data = arena.insert(key.data, key.size); + } + } + + template <typename Key, typename Origin> + static void try_presis_key_and_origin(Key& key, Origin& origin, Arena& arena) { + if constexpr (std::is_same_v<Origin, StringRef>) { origin.data = arena.insert(origin.data, origin.size); if constexpr (!is_string_hash_map()) { key = origin; @@ -303,7 +312,6 @@ struct MethodOneNumber : public MethodBase<TData> { ->get_raw_data() .data : key_columns[0]->get_raw_data().data); - std::string name = key_columns[0]->get_name(); if (is_join) { Base::init_join_bucket_num(num_rows, bucket_size, null_map); } else { diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index fcc682a49cb..7e669f275e2 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -353,29 +353,6 @@ public: } }; -static_assert(sizeof(HashTableGrowerWithPrecalculation<>) == 64); - -/** When used as a Grower, it turns a hash table into something like a lookup table. - * It remains non-optimal - the cells store the keys. - * Also, the compiler can not completely remove the code of passing through the collision resolution chain, although it is not needed. - * TODO Make a proper lookup table. - */ -template <size_t key_bits> -struct HashTableFixedGrower { - size_t buf_size() const { return 1ULL << key_bits; } - size_t place(size_t x) const { return x; } - /// You could write __builtin_unreachable(), but the compiler does not optimize everything, and it turns out less efficiently. - size_t next(size_t pos) const { return pos + 1; } - bool overflow(size_t /*elems*/) const { return false; } - - void increase_size() { - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); - } - void set(size_t /*num_elems*/) {} - void set_buf_size(size_t /*buf_size_*/) {} -}; - /** If you want to store the zero key separately - a place to store it. */ template <bool need_zero_value_storage, typename Cell> struct ZeroValueStorage; @@ -573,11 +550,6 @@ protected: auto get_ptr() const { return ptr; } size_t get_hash() const { return ptr->get_hash(*container); } - size_t get_collision_chain_length() const { - return container->grower.place((ptr - container->buf) - - container->grower.place(get_hash())); - } - /** * A hack for HashedDictionary. * diff --git a/be/src/vec/common/hash_table/partitioned_hash_table.h b/be/src/vec/common/hash_table/partitioned_hash_table.h index c7626f1fc84..c6a19b36d3a 100644 --- a/be/src/vec/common/hash_table/partitioned_hash_table.h +++ b/be/src/vec/common/hash_table/partitioned_hash_table.h @@ -97,17 +97,6 @@ public: } } - template <typename Func> - void ALWAYS_INLINE for_each_value(Func&& func) { - if (_is_partitioned) { - for (auto i = 0u; i < NUM_LEVEL1_SUB_TABLES; ++i) { - level1_sub_tables[i].for_each_value(func); - } - } else { - level0_sub_table.for_each_value(func); - } - } - size_t size() { size_t count = 0; if (_is_partitioned) { diff --git a/be/src/vec/common/hash_table/ph_hash_map.h b/be/src/vec/common/hash_table/ph_hash_map.h index f1db30f41a5..8f61ee966c5 100644 --- a/be/src/vec/common/hash_table/ph_hash_map.h +++ b/be/src/vec/common/hash_table/ph_hash_map.h @@ -102,8 +102,6 @@ public: auto get_ptr() const { return this; } size_t get_hash() const { return base_iterator->get_hash(); } - - size_t get_collision_chain_length() const { return 0; } }; class iterator : public iterator_base<iterator, false> { @@ -202,12 +200,6 @@ public: _hash_map.prefetch_hash(hash_value); } - /// Call func(const Key &, Mapped &) for each hash map element. - template <typename Func> - void for_each_value(Func&& func) { - for (auto& v : *this) func(v.get_first(), v.get_second()); - } - /// Call func(Mapped &) for each hash map element. template <typename Func> void for_each_mapped(Func&& func) { diff --git a/be/src/vec/common/hash_table/string_hash_map.h b/be/src/vec/common/hash_table/string_hash_map.h index 97f63553f43..f1efd0fab12 100644 --- a/be/src/vec/common/hash_table/string_hash_map.h +++ b/be/src/vec/common/hash_table/string_hash_map.h @@ -55,26 +55,6 @@ struct StringHashMapCell<StringKey16, TMapped> static const StringKey16& get_key(const value_type& value_) { return value_.first; } }; -template <typename TMapped> -struct StringHashMapCell<StringKey24, TMapped> - : public HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState> { - using Base = HashMapCell<StringKey24, TMapped, StringHashTableHash, HashTableNoState>; - using value_type = typename Base::value_type; - using Base::Base; - static constexpr bool need_zero_value_storage = false; - bool is_zero(const HashTableNoState& state) const { return is_zero(this->value.first, state); } - - // Zero means unoccupied cells in hash table. Use key with last word = 0 as - // zero keys, because such keys are unrepresentable (no way to encode length). - static bool is_zero(const StringKey24& key, const HashTableNoState&) { return key.c == 0; } - void set_zero() { this->value.first.c = 0; } - - // external - const doris::StringRef get_key() const { return to_string_ref(this->value.first); } /// NOLINT - // internal - static const StringKey24& get_key(const value_type& value_) { return value_.first; } -}; - template <typename TMapped> struct StringHashMapCell<doris::StringRef, TMapped> : public HashMapCellWithSavedHash<doris::StringRef, TMapped, StringHashTableHash, @@ -108,11 +88,17 @@ struct StringHashMapCell<doris::StringRef, TMapped> template <typename TMapped, typename Allocator> struct StringHashMapSubMaps { using T0 = StringHashTableEmpty<StringHashMapCell<doris::StringRef, TMapped>>; - using T1 = HashMapTable<StringKey8, StringHashMapCell<StringKey8, TMapped>, StringHashTableHash, - StringHashTableGrower<>, Allocator>; - using T2 = HashMapTable<StringKey16, StringHashMapCell<StringKey16, TMapped>, + using T1 = HashMapTable<StringHashMapSubKeys::T1, + StringHashMapCell<StringHashMapSubKeys::T1, TMapped>, + StringHashTableHash, StringHashTableGrower<4>, Allocator>; + using T2 = HashMapTable<StringHashMapSubKeys::T2, + StringHashMapCell<StringHashMapSubKeys::T2, TMapped>, + StringHashTableHash, StringHashTableGrower<>, Allocator>; + using T3 = HashMapTable<StringHashMapSubKeys::T3, + StringHashMapCell<StringHashMapSubKeys::T3, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; - using T3 = HashMapTable<StringKey24, StringHashMapCell<StringKey24, TMapped>, + using T4 = HashMapTable<StringHashMapSubKeys::T4, + StringHashMapCell<StringHashMapSubKeys::T4, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; using Ts = HashMapTable<doris::StringRef, StringHashMapCell<doris::StringRef, TMapped>, StringHashTableHash, StringHashTableGrower<>, Allocator>; @@ -132,42 +118,34 @@ public: LookupResult it; bool inserted; this->emplace(x, it, inserted); - if (inserted) new (&it->get_mapped()) TMapped(); + if (inserted) { + new (&it->get_mapped()) TMapped(); + } return it->get_mapped(); } template <typename Func> - void ALWAYS_INLINE for_each_value(Func&& func) { + void ALWAYS_INLINE for_each_mapped(Func&& func) { if (this->m0.size()) { - func(doris::StringRef {}, this->m0.zero_value()->get_second()); + func(this->m0.zero_value()->get_second()); } - for (auto& v : this->m1) { - func(v.get_key(), v.get_second()); + func(v.get_second()); } - for (auto& v : this->m2) { - func(v.get_key(), v.get_second()); + func(v.get_second()); } - for (auto& v : this->m3) { - func(v.get_key(), v.get_second()); + func(v.get_second()); + } + for (auto& v : this->m4) { + func(v.get_second()); } - for (auto& v : this->ms) { - func(v.get_key(), v.get_second()); + func(v.get_second()); } } - - template <typename Func> - void ALWAYS_INLINE for_each_mapped(Func&& func) { - if (this->m0.size()) func(this->m0.zero_value()->get_second()); - for (auto& v : this->m1) func(v.get_second()); - for (auto& v : this->m2) func(v.get_second()); - for (auto& v : this->m3) func(v.get_second()); - for (auto& v : this->ms) func(v.get_second()); - } template <typename MappedType> char* get_null_key_data() { return nullptr; diff --git a/be/src/vec/common/hash_table/string_hash_table.h b/be/src/vec/common/hash_table/string_hash_table.h index 09e326761c4..5d98ff0b720 100644 --- a/be/src/vec/common/hash_table/string_hash_table.h +++ b/be/src/vec/common/hash_table/string_hash_table.h @@ -24,41 +24,43 @@ #include <variant> #include "vec/common/hash_table/hash.h" +#include "vec/common/hash_table/hash_table.h" +#include "vec/common/memcpy_small.h" +using StringKey2 = doris::vectorized::UInt16; +using StringKey4 = doris::vectorized::UInt32; using StringKey8 = doris::vectorized::UInt64; using StringKey16 = doris::vectorized::UInt128; -struct StringKey24 { - doris::vectorized::UInt64 a; - doris::vectorized::UInt64 b; - doris::vectorized::UInt64 c; - bool operator==(const StringKey24 rhs) const { return a == rhs.a && b == rhs.b && c == rhs.c; } +struct StringHashMapSubKeys { + using T1 = StringKey2; + using T2 = StringKey4; + using T3 = StringKey8; + using T4 = StringKey16; }; template <typename StringKey> -StringKey toStringKey(const doris::StringRef& key) { +StringKey to_string_key(const doris::StringRef& key) { DCHECK_LE(key.size, sizeof(StringKey)); StringKey string_key {}; - memcpy((char*)&string_key, key.data, key.size); + memcpy_small<sizeof(StringKey)>((char*)&string_key, key.data, key.size); return string_key; } -inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey8& n) { +template <typename T> +inline doris::StringRef ALWAYS_INLINE to_string_ref(const T& n) { assert(n != 0); - return {reinterpret_cast<const char*>(&n), 8ul - (__builtin_clzll(n) >> 3)}; + return {reinterpret_cast<const char*>(&n), sizeof(T) - (__builtin_clzll(n) >> 3)}; } inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey16& n) { assert(n.high != 0); return {reinterpret_cast<const char*>(&n), 16ul - (__builtin_clzll(n.high) >> 3)}; } -inline doris::StringRef ALWAYS_INLINE to_string_ref(const StringKey24& n) { - assert(n.c != 0); - return {reinterpret_cast<const char*>(&n), 24ul - (__builtin_clzll(n.c) >> 3)}; -} struct StringHashTableHash { #if defined(__SSE4_2__) || defined(__aarch64__) - size_t ALWAYS_INLINE operator()(StringKey8 key) const { + template <typename T> + size_t ALWAYS_INLINE operator()(T key) const { size_t res = -1ULL; res = _mm_crc32_u64(res, key); return res; @@ -69,31 +71,23 @@ struct StringHashTableHash { res = _mm_crc32_u64(res, key.high); return res; } - size_t ALWAYS_INLINE operator()(StringKey24 key) const { - size_t res = -1ULL; - res = _mm_crc32_u64(res, key.a); - res = _mm_crc32_u64(res, key.b); - res = _mm_crc32_u64(res, key.c); - return res; - } #else - size_t ALWAYS_INLINE operator()(StringKey8 key) const { - return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 8); - } - size_t ALWAYS_INLINE operator()(StringKey16 key) const { - return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 16); - } - size_t ALWAYS_INLINE operator()(StringKey24 key) const { - return util_hash::CityHash64(reinterpret_cast<const char*>(&key), 24); + template <typename T> + size_t ALWAYS_INLINE operator()(T key) const { + return util_hash::CityHash64(reinterpret_cast<const char*>(&key), sizeof(T)); } #endif size_t ALWAYS_INLINE operator()(doris::StringRef key) const { - if (key.size <= 8) { - return StringHashTableHash()(toStringKey<StringKey8>(key)); - } else if (key.size <= 16) { - return StringHashTableHash()(toStringKey<StringKey16>(key)); - } else if (key.size <= 24) { - return StringHashTableHash()(toStringKey<StringKey24>(key)); + if (key.size == 0) { + return 0; + } else if (key.size <= sizeof(StringHashMapSubKeys::T1)) { + return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T1>(key)); + } else if (key.size <= sizeof(StringHashMapSubKeys::T2)) { + return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T2>(key)); + } else if (key.size <= sizeof(StringHashMapSubKeys::T3)) { + return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T3>(key)); + } else if (key.size <= sizeof(StringHashMapSubKeys::T4)) { + return StringHashTableHash()(to_string_key<StringHashMapSubKeys::T4>(key)); } return doris::StringRefHash()(key); } @@ -194,10 +188,9 @@ struct StringHashTableGrower : public HashTableGrowerWithPrecalculation<initial_ template <typename Mapped> struct StringHashTableLookupResult { Mapped* mapped_ptr; - StringHashTableLookupResult() : mapped_ptr(nullptr) {} /// NOLINT - StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {} /// NOLINT - StringHashTableLookupResult(std::nullptr_t) {} /// NOLINT - const VoidKey getKey() const { return {}; } /// NOLINT + StringHashTableLookupResult() : mapped_ptr(nullptr) {} + StringHashTableLookupResult(Mapped* mapped_ptr_) : mapped_ptr(mapped_ptr_) {} + StringHashTableLookupResult(std::nullptr_t) {} auto& get_mapped() { return *mapped_ptr; } auto& operator*() { return *this; } auto& operator*() const { return *this; } @@ -226,7 +219,6 @@ ALWAYS_INLINE inline auto lookup_result_get_mapped(StringHashTableLookupResult<M template <typename SubMaps> class StringHashTable : private boost::noncopyable { protected: - static constexpr size_t NUM_MAPS = 5; // Map for storing empty string using T0 = typename SubMaps::T0; @@ -234,18 +226,17 @@ protected: using T1 = typename SubMaps::T1; using T2 = typename SubMaps::T2; using T3 = typename SubMaps::T3; + using T4 = typename SubMaps::T4; // Long strings are stored as doris::StringRef along with saved hash using Ts = typename SubMaps::Ts; using Self = StringHashTable; - template <typename, typename, size_t> - friend class TwoLevelStringHashTable; - T0 m0; T1 m1; T2 m2; T3 m3; + T4 m4; Ts ms; using Cell = typename Ts::cell_type; @@ -259,7 +250,8 @@ protected: typename T1::iterator iterator1; typename T2::iterator iterator2; typename T3::iterator iterator3; - typename Ts::iterator iterator4; + typename T4::iterator iterator4; + typename Ts::iterator iterator5; typename Ts::cell_type cell; @@ -269,39 +261,52 @@ protected: iterator_base() = default; iterator_base(Container* container_, bool end = false) : container(container_) { if (end) { - sub_table_index = 4; - iterator4 = container->ms.end(); + sub_table_index = 5; + iterator5 = container->ms.end(); } else { sub_table_index = 0; - if (container->m0.size() == 0) + if (container->m0.size() == 0) { sub_table_index++; - else + } else { return; + } iterator1 = container->m1.begin(); - if (iterator1 == container->m1.end()) + if (iterator1 == container->m1.end()) { sub_table_index++; - else + } else { return; + } iterator2 = container->m2.begin(); - if (iterator2 == container->m2.end()) + if (iterator2 == container->m2.end()) { sub_table_index++; - else + } else { return; + } iterator3 = container->m3.begin(); - if (iterator3 == container->m3.end()) + if (iterator3 == container->m3.end()) { + sub_table_index++; + } else { + return; + } + + iterator4 = container->m4.begin(); + if (iterator4 == container->m4.end()) { sub_table_index++; - else + } else { return; + } - iterator4 = container->ms.begin(); + iterator5 = container->ms.begin(); } } bool operator==(const iterator_base& rhs) const { - if (sub_table_index != rhs.sub_table_index) return false; + if (sub_table_index != rhs.sub_table_index) { + return false; + } switch (sub_table_index) { case 0: { return true; @@ -318,6 +323,9 @@ protected: case 4: { return iterator4 == rhs.iterator4; } + case 5: { + return iterator5 == rhs.iterator5; + } } LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); @@ -355,6 +363,13 @@ protected: } case 4: { ++iterator4; + if (iterator4 == container->m4.end()) { + need_switch_to_next = true; + } + break; + } + case 5: { + ++iterator5; break; } } @@ -385,7 +400,14 @@ protected: break; } case 4: { - iterator4 = container->ms.begin(); + iterator4 = container->m4.begin(); + if (iterator4 == container->m4.end()) { + need_switch_to_next = true; + } + break; + } + case 5: { + iterator5 = container->ms.begin(); break; } } @@ -416,6 +438,10 @@ protected: const_cast<iterator_base*>(this)->cell = *iterator4; break; } + case 5: { + const_cast<iterator_base*>(this)->cell = *iterator5; + break; + } } return cell; } @@ -438,13 +464,14 @@ protected: return iterator3->get_hash(container->m3); } case 4: { - return iterator4->get_hash(container->ms); + return iterator4->get_hash(container->m4); + } + case 5: { + return iterator5->get_hash(container->ms); } } } - size_t get_collision_chain_length() const { return 0; } - /** * A hack for HashedDictionary. * @@ -476,25 +503,11 @@ public: StringHashTable() = default; explicit StringHashTable(size_t reserve_for_num_elements) - : m1 {reserve_for_num_elements / 4}, - m2 {reserve_for_num_elements / 4}, - m3 {reserve_for_num_elements / 4}, - ms {reserve_for_num_elements / 4} {} - - StringHashTable(StringHashTable&& rhs) noexcept - : m1(std::move(rhs.m1)), - m2(std::move(rhs.m2)), - m3(std::move(rhs.m3)), - ms(std::move(rhs.ms)) {} - - StringHashTable& operator=(StringHashTable&& other) { - std::swap(m0, other.m0); - std::swap(m1, other.m1); - std::swap(m2, other.m2); - std::swap(m3, other.m3); - std::swap(ms, other.ms); - return *this; - } + : m1 {reserve_for_num_elements / 5}, + m2 {reserve_for_num_elements / 5}, + m3 {reserve_for_num_elements / 5}, + m4 {reserve_for_num_elements / 5}, + ms {reserve_for_num_elements / 5} {} ~StringHashTable() = default; @@ -524,24 +537,20 @@ public: return func(self.ms, std::forward<KeyHolder>(key), key, hash_value); } - switch ((sz - 1) >> 3) { - case 0: // 1..8 bytes - { - return func(self.m1, toStringKey<StringKey8>(key), key, hash_value); - } - case 1: // 9..16 bytes - { - return func(self.m2, toStringKey<StringKey16>(key), key, hash_value); + if (sz <= sizeof(StringHashMapSubKeys::T1)) { + return func(self.m1, to_string_key<StringHashMapSubKeys::T1>(key), key, hash_value); } - case 2: // 17..24 bytes - { - return func(self.m3, toStringKey<StringKey24>(key), key, hash_value); + if (sz <= sizeof(StringHashMapSubKeys::T2)) { + return func(self.m2, to_string_key<StringHashMapSubKeys::T2>(key), key, hash_value); } - default: // >= 25 bytes - { - return func(self.ms, std::forward<KeyHolder>(key), key, hash_value); + if (sz <= sizeof(StringHashMapSubKeys::T3)) { + return func(self.m3, to_string_key<StringHashMapSubKeys::T3>(key), key, hash_value); } + if (sz <= sizeof(StringHashMapSubKeys::T4)) { + return func(self.m4, to_string_key<StringHashMapSubKeys::T4>(key), key, hash_value); } + + return func(self.ms, std::forward<KeyHolder>(key), key, hash_value); } struct EmplaceCallable { @@ -594,12 +603,14 @@ public: if (!key.size) { return; } - if (key.size <= 8) { + if (key.size <= sizeof(StringHashMapSubKeys::T1)) { m1.template prefetch<read>(hash_value); - } else if (key.size <= 16) { + } else if (key.size <= sizeof(StringHashMapSubKeys::T2)) { m2.template prefetch<read>(hash_value); - } else if (key.size <= 24) { + } else if (key.size <= sizeof(StringHashMapSubKeys::T3)) { m3.template prefetch<read>(hash_value); + } else if (key.size <= sizeof(StringHashMapSubKeys::T4)) { + m4.template prefetch<read>(hash_value); } else { ms.template prefetch<read>(hash_value); } @@ -613,10 +624,11 @@ public: auto ALWAYS_INLINE operator()(Submap& map, const SubmapKey& key, const Origin& origin, size_t hash) { auto it = map.find(key, hash); - if (!it) + if (!it) { return decltype(&it->get_mapped()) {}; - else + } else { return &it->get_mapped(); + } } }; @@ -628,14 +640,12 @@ public: return dispatch(*this, x, hash_value, FindCallable {}); } - bool ALWAYS_INLINE has(const Key& x, size_t = 0) const { - return dispatch(*this, x, FindCallable {}) != nullptr; + size_t size() const { + return m0.size() + m1.size() + m2.size() + m3.size() + m4.size() + ms.size(); } - size_t size() const { return m0.size() + m1.size() + m2.size() + m3.size() + ms.size(); } - bool empty() const { - return m0.empty() && m1.empty() && m2.empty() && m3.empty() && ms.empty(); + return m0.empty() && m1.empty() && m2.empty() && m3.empty() && m4.empty() && ms.empty(); } size_t get_buffer_size_in_bytes() const { @@ -666,6 +676,7 @@ public: bool add_elem_size_overflow(size_t add_size) const { return m1.add_elem_size_overflow(add_size) || m2.add_elem_size_overflow(add_size) || - m3.add_elem_size_overflow(add_size) || ms.add_elem_size_overflow(add_size); + m3.add_elem_size_overflow(add_size) || m4.add_elem_size_overflow(add_size) || + ms.add_elem_size_overflow(add_size); } }; diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h index 47390066318..62a093e8b62 100644 --- a/be/src/vec/common/memcpy_small.h +++ b/be/src/vec/common/memcpy_small.h @@ -20,8 +20,11 @@ #pragma once +#include <glog/logging.h> #include <string.h> +#include <cstdint> + #if defined(__SSE2__) || defined(__aarch64__) #include "util/sse_util.hpp" @@ -91,3 +94,32 @@ void memcpy_fixed(char* lhs, const char* rhs) { memcpy(lhs, rhs, sizeof(T)); } } + +template <int max_size> +inline void memcpy_small(char* lhs, const char* rhs, size_t n) { + DCHECK_NE(n, 0); + if constexpr (max_size >= 4) { + if (n >= 4) { + memcpy_fixed<uint32_t>(lhs, rhs); + lhs += 4; + rhs += 4; + n -= 4; + } + } + while (n >= 1) { + memcpy_fixed<uint8_t>(lhs, rhs); + lhs++; + rhs++; + n--; + } +} + +template <> +inline void memcpy_small<2>(char* lhs, const char* rhs, size_t n) { + DCHECK_NE(n, 0); + if (n == 2) { + memcpy_fixed<uint16_t>(lhs, rhs); + } else { + memcpy_fixed<uint8_t>(lhs, rhs); + } +} \ No newline at end of file diff --git a/be/src/vec/exec/distinct_vaggregation_node.cpp b/be/src/vec/exec/distinct_vaggregation_node.cpp index a5c57792ba3..09439010327 100644 --- a/be/src/vec/exec/distinct_vaggregation_node.cpp +++ b/be/src/vec/exec/distinct_vaggregation_node.cpp @@ -95,12 +95,12 @@ void DistinctAggregationNode::_emplace_into_hash_table_to_distinct(IColumn::Sele auto creator = [&](const auto& ctor, auto& key, auto& origin) { HashMethodType::try_presis_key(key, origin, *_agg_arena_pool); ctor(key, dummy_mapped_data); - distinct_row.push_back(row); + distinct_row.push_back_without_reserve(row); }; auto creator_for_null_key = [&](auto& mapped) { mapped = dummy_mapped_data; - distinct_row.push_back(row); + distinct_row.push_back_without_reserve(row); }; SCOPED_TIMER(_hash_table_emplace_timer); diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp index 020763ecff7..5e0f9762668 100644 --- a/be/src/vec/exec/vaggregation_node.cpp +++ b/be/src/vec/exec/vaggregation_node.cpp @@ -102,26 +102,10 @@ static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE = AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), - _hash_table_compute_timer(nullptr), - _hash_table_input_counter(nullptr), - _expr_timer(nullptr), _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id), - _intermediate_tuple_desc(nullptr), _output_tuple_id(tnode.agg_node.output_tuple_id), - _output_tuple_desc(nullptr), _needs_finalize(tnode.agg_node.need_finalize), - _is_merge(false), - _serialize_key_timer(nullptr), - _merge_timer(nullptr), - _get_results_timer(nullptr), - _serialize_data_timer(nullptr), - _serialize_result_timer(nullptr), - _deserialize_data_timer(nullptr), - _hash_table_iterate_timer(nullptr), - _insert_keys_to_column_timer(nullptr), - _streaming_agg_timer(nullptr), - _hash_table_size_counter(nullptr), - _max_row_size_counter(nullptr) { + _is_merge(false) { if (tnode.agg_node.__isset.use_streaming_preaggregation) { _is_streaming_preagg = tnode.agg_node.use_streaming_preaggregation; if (_is_streaming_preagg) { @@ -593,7 +577,7 @@ Status AggregationNode::_get_without_key_result(RuntimeState* state, Block* bloc } for (int i = 0; i < _aggregate_evaluators.size(); ++i) { - auto column = columns[i].get(); + auto* column = columns[i].get(); _aggregate_evaluators[i]->insert_result_info( _agg_data->without_key + _offsets_of_aggregate_states[i], column); } @@ -816,13 +800,13 @@ Status AggregationNode::_reset_hash_table() { } }); - _aggregate_data_container.reset(new AggregateDataContainer( + _aggregate_data_container = std::make_unique<AggregateDataContainer>( sizeof(typename HashTableType::key_type), ((_total_size_of_aggregate_states + _align_aggregate_states - 1) / _align_aggregate_states) * - _align_aggregate_states)); - hash_table = HashTableType(); - _agg_arena_pool.reset(new Arena); + _align_aggregate_states); + agg_method.hash_table.reset(new HashTableType()); + _agg_arena_pool = std::make_unique<Arena>(); return Status::OK(); }, _agg_data->method_variant); @@ -845,7 +829,7 @@ void AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnR auto creator = [this](const auto& ctor, auto& key, auto& origin) { try { - HashMethodType::try_presis_key(key, origin, *_agg_arena_pool); + HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool); auto mapped = _aggregate_data_container->append_data(origin); auto st = _create_agg_status(mapped); if (!st) { diff --git a/be/src/vec/exec/vpartition_sort_node.cpp b/be/src/vec/exec/vpartition_sort_node.cpp index 6db90763fa7..6a7ebcffd1e 100644 --- a/be/src/vec/exec/vpartition_sort_node.cpp +++ b/be/src/vec/exec/vpartition_sort_node.cpp @@ -116,7 +116,7 @@ void VPartitionSortNode::_emplace_into_hash_table(const ColumnRawPtrs& key_colum auto creator = [&](const auto& ctor, auto& key, auto& origin) { HashMethodType::try_presis_key(key, origin, *_agg_arena_pool); - auto aggregate_data = _pool->add(new PartitionBlocks()); + auto* aggregate_data = _pool->add(new PartitionBlocks()); _value_places.push_back(aggregate_data); ctor(key, aggregate_data); _num_partition++; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org