This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 8fd1eb0d1e [minor](hash table) parameterize hash table (#19653) 8fd1eb0d1e is described below commit 8fd1eb0d1ebe85dae0424acdf34f936bfa2a6af0 Author: Gabriel <gabrielleeb...@gmail.com> AuthorDate: Wed May 17 09:58:26 2023 +0800 [minor](hash table) parameterize hash table (#19653) --- be/src/common/config.cpp | 3 +++ be/src/common/config.h | 5 +++++ be/src/vec/common/hash_table/hash_table.h | 10 +++++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index ae043599b9..09fcde0d94 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -103,6 +103,9 @@ DEFINE_mInt64(mmap_threshold, "134217728"); // bytes // Increase can reduce the number of hash table resize, but may waste more memory. DEFINE_mInt32(hash_table_double_grow_degree, "31"); +DEFINE_mInt32(max_fill_rate, "2"); + +DEFINE_mInt32(double_resize_threshold, "20"); // Expand the hash table before inserting data, the maximum expansion size. // There are fewer duplicate keys, reducing the number of resize hash tables // There are many duplicate keys, and the hash table filled bucket is far less than the hash table build bucket. diff --git a/be/src/common/config.h b/be/src/common/config.h index 9f6aab15f0..82fc2f3a57 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -141,6 +141,11 @@ DECLARE_mInt64(mmap_threshold); // bytes // Increase can reduce the number of hash table resize, but may waste more memory. DECLARE_mInt32(hash_table_double_grow_degree); +// The max fill rate for hash table +DECLARE_mInt32(max_fill_rate); + +DECLARE_mInt32(double_resize_threshold); + // Expand the hash table before inserting data, the maximum expansion size. // There are fewer duplicate keys, reducing the number of resize hash tables // There are many duplicate keys, and the hash table filled bucket is far less than the hash table build bucket. diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h index 1a1eafcc13..7ee35af64c 100644 --- a/be/src/vec/common/hash_table/hash_table.h +++ b/be/src/vec/common/hash_table/hash_table.h @@ -238,6 +238,8 @@ void insert_set_mapped(MappedType* dest, const ValueType& src) { *dest = src.second; } +static doris::vectorized::Int32 double_resize_threshold = doris::config::double_resize_threshold; + /** Determines the size of the hash table, and when and how much it should be resized. */ template <size_t initial_size_degree = 10> @@ -246,6 +248,8 @@ struct HashTableGrower { doris::vectorized::UInt8 size_degree = initial_size_degree; doris::vectorized::Int64 double_grow_degree = doris::config::hash_table_double_grow_degree; + doris::vectorized::Int32 max_fill_rate = doris::config::max_fill_rate; + /// The size of the hash table in the cells. size_t buf_size() const { return 1ULL << size_degree; } @@ -253,7 +257,7 @@ struct HashTableGrower { size_t max_fill() const { return size_degree < double_grow_degree ? 1ULL << (size_degree - 1) - : (1ULL << size_degree) - (1ULL << (size_degree - 2)); + : (1ULL << size_degree) - (1ULL << (size_degree - max_fill_rate)); } size_t mask() const { return buf_size() - 1; } @@ -271,7 +275,7 @@ struct HashTableGrower { bool overflow(size_t elems) const { return elems > max_fill(); } /// Increase the size of the hash table. - void increase_size() { size_degree += size_degree >= 23 ? 1 : 2; } + void increase_size() { size_degree += size_degree >= double_resize_threshold ? 1 : 2; } /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table. void set(size_t num_elems) { @@ -336,7 +340,7 @@ public: bool overflow(size_t elems) const { return elems > precalculated_max_fill; } /// Increase the size of the hash table. - void increase_size() { increase_size_degree(size_degree_ >= 23 ? 1 : 2); } + void increase_size() { increase_size_degree(size_degree_ >= double_resize_threshold ? 1 : 2); } /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table. void set(size_t num_elems) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org