This is an automated email from the ASF dual-hosted git repository.

gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 8fd1eb0d1e [minor](hash table) parameterize hash table (#19653)
8fd1eb0d1e is described below

commit 8fd1eb0d1ebe85dae0424acdf34f936bfa2a6af0
Author: Gabriel <gabrielleeb...@gmail.com>
AuthorDate: Wed May 17 09:58:26 2023 +0800

    [minor](hash table) parameterize hash table (#19653)
---
 be/src/common/config.cpp                  |  3 +++
 be/src/common/config.h                    |  5 +++++
 be/src/vec/common/hash_table/hash_table.h | 10 +++++++---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index ae043599b9..09fcde0d94 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -103,6 +103,9 @@ DEFINE_mInt64(mmap_threshold, "134217728"); // bytes
 // Increase can reduce the number of hash table resize, but may waste more memory.
 DEFINE_mInt32(hash_table_double_grow_degree, "31");
 
+DEFINE_mInt32(max_fill_rate, "2");
+
+DEFINE_mInt32(double_resize_threshold, "20");
 // Expand the hash table before inserting data, the maximum expansion size.
 // There are fewer duplicate keys, reducing the number of resize hash tables
 // There are many duplicate keys, and the hash table filled bucket is far less than the hash table build bucket.
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 9f6aab15f0..82fc2f3a57 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -141,6 +141,11 @@ DECLARE_mInt64(mmap_threshold); // bytes
 // Increase can reduce the number of hash table resize, but may waste more memory.
 DECLARE_mInt32(hash_table_double_grow_degree);
 
+// The max fill rate for hash table
+DECLARE_mInt32(max_fill_rate);
+
+DECLARE_mInt32(double_resize_threshold);
+
 // Expand the hash table before inserting data, the maximum expansion size.
 // There are fewer duplicate keys, reducing the number of resize hash tables
 // There are many duplicate keys, and the hash table filled bucket is far less than the hash table build bucket.
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 1a1eafcc13..7ee35af64c 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -238,6 +238,8 @@ void insert_set_mapped(MappedType* dest, const ValueType& src) {
     *dest = src.second;
 }
 
+static doris::vectorized::Int32 double_resize_threshold = doris::config::double_resize_threshold;
+
 /** Determines the size of the hash table, and when and how much it should be resized.
   */
 template <size_t initial_size_degree = 10>
@@ -246,6 +248,8 @@ struct HashTableGrower {
     doris::vectorized::UInt8 size_degree = initial_size_degree;
     doris::vectorized::Int64 double_grow_degree = doris::config::hash_table_double_grow_degree;
 
+    doris::vectorized::Int32 max_fill_rate = doris::config::max_fill_rate;
+
     /// The size of the hash table in the cells.
     size_t buf_size() const { return 1ULL << size_degree; }
 
@@ -253,7 +257,7 @@ struct HashTableGrower {
     size_t max_fill() const {
         return size_degree < double_grow_degree
                        ? 1ULL << (size_degree - 1)
-                       : (1ULL << size_degree) - (1ULL << (size_degree - 2));
+                       : (1ULL << size_degree) - (1ULL << (size_degree - max_fill_rate));
     }
 
     size_t mask() const { return buf_size() - 1; }
@@ -271,7 +275,7 @@ struct HashTableGrower {
     bool overflow(size_t elems) const { return elems > max_fill(); }
 
     /// Increase the size of the hash table.
-    void increase_size() { size_degree += size_degree >= 23 ? 1 : 2; }
+    void increase_size() { size_degree += size_degree >= double_resize_threshold ? 1 : 2; }
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
     void set(size_t num_elems) {
@@ -336,7 +340,7 @@ public:
     bool overflow(size_t elems) const { return elems > precalculated_max_fill; }
 
     /// Increase the size of the hash table.
-    void increase_size() { increase_size_degree(size_degree_ >= 23 ? 1 : 2); }
+    void increase_size() { increase_size_degree(size_degree_ >= double_resize_threshold ? 1 : 2); }
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
     void set(size_t num_elems) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to