This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch dev-1.1.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/dev-1.1.1 by this push:
     new 7e44079212 [improvement 1.1.1]fix_vec_hash_table_resize_dev1.1.1 (#10968)
7e44079212 is described below

commit 7e44079212188157871659f93e0eae3f326a36a8
Author: Xinyi Zou <zouxiny...@gmail.com>
AuthorDate: Mon Jul 18 14:37:04 2022 +0800

    [improvement 1.1.1]fix_vec_hash_table_resize_dev1.1.1 (#10968)
---
 be/CMakeLists.txt                         |  9 +++++++++
 be/src/vec/common/hash_table/hash_table.h | 31 +++++++++++++++++++++++++------
 be/src/vec/exec/join/vhash_join_node.cpp  |  1 +
 build.sh                                  |  6 ++++++
 run-be-ut.sh                              |  1 +
 5 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index d2368f4b7b..6118cae793 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -415,6 +415,15 @@ if (WITH_LZO)
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DDORIS_WITH_LZO")
 endif()
 
+# `STRICT_MEMORY_USE=ON` expects BE to use less memory, and gives priority to ensuring stability
+# when the cluster memory is limited.
+# TODO In the future, expect a dynamic soft memory limit, combined with real-time memory usage of the cluster,
+# to control the main memory consumers, including HashTable, LRU Cache elimination strategy,
+# ChunkAllocator cache strategy, Disk IO buffer cache strategy, etc.
+if (STRICT_MEMORY_USE)
+    set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DSTRICT_MEMORY_USE")
+endif()
+
 if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 7.0)
     set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -faligned-new")
 endif()
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
index 920c819694..a60e71e42b 100644
--- a/be/src/vec/common/hash_table/hash_table.h
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -244,11 +244,22 @@ template <size_t initial_size_degree = 10>
 struct HashTableGrower {
     /// The state of this structure is enough to get the buffer size of the hash table.
     doris::vectorized::UInt8 size_degree = initial_size_degree;
+    doris::vectorized::Int64 double_grow_degree = 31; // 2GB
 
     /// The size of the hash table in the cells.
     size_t buf_size() const { return 1ULL << size_degree; }
 
+#ifndef STRICT_MEMORY_USE
     size_t max_fill() const { return 1ULL << (size_degree - 1); }
+#else
+    // When capacity is greater than 2G, grow when 75% of the capacity is satisfied.
+    size_t max_fill() const {
+        return size_degree < double_grow_degree
+                       ? 1ULL << (size_degree - 1)
+                       : (1ULL << size_degree) - (1ULL << (size_degree - 2));
+    }
+#endif
+
     size_t mask() const { return buf_size() - 1; }
 
     /// From the hash value, get the cell number in the hash table.
@@ -268,12 +279,20 @@ struct HashTableGrower {
 
     /// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
     void set(size_t num_elems) {
-        size_degree =
-                num_elems <= 1
-                        ? initial_size_degree
-                        : ((initial_size_degree > static_cast<size_t>(log2(num_elems - 1)) + 2)
-                                   ? initial_size_degree
-                                   : (static_cast<size_t>(log2(num_elems - 1)) + 2));
+#ifndef STRICT_MEMORY_USE
+        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 2;
+#else
+        size_t fill_capacity = static_cast<size_t>(log2(num_elems - 1)) + 1;
+        fill_capacity =
+                fill_capacity < double_grow_degree
+                        ? fill_capacity + 1
+                        : (num_elems < (1ULL << fill_capacity) - (1ULL << (fill_capacity - 2))
+                                   ? fill_capacity
+                                   : fill_capacity + 1);
+#endif
+        size_degree = num_elems <= 1 ? initial_size_degree
+                                     : (initial_size_degree > fill_capacity ? initial_size_degree
+                                                                            : fill_capacity);
     }
 
     void set_buf_size(size_t buf_size_) {
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 0309b4fa94..efab21aaf1 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -69,6 +69,7 @@ struct ProcessHashTableBuild {
         SCOPED_TIMER(_join_node->_build_table_insert_timer);
         // only not build_unique, we need expanse hash table before insert data
         if constexpr (!build_unique) {
+            // _rows contains null row, which will cause hash table resize to be large.
             hash_table_ctx.hash_table.expanse_for_add_elem(_rows);
         }
         hash_table_ctx.hash_table.reset_resize_timer();
diff --git a/build.sh b/build.sh
index 7d1071fd19..4f53378a8e 100755
--- a/build.sh
+++ b/build.sh
@@ -215,6 +215,10 @@ if [[ -z ${STRIP_DEBUG_INFO} ]]; then
     STRIP_DEBUG_INFO=OFF
 fi
 
+if [[ -z ${STRICT_MEMORY_USE} ]]; then
+    STRICT_MEMORY_USE=OFF
+fi
+
 echo "Get params:
     BUILD_BE            -- $BUILD_BE
     BUILD_FE            -- $BUILD_FE
@@ -231,6 +235,7 @@ echo "Get params:
     BUILD_META_TOOL     -- $BUILD_META_TOOL
     USE_LLD             -- $USE_LLD
     STRIP_DEBUG_INFO    -- $STRIP_DEBUG_INFO
+    STRICT_MEMORY_USE   -- $STRICT_MEMORY_USE
 "
 
 # Clean and build generated code
@@ -267,6 +272,7 @@ if [ ${BUILD_BE} -eq 1 ] ; then
             -DBUILD_META_TOOL=${BUILD_META_TOOL} \
             -DUSE_LLD=${USE_LLD} \
             -DSTRIP_DEBUG_INFO=${STRIP_DEBUG_INFO} \
+            -DSTRICT_MEMORY_USE=${STRICT_MEMORY_USE} \
             -DUSE_AVX2=${USE_AVX2} \
             -DGLIBC_COMPATIBILITY=${GLIBC_COMPATIBILITY} ../
     ${BUILD_SYSTEM} -j ${PARALLEL}
diff --git a/run-be-ut.sh b/run-be-ut.sh
index af8ae5bac7..930c013918 100755
--- a/run-be-ut.sh
+++ b/run-be-ut.sh
@@ -142,6 +142,7 @@ ${CMAKE_CMD} -G "${GENERATOR}" \
     -DBUILD_META_TOOL=OFF \
     -DWITH_MYSQL=OFF \
     -DWITH_KERBEROS=OFF \
+    -DSTRICT_MEMORY_USE=OFF \
     ${CMAKE_USE_CCACHE} ../
 ${BUILD_SYSTEM} -j ${PARALLEL} $RUN_FILE
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to