This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch 2.1-tmp
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 45707f3019e7f8e74a838a93960e3da5b8a90dd3
Author: lihangyu <15605149...@163.com>
AuthorDate: Tue Apr 2 11:15:28 2024 +0800

    [Optimize] Move strings_pool from individual tree nodes to the tree itself 
(#33089)
    
    Previously, strings_pool was allocated within each tree node. However, 
because the Arena sizes each allocated chunk to at least 4K, this allocation 
was excessively large for a single tree node. Consequently, when there were 
numerous nodes within the SubcolumnsTree, a significant portion of memory was 
wasted. Moving strings_pool to the tree itself optimizes memory usage and 
reduces wastage, improving overall efficiency.
---
 be/src/vec/columns/subcolumn_tree.h | 48 ++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 27 deletions(-)

diff --git a/be/src/vec/columns/subcolumn_tree.h 
b/be/src/vec/columns/subcolumn_tree.h
index 86baa46e4c8..75893f7a9f7 100644
--- a/be/src/vec/columns/subcolumn_tree.h
+++ b/be/src/vec/columns/subcolumn_tree.h
@@ -24,6 +24,7 @@
 #include "runtime/exec_env.h"
 #include "runtime/thread_context.h"
 #include "vec/columns/column.h"
+#include "vec/common/arena.h"
 #include "vec/common/hash_table/hash_map.h"
 #include "vec/common/string_ref.h"
 #include "vec/data_types/data_type.h"
@@ -38,28 +39,17 @@ public:
     struct Node {
         enum Kind { TUPLE, NESTED, SCALAR };
 
-        explicit Node(Kind kind_) : kind(kind_) { init_memory(); }
-        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) { 
init_memory(); }
+        explicit Node(Kind kind_) : kind(kind_) {}
+        Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {}
         Node(Kind kind_, const NodeData& data_, const PathInData& path_)
-                : kind(kind_), data(data_), path(path_) {
-            init_memory();
-        }
-        Node(Kind kind_, NodeData&& data_) : kind(kind_), 
data(std::move(data_)) { init_memory(); }
+                : kind(kind_), data(data_), path(path_) {}
+        Node(Kind kind_, NodeData&& data_) : kind(kind_), 
data(std::move(data_)) {}
         Node(Kind kind_, NodeData&& data_, const PathInData& path_)
-                : kind(kind_), data(std::move(data_)), path(path_) {
-            init_memory();
-        }
-
-        ~Node() {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(
-                    ExecEnv::GetInstance()->subcolumns_tree_tracker());
-            strings_pool.reset();
-        }
+                : kind(kind_), data(std::move(data_)), path(path_) {}
 
         Kind kind = TUPLE;
         const Node* parent = nullptr;
 
-        std::unique_ptr<Arena> strings_pool;
         std::unordered_map<StringRef, std::shared_ptr<Node>, StringRefHash> 
children;
 
         NodeData data;
@@ -70,12 +60,6 @@ public:
 
         bool is_leaf_node() const { return kind == SCALAR && children.empty(); 
}
 
-        void init_memory() {
-            SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(
-                    ExecEnv::GetInstance()->subcolumns_tree_tracker());
-            strings_pool = std::make_unique<Arena>();
-        }
-
         // Only modify data and kind
         void modify(std::shared_ptr<Node>&& other) {
             data = std::move(other->data);
@@ -89,13 +73,13 @@ public:
             kind = Kind::SCALAR;
         }
 
-        void add_child(std::string_view key, std::shared_ptr<Node> next_node) {
+        void add_child(std::string_view key, std::shared_ptr<Node> next_node, 
Arena& strings_pool) {
             next_node->parent = this;
             StringRef key_ref;
             {
                 SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(
                         ExecEnv::GetInstance()->subcolumns_tree_tracker());
-                key_ref = {strings_pool->insert(key.data(), key.length()), 
key.length()};
+                key_ref = {strings_pool.insert(key.data(), key.length()), 
key.length()};
             }
             children[key_ref] = std::move(next_node);
         }
@@ -186,7 +170,7 @@ public:
             } else {
                 auto next_kind = parts[i].is_nested ? Node::NESTED : 
Node::TUPLE;
                 auto next_node = node_creator(next_kind, false);
-                current_node->add_child(String(parts[i].key), next_node);
+                current_node->add_child(String(parts[i].key), next_node, 
*strings_pool);
                 current_node = next_node.get();
             }
         }
@@ -202,7 +186,7 @@ public:
         }
 
         auto next_node = node_creator(Node::SCALAR, false);
-        current_node->add_child(String(parts.back().key), next_node);
+        current_node->add_child(String(parts.back().key), next_node, 
*strings_pool);
         leaves.push_back(std::move(next_node));
 
         return true;
@@ -287,6 +271,16 @@ public:
     const_iterator begin() const { return leaves.begin(); }
     const_iterator end() const { return leaves.end(); }
 
+    ~SubcolumnsTree() {
+        
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->subcolumns_tree_tracker());
+        strings_pool.reset();
+    }
+
+    SubcolumnsTree() {
+        
SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->subcolumns_tree_tracker());
+        strings_pool = std::make_shared<Arena>();
+    }
+
 private:
     const Node* find_impl(const PathInData& path, bool find_exact) const {
         if (!root) {
@@ -307,7 +301,7 @@ private:
 
         return current_node;
     }
-
+    std::shared_ptr<Arena> strings_pool;
     NodePtr root;
     Nodes leaves;
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to