This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch 2.1-tmp in repository https://gitbox.apache.org/repos/asf/doris.git
commit 45707f3019e7f8e74a838a93960e3da5b8a90dd3 Author: lihangyu <15605149...@163.com> AuthorDate: Tue Apr 2 11:15:28 2024 +0800 [Optimize] Move strings_pool from individual tree nodes to the tree itself (#33089) Previously, strings_pool was allocated within each tree node. However, due to the Arena's alignment of allocated chunks to at least 4K, this allocation size was excessively large for a single tree node. Consequently, when there were numerous nodes within the SubcolumnsTree, a significant portion of memory was wasted. Moving strings_pool to the tree itself optimizes memory usage and reduces wastage, improving overall efficiency. --- be/src/vec/columns/subcolumn_tree.h | 48 ++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/be/src/vec/columns/subcolumn_tree.h b/be/src/vec/columns/subcolumn_tree.h index 86baa46e4c8..75893f7a9f7 100644 --- a/be/src/vec/columns/subcolumn_tree.h +++ b/be/src/vec/columns/subcolumn_tree.h @@ -24,6 +24,7 @@ #include "runtime/exec_env.h" #include "runtime/thread_context.h" #include "vec/columns/column.h" +#include "vec/common/arena.h" #include "vec/common/hash_table/hash_map.h" #include "vec/common/string_ref.h" #include "vec/data_types/data_type.h" @@ -38,28 +39,17 @@ public: struct Node { enum Kind { TUPLE, NESTED, SCALAR }; - explicit Node(Kind kind_) : kind(kind_) { init_memory(); } - Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) { init_memory(); } + explicit Node(Kind kind_) : kind(kind_) {} + Node(Kind kind_, const NodeData& data_) : kind(kind_), data(data_) {} Node(Kind kind_, const NodeData& data_, const PathInData& path_) - : kind(kind_), data(data_), path(path_) { - init_memory(); - } - Node(Kind kind_, NodeData&& data_) : kind(kind_), data(std::move(data_)) { init_memory(); } + : kind(kind_), data(data_), path(path_) {} + Node(Kind kind_, NodeData&& data_) : kind(kind_), data(std::move(data_)) {} Node(Kind kind_, NodeData&& data_, const PathInData& 
path_) - : kind(kind_), data(std::move(data_)), path(path_) { - init_memory(); - } - - ~Node() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( - ExecEnv::GetInstance()->subcolumns_tree_tracker()); - strings_pool.reset(); - } + : kind(kind_), data(std::move(data_)), path(path_) {} Kind kind = TUPLE; const Node* parent = nullptr; - std::unique_ptr<Arena> strings_pool; std::unordered_map<StringRef, std::shared_ptr<Node>, StringRefHash> children; NodeData data; @@ -70,12 +60,6 @@ public: bool is_leaf_node() const { return kind == SCALAR && children.empty(); } - void init_memory() { - SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( - ExecEnv::GetInstance()->subcolumns_tree_tracker()); - strings_pool = std::make_unique<Arena>(); - } - // Only modify data and kind void modify(std::shared_ptr<Node>&& other) { data = std::move(other->data); @@ -89,13 +73,13 @@ public: kind = Kind::SCALAR; } - void add_child(std::string_view key, std::shared_ptr<Node> next_node) { + void add_child(std::string_view key, std::shared_ptr<Node> next_node, Arena& strings_pool) { next_node->parent = this; StringRef key_ref; { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( ExecEnv::GetInstance()->subcolumns_tree_tracker()); - key_ref = {strings_pool->insert(key.data(), key.length()), key.length()}; + key_ref = {strings_pool.insert(key.data(), key.length()), key.length()}; } children[key_ref] = std::move(next_node); } @@ -186,7 +170,7 @@ public: } else { auto next_kind = parts[i].is_nested ? 
Node::NESTED : Node::TUPLE; auto next_node = node_creator(next_kind, false); - current_node->add_child(String(parts[i].key), next_node); + current_node->add_child(String(parts[i].key), next_node, *strings_pool); current_node = next_node.get(); } } @@ -202,7 +186,7 @@ public: } auto next_node = node_creator(Node::SCALAR, false); - current_node->add_child(String(parts.back().key), next_node); + current_node->add_child(String(parts.back().key), next_node, *strings_pool); leaves.push_back(std::move(next_node)); return true; @@ -287,6 +271,16 @@ public: const_iterator begin() const { return leaves.begin(); } const_iterator end() const { return leaves.end(); } + ~SubcolumnsTree() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->subcolumns_tree_tracker()); + strings_pool.reset(); + } + + SubcolumnsTree() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(ExecEnv::GetInstance()->subcolumns_tree_tracker()); + strings_pool = std::make_shared<Arena>(); + } + private: const Node* find_impl(const PathInData& path, bool find_exact) const { if (!root) { @@ -307,7 +301,7 @@ private: return current_node; } - + std::shared_ptr<Arena> strings_pool; NodePtr root; Nodes leaves; }; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org