This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-unstable
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 83f243acf0ddcfd49b81b661889d59268e2994f2
Author: zhengyu <freeman.zhang1...@gmail.com>
AuthorDate: Wed Nov 9 08:40:07 2022 +0800

    [enhancement](load) shrink reserved buffer for page builder (#14012) (#14014)
    
    * [enhancement](load) shrink reserved buffer for page builder (#14012)
    
    For a table with hundreds of text-type columns, flushing its memtable may
    consume a huge amount of memory.
    This memory is consumed when initializing the page builder, as it reserves
    1MB for each column.
    So memory consumption grows in proportion to the number of columns.
    Shrinking the reservation may reduce memory usage substantially during the
    load process.
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
    
    * response to the review
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
    
    * Update binary_plain_page.h
    
    * Update binary_dict_page.cpp
    
    * Update binary_plain_page.h
    
    Signed-off-by: freemandealer <freeman.zhang1...@gmail.com>
---
 be/src/olap/rowset/segment_v2/binary_dict_page.cpp |  8 +++++++-
 be/src/olap/rowset/segment_v2/binary_plain_page.h  | 14 +++++++++++---
 be/src/olap/rowset/segment_v2/options.h            |  2 ++
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp 
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index b8d3fa98c6..f86c16f134 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -37,7 +37,9 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const 
PageBuilderOptions& options)
     // TODO: the data page builder type can be created by Factory according to 
user config
     _data_page_builder.reset(new 
BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options));
     PageBuilderOptions dict_builder_options;
-    dict_builder_options.data_page_size = _options.dict_page_size;
+    dict_builder_options.data_page_size =
+            std::min(_options.data_page_size, _options.dict_page_size);
+    dict_builder_options.is_dict_page = true;
     _dict_builder.reset(new 
BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options));
     reset();
 }
@@ -118,6 +120,10 @@ Status BinaryDictPageBuilder::add(const uint8_t* vals, 
size_t* count) {
 }
 
 OwnedSlice BinaryDictPageBuilder::finish() {
+    if (VLOG_DEBUG_IS_ON && _encoding_type == DICT_ENCODING) {
+        VLOG_DEBUG << "dict page size:" << _dict_builder->size();
+    }
+
     DCHECK(!_finished);
     _finished = true;
 
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h 
b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index 96cfc392a5..9faaeab3eb 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -53,8 +53,14 @@ public:
     }
 
     bool is_page_full() override {
-        // data_page_size is 0, do not limit the page size
-        return _options.data_page_size != 0 && _size_estimate > 
_options.data_page_size;
+        bool ret = false;
+        if (_options.is_dict_page) {
+            // dict_page_size is 0, do not limit the page size
+            ret = _options.dict_page_size != 0 && _size_estimate > 
_options.dict_page_size;
+        } else {
+            ret = _options.data_page_size != 0 && _size_estimate > 
_options.data_page_size;
+        }
+        return ret;
     }
 
     Status add(const uint8_t* vals, size_t* count) override {
@@ -104,7 +110,9 @@ public:
     void reset() override {
         _offsets.clear();
         _buffer.clear();
-        _buffer.reserve(_options.data_page_size == 0 ? 1024 : 
_options.data_page_size);
+        _buffer.reserve(_options.data_page_size == 0
+                                ? 1024
+                                : std::min(_options.data_page_size, 
_options.dict_page_size));
         _size_estimate = sizeof(uint32_t);
         _finished = false;
         _last_value_size = 0;
diff --git a/be/src/olap/rowset/segment_v2/options.h 
b/be/src/olap/rowset/segment_v2/options.h
index 9405eb19cf..19041f4c51 100644
--- a/be/src/olap/rowset/segment_v2/options.h
+++ b/be/src/olap/rowset/segment_v2/options.h
@@ -30,6 +30,8 @@ struct PageBuilderOptions {
     size_t dict_page_size = DEFAULT_PAGE_SIZE;
 
     bool need_check_bitmap = true;
+
+    bool is_dict_page = false; // page used for saving dictionary
 };
 
 struct PageDecoderOptions {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to