This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-unstable in repository https://gitbox.apache.org/repos/asf/doris.git
commit 83f243acf0ddcfd49b81b661889d59268e2994f2 Author: zhengyu <freeman.zhang1...@gmail.com> AuthorDate: Wed Nov 9 08:40:07 2022 +0800 [enhancement](load) shrink reserved buffer for page builder (#14012) (#14014) * [enhancement](load) shrink reserved buffer for page builder (#14012) For a table with hundreds of text-type columns, flushing its memtable may consume a huge amount of memory. This memory is consumed when initializing the page builder, as it reserves 1MB for each column, so memory consumption grows in proportion to the number of columns. Shrinking the reservation may reduce memory usage substantially during the load process. Signed-off-by: freemandealer <freeman.zhang1...@gmail.com> * response to the review Signed-off-by: freemandealer <freeman.zhang1...@gmail.com> * Update binary_plain_page.h * Update binary_dict_page.cpp * Update binary_plain_page.h Signed-off-by: freemandealer <freeman.zhang1...@gmail.com> --- be/src/olap/rowset/segment_v2/binary_dict_page.cpp | 8 +++++++- be/src/olap/rowset/segment_v2/binary_plain_page.h | 14 +++++++++++--- be/src/olap/rowset/segment_v2/options.h | 2 ++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp index b8d3fa98c6..f86c16f134 100644 --- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp +++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp @@ -37,7 +37,9 @@ BinaryDictPageBuilder::BinaryDictPageBuilder(const PageBuilderOptions& options) // TODO: the data page builder type can be created by Factory according to user config _data_page_builder.reset(new BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>(options)); PageBuilderOptions dict_builder_options; - dict_builder_options.data_page_size = _options.dict_page_size; + dict_builder_options.data_page_size = + std::min(_options.data_page_size, _options.dict_page_size); + dict_builder_options.is_dict_page = true; _dict_builder.reset(new 
BinaryPlainPageBuilder<OLAP_FIELD_TYPE_VARCHAR>(dict_builder_options)); reset(); } @@ -118,6 +120,10 @@ Status BinaryDictPageBuilder::add(const uint8_t* vals, size_t* count) { } OwnedSlice BinaryDictPageBuilder::finish() { + if (VLOG_DEBUG_IS_ON && _encoding_type == DICT_ENCODING) { + VLOG_DEBUG << "dict page size:" << _dict_builder->size(); + } + DCHECK(!_finished); _finished = true; diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h index 96cfc392a5..9faaeab3eb 100644 --- a/be/src/olap/rowset/segment_v2/binary_plain_page.h +++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h @@ -53,8 +53,14 @@ public: } bool is_page_full() override { - // data_page_size is 0, do not limit the page size - return _options.data_page_size != 0 && _size_estimate > _options.data_page_size; + bool ret = false; + if (_options.is_dict_page) { + // dict_page_size is 0, do not limit the page size + ret = _options.dict_page_size != 0 && _size_estimate > _options.dict_page_size; + } else { + ret = _options.data_page_size != 0 && _size_estimate > _options.data_page_size; + } + return ret; } Status add(const uint8_t* vals, size_t* count) override { @@ -104,7 +110,9 @@ public: void reset() override { _offsets.clear(); _buffer.clear(); - _buffer.reserve(_options.data_page_size == 0 ? 1024 : _options.data_page_size); + _buffer.reserve(_options.data_page_size == 0 + ? 
1024 + : std::min(_options.data_page_size, _options.dict_page_size)); _size_estimate = sizeof(uint32_t); _finished = false; _last_value_size = 0; diff --git a/be/src/olap/rowset/segment_v2/options.h b/be/src/olap/rowset/segment_v2/options.h index 9405eb19cf..19041f4c51 100644 --- a/be/src/olap/rowset/segment_v2/options.h +++ b/be/src/olap/rowset/segment_v2/options.h @@ -30,6 +30,8 @@ struct PageBuilderOptions { size_t dict_page_size = DEFAULT_PAGE_SIZE; bool need_check_bitmap = true; + + bool is_dict_page = false; // page used for saving dictionary }; struct PageDecoderOptions { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org