This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f96ac18f082 [Improvement](segment iterator) Optimize column row reservation to reduce overhead #42060 (#42372)
f96ac18f082 is described below

commit f96ac18f0826f4255ca8b12b01c81d18b1ff03fd
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Thu Oct 24 14:05:44 2024 +0800

    [Improvement](segment iterator) Optimize column row reservation to reduce overhead #42060 (#42372)
    
    cherry pick from #42060
---
 be/src/olap/rowset/segment_v2/segment_iterator.cpp | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 04ec5830d28..faad089e09f 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1987,6 +1987,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     if (UNLIKELY(!_lazy_inited)) {
         RETURN_IF_ERROR(_lazy_init());
         _lazy_inited = true;
+        // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows.
+        auto nrows_reserve_limit =
+                std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max));
         if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) {
             _block_rowids.resize(_opts.block_row_max);
         }
@@ -2011,7 +2014,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                                 storage_column_type->is_nullable(), _opts.io_ctx.reader_type));
                 _current_return_columns[cid]->set_rowset_segment_id(
                         {_segment->rowset_id(), _segment->id()});
-                _current_return_columns[cid]->reserve(_opts.block_row_max);
+                _current_return_columns[cid]->reserve(nrows_reserve_limit);
             } else if (i >= block->columns()) {
                 // if i >= block->columns means the column and not the pred_column means `column i` is
                 // a delete condition column. but the column is not effective in the segment. so we just
@@ -2022,7 +2025,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
                 // TODO: skip read the not effective delete column to speed up segment read.
                 _current_return_columns[cid] =
                         Schema::get_data_type_ptr(*column_desc)->create_column();
-                _current_return_columns[cid]->reserve(_opts.block_row_max);
+                _current_return_columns[cid]->reserve(nrows_reserve_limit);
             }
         }
 
@@ -2047,7 +2050,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
     if (_can_opt_topn_reads()) {
         nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
     }
-
+    // If the row bitmap size is smaller than nrows_read_limit, there's no need to reserve that many column rows.
+    nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit));
     DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", {
         if (nrows_read_limit != 1) {
             return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",
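
Side note (not part of the commit): the sketch below is a minimal, self-contained C++ illustration of the pattern this patch applies, namely capping a column's reserve() by the number of rows that actually survive filtering instead of always reserving block_row_max. Names such as FakeColumn, block_row_max, and selected_rows are hypothetical stand-ins and are not Doris APIs.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a return column; only reserve() matters here.
struct FakeColumn {
    std::vector<int64_t> data;
    void reserve(size_t n) { data.reserve(n); }
};

int main() {
    // Assumptions: block_row_max mirrors _opts.block_row_max, and
    // selected_rows mirrors _row_bitmap.cardinality() after filtering.
    const uint32_t block_row_max = 4096;
    const uint64_t selected_rows = 37;  // only a few rows survive filtering

    // Before the patch every column reserved block_row_max rows; after the
    // patch the reservation is capped by the surviving row count.
    const uint64_t nrows_reserve_limit =
            std::min<uint64_t>(selected_rows, block_row_max);

    FakeColumn column;
    column.reserve(nrows_reserve_limit);

    std::cout << "reserved " << nrows_reserve_limit << " rows instead of "
              << block_row_max << '\n';
    return 0;
}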


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org
