This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 86536167a0a [Improvement](segment iterator) Optimize column row reservation to reduce overhead (#42060)
86536167a0a is described below
commit 86536167a0a099d53c1a787f167905dcfa049bae
Author: airborne12 <[email protected]>
AuthorDate: Tue Oct 22 18:51:09 2024 +0800
[Improvement](segment iterator) Optimize column row reservation to reduce overhead (#42060)
## Proposed changes
This PR improves the segment iterator by reducing the number of rows reserved per column when the row bitmap cardinality is smaller than the block row limit (`_opts.block_row_max`). The optimization aims to improve memory efficiency and performance by avoiding unnecessarily large row allocations.
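For illustration only (not part of the patch): a minimal, self-contained C++ sketch of the reservation clamp this change applies. `FakeColumn` and `reserve_limit` are hypothetical stand-ins for the iterator's column type and the inline `std::min` call; only the clamping logic mirrors the diff below.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for the iterator's output column type.
struct FakeColumn {
    std::vector<int64_t> data;
    void reserve(size_t n) { data.reserve(n); }
};

// Clamp the per-column reservation: never more than the block size, and never
// more than the number of rows remaining in the row bitmap.
uint64_t reserve_limit(uint64_t row_bitmap_cardinality, uint32_t block_row_max) {
    return std::min(row_bitmap_cardinality, uint64_t(block_row_max));
}

int main() {
    FakeColumn col;
    uint64_t rows_in_bitmap = 100;  // e.g. a sparse result after predicate filtering
    uint32_t block_row_max = 4096;  // typical block size

    // Before the change: always reserve block_row_max rows (4096 here).
    // After the change:  reserve only min(100, 4096) = 100 rows.
    uint64_t n = reserve_limit(rows_in_bitmap, block_row_max);
    col.reserve(n);
    std::printf("reserved %llu rows instead of %u\n", (unsigned long long)n, block_row_max);
    return 0;
}
```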
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index a921674a1e5..985cdc16e68 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1989,6 +1989,9 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
if (UNLIKELY(!_lazy_inited)) {
RETURN_IF_ERROR(_lazy_init());
_lazy_inited = true;
+ // If the row bitmap size is smaller than block_row_max, there's no need to reserve that many column rows.
+ auto nrows_reserve_limit =
+ std::min(_row_bitmap.cardinality(), uint64_t(_opts.block_row_max));
if (_lazy_materialization_read || _opts.record_rowids || _is_need_expr_eval) {
_block_rowids.resize(_opts.block_row_max);
}
@@ -2013,7 +2016,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
storage_column_type->is_nullable(), _opts.io_ctx.reader_type));
_current_return_columns[cid]->set_rowset_segment_id(
{_segment->rowset_id(), _segment->id()});
- _current_return_columns[cid]->reserve(_opts.block_row_max);
+ _current_return_columns[cid]->reserve(nrows_reserve_limit);
} else if (i >= block->columns()) {
// if i >= block->columns means the column and not the pred_column means `column i` is
// a delete condition column. but the column is not effective in the segment. so we just
@@ -2024,7 +2027,7 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
// TODO: skip read the not effective delete column to speed up segment read.
_current_return_columns[cid] =
Schema::get_data_type_ptr(*column_desc)->create_column();
- _current_return_columns[cid]->reserve(_opts.block_row_max);
+ _current_return_columns[cid]->reserve(nrows_reserve_limit);
}
}
@@ -2049,7 +2052,8 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
if (_can_opt_topn_reads()) {
nrows_read_limit = std::min(static_cast<uint32_t>(_opts.topn_limit), nrows_read_limit);
}
-
+ // If the row bitmap size is smaller than nrows_read_limit, there's no need to reserve that many column rows.
+ nrows_read_limit = std::min(_row_bitmap.cardinality(), uint64_t(nrows_read_limit));
DBUG_EXECUTE_IF("segment_iterator.topn_opt_1", {
if (nrows_read_limit != 1) {
return Status::Error<ErrorCode::INTERNAL_ERROR>("topn opt 1 execute failed: {}",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]