This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch tpc_preview5 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 6cc8c2827de76ef49eb1fa2d7ecf5c20d7d8669f Author: happenlee <[email protected]> AuthorDate: Wed Dec 24 20:37:32 2025 +0800 scanner merge after projection --- be/src/vec/exec/scan/scanner.cpp | 34 ++++++++++++++++++++++++++++++++-- be/src/vec/exec/scan/scanner.h | 9 +++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/scan/scanner.cpp b/be/src/vec/exec/scan/scanner.cpp index 5dced63feb6..52b378667d6 100644 --- a/be/src/vec/exec/scan/scanner.cpp +++ b/be/src/vec/exec/scan/scanner.cpp @@ -78,8 +78,38 @@ Status Scanner::init(RuntimeState* state, const VExprContextSPtrs& conjuncts) { Status Scanner::get_block_after_projects(RuntimeState* state, vectorized::Block* block, bool* eos) { auto& row_descriptor = _local_state->_parent->row_descriptor(); if (_output_row_descriptor) { - _origin_block.clear_column_data(row_descriptor.num_materialized_slots()); - RETURN_IF_ERROR(get_block(state, &_origin_block, eos)); + if (_alreay_eos) { + *eos = true; + _padding_block.swap(_origin_block); + } else { + _origin_block.clear_column_data(row_descriptor.num_materialized_slots()); + while (_padding_block.rows() < state->batch_size() / 4 && !*eos) { + RETURN_IF_ERROR(get_block(state, &_origin_block, eos)); + if (_origin_block.rows() >= state->batch_size() / 4) { + break; + } + + if (_origin_block.rows() + _padding_block.rows() <= state->batch_size()) { + _merge_padding_block(); + _origin_block.clear_column_data(row_descriptor.num_materialized_slots()); + } else { + if (_origin_block.rows() < _padding_block.rows()) { + _padding_block.swap(_origin_block); + } + break; + } + } + } + + // first output the origin block change eos = false, next time output padding block + // set the eos to true + if (*eos && !_padding_block.empty() && !_origin_block.empty()) { + _alreay_eos = true; + *eos = false; + } + if (_origin_block.empty() && !_padding_block.empty()) { + _padding_block.swap(_origin_block); + } return _do_projections(&_origin_block, block); } else { return get_block(state, block, eos); diff --git a/be/src/vec/exec/scan/scanner.h b/be/src/vec/exec/scan/scanner.h index 26023c58b3b..b4b0cd7dbe6 100644 --- a/be/src/vec/exec/scan/scanner.h +++ b/be/src/vec/exec/scan/scanner.h @@ -103,6 +103,13 @@ protected: // Subclass should implement this to return data. virtual Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) = 0; + void _merge_padding_block() { + if (_padding_block.empty()) { + _padding_block.swap(_origin_block); + } + (void)MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block); + } + // Update the counters before closing this scanner virtual void _collect_profile_before_close(); @@ -217,6 +224,8 @@ protected: // Used in common subexpression elimination to compute intermediate results. std::vector<vectorized::VExprContextSPtrs> _intermediate_projections; vectorized::Block _origin_block; + vectorized::Block _padding_block; + bool _alreay_eos = false; VExprContextSPtrs _common_expr_ctxs_push_down; // Late arriving runtime filters will update _conjuncts. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
