This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch tpc_preview5
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 6cc8c2827de76ef49eb1fa2d7ecf5c20d7d8669f
Author: happenlee <[email protected]>
AuthorDate: Wed Dec 24 20:37:32 2025 +0800

    scanner merge after projection
---
 be/src/vec/exec/scan/scanner.cpp | 34 ++++++++++++++++++++++++++++++++--
 be/src/vec/exec/scan/scanner.h   |  9 +++++++++
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/scan/scanner.cpp b/be/src/vec/exec/scan/scanner.cpp
index 5dced63feb6..52b378667d6 100644
--- a/be/src/vec/exec/scan/scanner.cpp
+++ b/be/src/vec/exec/scan/scanner.cpp
@@ -78,8 +78,38 @@ Status Scanner::init(RuntimeState* state, const 
VExprContextSPtrs& conjuncts) {
 Status Scanner::get_block_after_projects(RuntimeState* state, 
vectorized::Block* block, bool* eos) {
     auto& row_descriptor = _local_state->_parent->row_descriptor();
     if (_output_row_descriptor) {
-        
_origin_block.clear_column_data(row_descriptor.num_materialized_slots());
-        RETURN_IF_ERROR(get_block(state, &_origin_block, eos));
+        if (_alreay_eos) {
+            *eos = true;
+            _padding_block.swap(_origin_block);
+        } else {
+            
_origin_block.clear_column_data(row_descriptor.num_materialized_slots());
+            while (_padding_block.rows() < state->batch_size() / 4 && !*eos) {
+                RETURN_IF_ERROR(get_block(state, &_origin_block, eos));
+                if (_origin_block.rows() >= state->batch_size() / 4) {
+                    break;
+                }
+
+                if (_origin_block.rows() + _padding_block.rows() <= 
state->batch_size()) {
+                    _merge_padding_block();
+                    
_origin_block.clear_column_data(row_descriptor.num_materialized_slots());
+                } else {
+                    if (_origin_block.rows() < _padding_block.rows()) {
+                        _padding_block.swap(_origin_block);
+                    }
+                    break;
+                }
+            }
+        }
+
+        // At eos with both _origin_block and _padding_block non-empty: output
+        // _origin_block now with eos = false; the next call outputs _padding_block and sets eos = true.
+        if (*eos && !_padding_block.empty() && !_origin_block.empty()) {
+            _alreay_eos = true;
+            *eos = false;
+        }
+        if (_origin_block.empty() && !_padding_block.empty()) {
+            _padding_block.swap(_origin_block);
+        }
         return _do_projections(&_origin_block, block);
     } else {
         return get_block(state, block, eos);
diff --git a/be/src/vec/exec/scan/scanner.h b/be/src/vec/exec/scan/scanner.h
index 26023c58b3b..b4b0cd7dbe6 100644
--- a/be/src/vec/exec/scan/scanner.h
+++ b/be/src/vec/exec/scan/scanner.h
@@ -103,6 +103,13 @@ protected:
     // Subclass should implement this to return data.
     virtual Status _get_block_impl(RuntimeState* state, Block* block, bool* 
eof) = 0;
 
+    void _merge_padding_block() { // Folds _origin_block into _padding_block.
+        if (_padding_block.empty()) { // nothing buffered yet: take over _origin_block's contents
+            _padding_block.swap(_origin_block); // _origin_block now holds the formerly empty block
+        }
+        
(void)MutableBlock::build_mutable_block(&_padding_block).merge(_origin_block); // merge() result is discarded via (void); NOTE(review): confirm ignoring it is safe
+    }
+
     // Update the counters before closing this scanner
     virtual void _collect_profile_before_close();
 
@@ -217,6 +224,8 @@ protected:
     // Used in common subexpression elimination to compute intermediate 
results.
     std::vector<vectorized::VExprContextSPtrs> _intermediate_projections;
     vectorized::Block _origin_block;
+    vectorized::Block _padding_block;
+    bool _alreay_eos = false;
 
     VExprContextSPtrs _common_expr_ctxs_push_down;
     // Late arriving runtime filters will update _conjuncts.


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to