This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3d79955db37 branch-2.1: [fix](parquet-reader) Fixed the issue of 
excessive scanning data in late materialization case of parquet reader #46121 
(#46183)
3d79955db37 is described below

commit 3d79955db37d8e0c71e5d55ea90a85d66caeae57
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Dec 31 07:30:49 2024 +0800

    branch-2.1: [fix](parquet-reader) Fixed the issue of excessive scanning 
data in late materialization case of parquet reader #46121 (#46183)
    
    Cherry-picked from #46121
    
    Co-authored-by: Qi Chen <che...@selectdb.com>
---
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index d662c174d9b..a63e098c97c 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -520,16 +520,18 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t 
batch_size, size_t* re
             Block::erase_useless_column(block, origin_column_num);
 
             if (!pre_eof) {
-                if (pre_raw_read_rows >= config::doris_scanner_row_num) {
-                    break;
-                }
                 // If continuous batches are skipped, we can cache them to 
skip a whole page
                 _cached_filtered_rows += pre_read_rows;
+                if (pre_raw_read_rows >= config::doris_scanner_row_num) {
+                    *read_rows = 0;
+                    _convert_dict_cols_to_string_cols(block);
+                    return Status::OK();
+                }
             } else { // pre_eof
                 // If select_vector_ptr->filter_all() and pre_eof, we can skip 
whole row group.
                 *read_rows = 0;
                 *batch_eof = true;
-                _lazy_read_filtered_rows += pre_read_rows;
+                _lazy_read_filtered_rows += (pre_read_rows + 
_cached_filtered_rows);
                 _convert_dict_cols_to_string_cols(block);
                 return Status::OK();
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to