This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new 0348b336018 [fix](parquet-reader) Fixed the issue of excessive scanning data in late materialization case of parquet reader (#46121)
0348b336018 is described below

commit 0348b336018644953aea8e71fa636239c3096dca
Author: Qi Chen <che...@selectdb.com>
AuthorDate: Mon Dec 30 22:48:53 2024 +0800

    [fix](parquet-reader) Fixed the issue of excessive scanning data in late materialization case of parquet reader (#46121)

    ### What problem does this PR solve?

    Related PR: #40641

    Problem Summary:

    [Fix](parquet-reader) Fixed the issue of excessive scanning data in late materialization case of parquet reader introduced by #40641 in scenarios with particularly high filtering rates.
---
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index a9854b53f3b..770ed1f02ac 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -522,16 +522,18 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re
             Block::erase_useless_column(block, origin_column_num);

             if (!pre_eof) {
-                if (pre_raw_read_rows >= config::doris_scanner_row_num) {
-                    break;
-                }
                 // If continuous batches are skipped, we can cache them to skip a whole page
                 _cached_filtered_rows += pre_read_rows;
+                if (pre_raw_read_rows >= config::doris_scanner_row_num) {
+                    *read_rows = 0;
+                    _convert_dict_cols_to_string_cols(block);
+                    return Status::OK();
+                }
             } else { // pre_eof
                 // If filter_map_ptr->filter_all() and pre_eof, we can skip whole row group.
                 *read_rows = 0;
                 *batch_eof = true;
-                _lazy_read_filtered_rows += pre_read_rows;
+                _lazy_read_filtered_rows += (pre_read_rows + _cached_filtered_rows);
                 _convert_dict_cols_to_string_cols(block);
                 return Status::OK();
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org