This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new fc70179acb [multi-catalog](fix) the eof of lazy read columns may be not equal to the eof of predicate columns (#14212) fc70179acb is described below commit fc70179acb8ed096527f537b821612434c37aa46 Author: Ashin Gau <ashin...@users.noreply.github.com> AuthorDate: Mon Nov 14 14:37:21 2022 +0800 [multi-catalog](fix) the eof of lazy read columns may be not equal to the eof of predicate columns (#14212) Fix three bugs: 1. The EOF of lazy read columns may not be equal to the EOF of predicate columns. (For example: suppose the predicate column has 3 pages of 400 rows each, but the last page is filtered out by the page index. When batch_size=992, the EOF of the predicate column is true. However, we should set batch_size=800 for the lazy read column, so the EOF of the lazy read column may be false.) 2. The array column does not count the number of nulls. 3. A wrong NullMap is generated for the array column. --- be/src/vec/exec/format/parquet/parquet_common.cpp | 7 +++++++ be/src/vec/exec/format/parquet/vparquet_column_reader.cpp | 4 ++-- be/src/vec/exec/format/parquet/vparquet_group_reader.cpp | 5 ++--- be/src/vec/exec/format/parquet/vparquet_reader.cpp | 4 +++- 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp b/be/src/vec/exec/format/parquet/parquet_common.cpp index 48ffc9deee..5b8ff2f801 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.cpp +++ b/be/src/vec/exec/format/parquet/parquet_common.cpp @@ -141,6 +141,13 @@ void ColumnSelectVector::set_run_length_null_map(const std::vector<uint16_t>& ru } is_null = !is_null; } + } else { + for (auto& run_length : run_length_null_map) { + if (is_null) { + _num_nulls += run_length; + } + is_null = !is_null; + } } } } diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index b536dec08a..4def91b6df 100644 --- 
a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -364,9 +364,9 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& doris_column, DataTypePtr& NullMap& map_data_column = *map_data_ptr; auto origin_size = map_data_column.size(); map_data_column.resize(origin_size + scan_rows); - for (int i = offset_index; i < offset_index + scan_rows; ++i) { + for (int i = 0; i < scan_rows; ++i) { map_data_column[origin_size + i] = - (UInt8)(definitions[element_offsets[i]] == _NULL_ARRAY); + (UInt8)(definitions[element_offsets[offset_index + i]] == _NULL_ARRAY); } } else { for (int i = offset_index; i < offset_index + scan_rows; ++i) { diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 211d10d3ef..0f71990b2b 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -189,9 +189,8 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t batch_size, size_t* re if (pre_read_rows != lazy_read_rows) { return Status::Corruption("Can't read the same number of rows when doing lazy read"); } - if (pre_eof ^ lazy_eof) { - return Status::Corruption("Eof error when doing lazy read"); - } + // pre_eof ^ lazy_eof + // we set pre_read_rows as batch_size for lazy read columns, so pre_eof != lazy_eof // filter data in predicate columns, and remove filter column if (select_vector.has_filter()) { diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 5a6300eeb2..2ab2a1dfdc 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -183,7 +183,9 @@ void ParquetReader::_init_lazy_read() { visit_slot(child); } } else if (VInPredicate* in_predicate = typeid_cast<VInPredicate*>(filter_impl)) { - 
visit_slot(in_predicate->children()[0]); + if (in_predicate->children().size() > 0) { + visit_slot(in_predicate->children()[0]); + } } else { for (VExpr* child : filter_impl->children()) { visit_slot(child); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org