This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new fc70179acb [multi-catalog](fix) the eof of lazy read columns may be 
not equal to the eof of predicate columns (#14212)
fc70179acb is described below

commit fc70179acb8ed096527f537b821612434c37aa46
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Mon Nov 14 14:37:21 2022 +0800

    [multi-catalog](fix) the eof of lazy read columns may be not equal to the 
eof of predicate columns (#14212)
    
    Fix three bugs:
    1. The EOF of lazy read columns may not be equal to the EOF of predicate columns.
    (For example: suppose the predicate column has 3 pages of 400 rows each, and the
    last page is filtered out by the page index. With batch_size=992, only 800 rows
    remain, so the EOF of the predicate column is true. However, the lazy read column
    is then read with batch_size=800, so the EOF of the lazy read column may be false.)
    2. The number of nulls is not counted for array columns.
    3. A wrong NullMap is generated for array columns.
---
 be/src/vec/exec/format/parquet/parquet_common.cpp         | 7 +++++++
 be/src/vec/exec/format/parquet/vparquet_column_reader.cpp | 4 ++--
 be/src/vec/exec/format/parquet/vparquet_group_reader.cpp  | 5 ++---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp        | 4 +++-
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp 
b/be/src/vec/exec/format/parquet/parquet_common.cpp
index 48ffc9deee..5b8ff2f801 100644
--- a/be/src/vec/exec/format/parquet/parquet_common.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_common.cpp
@@ -141,6 +141,13 @@ void ColumnSelectVector::set_run_length_null_map(const 
std::vector<uint16_t>& ru
                 }
                 is_null = !is_null;
             }
+        } else {
+            for (auto& run_length : run_length_null_map) {
+                if (is_null) {
+                    _num_nulls += run_length;
+                }
+                is_null = !is_null;
+            }
         }
     }
 }
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index b536dec08a..4def91b6df 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -364,9 +364,9 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr&
                 NullMap& map_data_column = *map_data_ptr;
                 auto origin_size = map_data_column.size();
                 map_data_column.resize(origin_size + scan_rows);
-                for (int i = offset_index; i < offset_index + scan_rows; ++i) {
+                for (int i = 0; i < scan_rows; ++i) {
                     map_data_column[origin_size + i] =
-                            (UInt8)(definitions[element_offsets[i]] == 
_NULL_ARRAY);
+                            (UInt8)(definitions[element_offsets[offset_index + 
i]] == _NULL_ARRAY);
                 }
             } else {
                 for (int i = offset_index; i < offset_index + scan_rows; ++i) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 211d10d3ef..0f71990b2b 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -189,9 +189,8 @@ Status RowGroupReader::_do_lazy_read(Block* block, size_t 
batch_size, size_t* re
     if (pre_read_rows != lazy_read_rows) {
         return Status::Corruption("Can't read the same number of rows when 
doing lazy read");
     }
-    if (pre_eof ^ lazy_eof) {
-        return Status::Corruption("Eof error when doing lazy read");
-    }
+    // pre_eof ^ lazy_eof
+    // we set pre_read_rows as batch_size for lazy read columns, so pre_eof != 
lazy_eof
 
     // filter data in predicate columns, and remove filter column
     if (select_vector.has_filter()) {
diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index 5a6300eeb2..2ab2a1dfdc 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -183,7 +183,9 @@ void ParquetReader::_init_lazy_read() {
                     visit_slot(child);
                 }
             } else if (VInPredicate* in_predicate = 
typeid_cast<VInPredicate*>(filter_impl)) {
-                visit_slot(in_predicate->children()[0]);
+                if (in_predicate->children().size() > 0) {
+                    visit_slot(in_predicate->children()[0]);
+                }
             } else {
                 for (VExpr* child : filter_impl->children()) {
                     visit_slot(child);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to