This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 11428061da2 [fix](parquet) return error if schema changed in complex 
types #31128 (#31133)
11428061da2 is described below

commit 11428061da2d78d2c705b5f05504a5fa64ea3122
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Tue Feb 20 15:55:32 2024 +0800

    [fix](parquet) return error if schema changed in complex types #31128 
(#31133)
---
 .../exec/format/parquet/vparquet_column_reader.cpp | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 059375ef006..cf9753f46eb 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -414,7 +414,7 @@ Status ScalarColumnReader::_read_nested_column(ColumnPtr& 
doris_column, DataType
     }
     RETURN_IF_ERROR(_chunk_reader->decode_values(data_column, type, 
select_vector, is_dict_filter));
     if (ancestor_nulls != 0) {
-        _chunk_reader->skip_values(ancestor_nulls, false);
+        RETURN_IF_ERROR(_chunk_reader->skip_values(ancestor_nulls, false));
     }
 
     if (!align_rows) {
@@ -582,6 +582,9 @@ Status ArrayColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr&
         }
         data_column = doris_column->assume_mutable();
     }
+    if (remove_nullable(type)->get_type_id() != TypeIndex::Array) {
+        return Status::Corruption("Wrong data type for column '{}'", 
_field_schema->name);
+    }
 
     ColumnPtr& element_column = 
static_cast<ColumnArray&>(*data_column).get_data_ptr();
     DataTypePtr& element_type = const_cast<DataTypePtr&>(
@@ -628,6 +631,9 @@ Status MapColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr& t
         }
         data_column = doris_column->assume_mutable();
     }
+    if (remove_nullable(type)->get_type_id() != TypeIndex::Map) {
+        return Status::Corruption("Wrong data type for column '{}'", 
_field_schema->name);
+    }
 
     auto& map = static_cast<ColumnMap&>(*data_column);
     DataTypePtr& key_type = const_cast<DataTypePtr&>(
@@ -691,6 +697,9 @@ Status StructColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr
         }
         data_column = doris_column->assume_mutable();
     }
+    if (remove_nullable(type)->get_type_id() != TypeIndex::Struct) {
+        return Status::Corruption("Wrong data type for column '{}'", 
_field_schema->name);
+    }
 
     auto& doris_struct = static_cast<ColumnStruct&>(*data_column);
     if (_child_readers.size() != doris_struct.tuple_size()) {
@@ -705,17 +714,18 @@ Status StructColumnReader::read_column_data(ColumnPtr& 
doris_column, DataTypePtr
         size_t field_rows = 0;
         bool field_eof = false;
         if (i == 0) {
-            _child_readers[i]->read_column_data(doris_field, doris_type, 
select_vector, batch_size,
-                                                &field_rows, &field_eof, 
is_dict_filter);
+            RETURN_IF_ERROR(_child_readers[i]->read_column_data(
+                    doris_field, doris_type, select_vector, batch_size, 
&field_rows, &field_eof,
+                    is_dict_filter));
             *read_rows = field_rows;
             *eof = field_eof;
         } else {
             while (field_rows < *read_rows && !field_eof) {
                 size_t loop_rows = 0;
                 select_vector.reset();
-                _child_readers[i]->read_column_data(doris_field, doris_type, 
select_vector,
-                                                    *read_rows - field_rows, 
&loop_rows, &field_eof,
-                                                    is_dict_filter);
+                RETURN_IF_ERROR(_child_readers[i]->read_column_data(
+                        doris_field, doris_type, select_vector, *read_rows - 
field_rows, &loop_rows,
+                        &field_eof, is_dict_filter));
                 field_rows += loop_rows;
             }
             DCHECK_EQ(*read_rows, field_rows);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to