This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d7ad299154 [fix](NestedType) throw error when reading complex nested 
type in orc&parquet (#19489)
d7ad299154 is described below

commit d7ad299154f25aa0ba50c1de696fcbc11f666c4b
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Thu May 11 07:51:02 2023 +0800

    [fix](NestedType) throw error when reading complex nested type in 
orc&parquet (#19489)
    
    Doris blocks do not support complex nested types yet, but the ORC and
    Parquet readers were still generating complex nested columns, which made
    the output shown in the MySQL client wrong and confused users. The readers
    now return an error when they encounter such a column.
---
 be/src/vec/exec/format/orc/vorc_reader.cpp           | 20 ++++++++++++++++++++
 .../exec/format/parquet/vparquet_column_reader.cpp   | 17 +++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 0f13b4d191..6a88360b49 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1016,6 +1016,11 @@ Status OrcReader::_orc_column_to_doris_column(const 
std::string& col_name,
                 reinterpret_cast<const 
DataTypeArray*>(remove_nullable(data_type).get())
                         ->get_nested_type());
         const orc::Type* nested_orc_type = orc_column_type->getSubtype(0);
+        if (nested_orc_type->getKind() == orc::TypeKind::MAP ||
+            nested_orc_type->getKind() == orc::TypeKind::STRUCT) {
+            return Status::InternalError(
+                    "Array does not support nested map/struct type in column 
{}", col_name);
+        }
         return _orc_column_to_doris_column<is_filter>(
                 col_name, 
static_cast<ColumnArray&>(*data_column).get_data_ptr(), nested_type,
                 nested_orc_type, orc_list->elements.get(), element_size);
@@ -1037,6 +1042,15 @@ Status OrcReader::_orc_column_to_doris_column(const 
std::string& col_name,
                         ->get_value_type());
         const orc::Type* orc_key_type = orc_column_type->getSubtype(0);
         const orc::Type* orc_value_type = orc_column_type->getSubtype(1);
+        if (orc_key_type->getKind() == orc::TypeKind::LIST ||
+            orc_key_type->getKind() == orc::TypeKind::MAP ||
+            orc_key_type->getKind() == orc::TypeKind::STRUCT ||
+            orc_value_type->getKind() == orc::TypeKind::LIST ||
+            orc_value_type->getKind() == orc::TypeKind::MAP ||
+            orc_value_type->getKind() == orc::TypeKind::STRUCT) {
+            return Status::InternalError("Map does not support nested complex 
type in column {}",
+                                         col_name);
+        }
         const ColumnPtr& doris_key_column = doris_map.get_keys_ptr();
         const ColumnPtr& doris_value_column = doris_map.get_values_ptr();
         RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(col_name, 
doris_key_column,
@@ -1060,6 +1074,12 @@ Status OrcReader::_orc_column_to_doris_column(const 
std::string& col_name,
         for (int i = 0; i < doris_struct.tuple_size(); ++i) {
             orc::ColumnVectorBatch* orc_field = orc_struct->fields[i];
             const orc::Type* orc_type = orc_column_type->getSubtype(i);
+            if (orc_type->getKind() == orc::TypeKind::LIST ||
+                orc_type->getKind() == orc::TypeKind::MAP ||
+                orc_type->getKind() == orc::TypeKind::STRUCT) {
+                return Status::InternalError(
+                        "Struct does not support nested complex type in column 
{}", col_name);
+            }
             const ColumnPtr& doris_field = doris_struct.get_column_ptr(i);
             const DataTypePtr& doris_type = doris_struct_type->get_element(i);
             RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(
diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
index 152012da4b..af1266cbbd 100644
--- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp
@@ -123,6 +123,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr 
file, FieldSchema* field,
                                    size_t max_buf_size) {
     if (field->type.type == TYPE_ARRAY) {
         std::unique_ptr<ParquetColumnReader> element_reader;
+        if (field->children[0].type.type == TYPE_MAP ||
+            field->children[0].type.type == TYPE_STRUCT) {
+            return Status::InternalError(
+                    "Array does not support nested map/struct type in column 
{}", field->name);
+        }
         RETURN_IF_ERROR(create(file, &field->children[0], row_group, 
row_ranges, ctz, io_ctx,
                                element_reader, max_buf_size));
         element_reader->set_nested_column();
@@ -130,6 +135,13 @@ Status ParquetColumnReader::create(io::FileReaderSPtr 
file, FieldSchema* field,
         RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field));
         reader.reset(array_reader);
     } else if (field->type.type == TYPE_MAP) {
+        auto key_type = field->children[0].children[0].type.type;
+        auto value_type = field->children[0].children[1].type.type;
+        if (key_type == TYPE_ARRAY || key_type == TYPE_MAP || key_type == 
TYPE_STRUCT ||
+            value_type == TYPE_ARRAY || value_type == TYPE_MAP || value_type 
== TYPE_STRUCT) {
+            return Status::InternalError("Map does not support nested complex 
type in column {}",
+                                         field->name);
+        }
         std::unique_ptr<ParquetColumnReader> key_reader;
         std::unique_ptr<ParquetColumnReader> value_reader;
         RETURN_IF_ERROR(create(file, &field->children[0].children[0], 
row_group, row_ranges, ctz,
@@ -144,6 +156,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr 
file, FieldSchema* field,
     } else if (field->type.type == TYPE_STRUCT) {
         std::vector<std::unique_ptr<ParquetColumnReader>> child_readers;
         for (int i = 0; i < field->children.size(); ++i) {
+            auto child_type = field->children[i].type.type;
+            if (child_type == TYPE_ARRAY || child_type == TYPE_MAP || 
child_type == TYPE_STRUCT) {
+                return Status::InternalError(
+                        "Struct does not support nested complex type in column 
{}", field->name);
+            }
             std::unique_ptr<ParquetColumnReader> child_reader;
             RETURN_IF_ERROR(create(file, &field->children[i], row_group, 
row_ranges, ctz, io_ctx,
                                    child_reader, max_buf_size));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to