This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d7ad299154 [fix](NestedType) throw error when reading complex nested type in orc&parquet (#19489) d7ad299154 is described below commit d7ad299154f25aa0ba50c1de696fcbc11f666c4b Author: Ashin Gau <ashin...@users.noreply.github.com> AuthorDate: Thu May 11 07:51:02 2023 +0800 [fix](NestedType) throw error when reading complex nested type in orc&parquet (#19489) Doris blocks do not support complex nested types yet, but the ORC and Parquet readers were generating complex nested columns, which made the output of the MySQL client wrong and confused users. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 20 ++++++++++++++++++++ .../exec/format/parquet/vparquet_column_reader.cpp | 17 +++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 0f13b4d191..6a88360b49 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1016,6 +1016,11 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, reinterpret_cast<const DataTypeArray*>(remove_nullable(data_type).get()) ->get_nested_type()); const orc::Type* nested_orc_type = orc_column_type->getSubtype(0); + if (nested_orc_type->getKind() == orc::TypeKind::MAP || + nested_orc_type->getKind() == orc::TypeKind::STRUCT) { + return Status::InternalError( + "Array does not support nested map/struct type in column {}", col_name); + } return _orc_column_to_doris_column<is_filter>( col_name, static_cast<ColumnArray&>(*data_column).get_data_ptr(), nested_type, nested_orc_type, orc_list->elements.get(), element_size); @@ -1037,6 +1042,15 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, ->get_value_type()); const orc::Type* orc_key_type = orc_column_type->getSubtype(0); const orc::Type* orc_value_type = orc_column_type->getSubtype(1); + if (orc_key_type->getKind() == 
orc::TypeKind::LIST || + orc_key_type->getKind() == orc::TypeKind::MAP || + orc_key_type->getKind() == orc::TypeKind::STRUCT || + orc_value_type->getKind() == orc::TypeKind::LIST || + orc_value_type->getKind() == orc::TypeKind::MAP || + orc_value_type->getKind() == orc::TypeKind::STRUCT) { + return Status::InternalError("Map does not support nested complex type in column {}", + col_name); + } const ColumnPtr& doris_key_column = doris_map.get_keys_ptr(); const ColumnPtr& doris_value_column = doris_map.get_values_ptr(); RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>(col_name, doris_key_column, @@ -1060,6 +1074,12 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, for (int i = 0; i < doris_struct.tuple_size(); ++i) { orc::ColumnVectorBatch* orc_field = orc_struct->fields[i]; const orc::Type* orc_type = orc_column_type->getSubtype(i); + if (orc_type->getKind() == orc::TypeKind::LIST || + orc_type->getKind() == orc::TypeKind::MAP || + orc_type->getKind() == orc::TypeKind::STRUCT) { + return Status::InternalError( + "Struct does not support nested complex type in column {}", col_name); + } const ColumnPtr& doris_field = doris_struct.get_column_ptr(i); const DataTypePtr& doris_type = doris_struct_type->get_element(i); RETURN_IF_ERROR(_orc_column_to_doris_column<is_filter>( diff --git a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp index 152012da4b..af1266cbbd 100644 --- a/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_column_reader.cpp @@ -123,6 +123,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, size_t max_buf_size) { if (field->type.type == TYPE_ARRAY) { std::unique_ptr<ParquetColumnReader> element_reader; + if (field->children[0].type.type == TYPE_MAP || + field->children[0].type.type == TYPE_STRUCT) { + return Status::InternalError( + "Array does not support nested 
map/struct type in column {}", field->name); + } RETURN_IF_ERROR(create(file, &field->children[0], row_group, row_ranges, ctz, io_ctx, element_reader, max_buf_size)); element_reader->set_nested_column(); @@ -130,6 +135,13 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, RETURN_IF_ERROR(array_reader->init(std::move(element_reader), field)); reader.reset(array_reader); } else if (field->type.type == TYPE_MAP) { + auto key_type = field->children[0].children[0].type.type; + auto value_type = field->children[0].children[1].type.type; + if (key_type == TYPE_ARRAY || key_type == TYPE_MAP || key_type == TYPE_STRUCT || + value_type == TYPE_ARRAY || value_type == TYPE_MAP || value_type == TYPE_STRUCT) { + return Status::InternalError("Map does not support nested complex type in column {}", + field->name); + } std::unique_ptr<ParquetColumnReader> key_reader; std::unique_ptr<ParquetColumnReader> value_reader; RETURN_IF_ERROR(create(file, &field->children[0].children[0], row_group, row_ranges, ctz, @@ -144,6 +156,11 @@ Status ParquetColumnReader::create(io::FileReaderSPtr file, FieldSchema* field, } else if (field->type.type == TYPE_STRUCT) { std::vector<std::unique_ptr<ParquetColumnReader>> child_readers; for (int i = 0; i < field->children.size(); ++i) { + auto child_type = field->children[i].type.type; + if (child_type == TYPE_ARRAY || child_type == TYPE_MAP || child_type == TYPE_STRUCT) { + return Status::InternalError( + "Struct does not support nested complex type in column {}", field->name); + } std::unique_ptr<ParquetColumnReader> child_reader; RETURN_IF_ERROR(create(file, &field->children[i], row_group, row_ranges, ctz, io_ctx, child_reader, max_buf_size)); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org