This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit 750bf85030e609c8c3dd7433ff1fb5201a9cde5a Author: Ashin Gau <ashin...@users.noreply.github.com> AuthorDate: Fri Jan 13 11:51:11 2023 +0800 [fix](parquet-reader) fix coredump when load datatime data to doris from parquet (#15794) `date_time_v2` checks the scale when a datetimev2 value is constructed: ``` LOG(FATAL) << fmt::format("Scale {} is out of bounds", scale); ``` This [PR](https://github.com/apache/doris/pull/15510) has fixed this issue, but the parquet reader does not use the constructor to create `TypeDescriptor`, leading to `scale = -1` when reading datetimev2 data. --- be/src/runtime/types.h | 1 + be/src/vec/exec/format/parquet/schema_desc.cpp | 63 ++++++++++++++------------ 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index 06629a655e..aca6336f6b 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -74,6 +74,7 @@ struct TypeDescriptor { precision = 27; scale = 9; } else if (type == TYPE_DATETIMEV2) { + precision = 18; scale = 6; } } diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index 2af4d40ea2..b8b9b07184 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -167,24 +167,27 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph if (type.type == INVALID_TYPE) { switch (physical_schema.type) { case tparquet::Type::BOOLEAN: - type.type = TYPE_BOOLEAN; + type = TypeDescriptor(TYPE_BOOLEAN); break; case tparquet::Type::INT32: - type.type = TYPE_INT; + type = TypeDescriptor(TYPE_INT); break; case tparquet::Type::INT64: + type = TypeDescriptor(TYPE_BIGINT); + break; case tparquet::Type::INT96: - type.type = TYPE_BIGINT; + // in most cases, it's a nano timestamp + type = TypeDescriptor(TYPE_DATETIMEV2); break; case tparquet::Type::FLOAT: - type.type = TYPE_FLOAT; + type = TypeDescriptor(TYPE_FLOAT); break; case tparquet::Type::DOUBLE: - type.type = 
TYPE_DOUBLE; + type = TypeDescriptor(TYPE_DOUBLE); break; case tparquet::Type::BYTE_ARRAY: case tparquet::Type::FIXED_LEN_BYTE_ARRAY: - type.type = TYPE_STRING; + type = TypeDescriptor(TYPE_STRING); break; default: break; @@ -196,33 +199,31 @@ TypeDescriptor FieldDescriptor::get_doris_type(const tparquet::SchemaElement& ph TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::LogicalType logicalType) { TypeDescriptor type; if (logicalType.__isset.STRING) { - type.type = TYPE_STRING; + type = TypeDescriptor(TYPE_STRING); } else if (logicalType.__isset.DECIMAL) { - type.type = TYPE_DECIMALV2; - type.precision = 27; - type.scale = 9; + type = TypeDescriptor(TYPE_DECIMALV2); } else if (logicalType.__isset.DATE) { - type.type = TYPE_DATEV2; + type = TypeDescriptor(TYPE_DATEV2); } else if (logicalType.__isset.INTEGER) { if (logicalType.INTEGER.isSigned) { if (logicalType.INTEGER.bitWidth <= 32) { - type.type = TYPE_INT; + type = TypeDescriptor(TYPE_INT); } else { - type.type = TYPE_BIGINT; + type = TypeDescriptor(TYPE_BIGINT); } } else { if (logicalType.INTEGER.bitWidth <= 16) { - type.type = TYPE_INT; + type = TypeDescriptor(TYPE_INT); } else { - type.type = TYPE_BIGINT; + type = TypeDescriptor(TYPE_BIGINT); } } } else if (logicalType.__isset.TIME) { - type.type = TYPE_TIMEV2; + type = TypeDescriptor(TYPE_TIMEV2); } else if (logicalType.__isset.TIMESTAMP) { - type.type = TYPE_DATETIMEV2; + type = TypeDescriptor(TYPE_DATETIMEV2); } else { - type.type = INVALID_TYPE; + type = TypeDescriptor(INVALID_TYPE); } return type; } @@ -231,39 +232,41 @@ TypeDescriptor FieldDescriptor::convert_to_doris_type(tparquet::ConvertedType::t TypeDescriptor type; switch (convertedType) { case tparquet::ConvertedType::type::UTF8: - type.type = TYPE_STRING; + type = TypeDescriptor(TYPE_STRING); break; case tparquet::ConvertedType::type::DECIMAL: - type.type = TYPE_DECIMALV2; - type.precision = 27; - type.scale = 9; + type = TypeDescriptor(TYPE_DECIMALV2); break; case 
tparquet::ConvertedType::type::DATE: - type.type = TYPE_DATEV2; + type = TypeDescriptor(TYPE_DATEV2); break; case tparquet::ConvertedType::type::TIME_MILLIS: case tparquet::ConvertedType::type::TIME_MICROS: - type.type = TYPE_TIMEV2; + type = TypeDescriptor(TYPE_TIMEV2); break; case tparquet::ConvertedType::type::TIMESTAMP_MILLIS: case tparquet::ConvertedType::type::TIMESTAMP_MICROS: - type.type = TYPE_DATETIMEV2; + type = TypeDescriptor(TYPE_DATETIMEV2); break; - case tparquet::ConvertedType::type::UINT_8: - case tparquet::ConvertedType::type::UINT_16: case tparquet::ConvertedType::type::INT_8: + type = TypeDescriptor(TYPE_TINYINT); + break; + case tparquet::ConvertedType::type::UINT_8: case tparquet::ConvertedType::type::INT_16: + type = TypeDescriptor(TYPE_SMALLINT); + break; + case tparquet::ConvertedType::type::UINT_16: case tparquet::ConvertedType::type::INT_32: - type.type = TYPE_INT; + type = TypeDescriptor(TYPE_INT); break; case tparquet::ConvertedType::type::UINT_32: case tparquet::ConvertedType::type::UINT_64: case tparquet::ConvertedType::type::INT_64: - type.type = TYPE_BIGINT; + type = TypeDescriptor(TYPE_BIGINT); break; default: LOG(WARNING) << "Not supported parquet ConvertedType: " << convertedType; - type = INVALID_TYPE; + type = TypeDescriptor(INVALID_TYPE); break; } return type; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org