This is an automated email from the ASF dual-hosted git repository.

ashingau pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 2b4c4bb4424 [Fix][Opt](parquet-reader) Fix filter push down with decimal types in parquet reader. (#27897) 2b4c4bb4424 is described below commit 2b4c4bb44247fb10165bab5b0c98daae7f269c0e Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Mon Dec 4 22:25:39 2023 +0800 [Fix][Opt](parquet-reader) Fix filter push down with decimal types in parquet reader. (#27897) Fix filter push down with decimal types in parquet reader introduced by #22842 --- be/src/vec/exec/format/format_common.h | 11 +++++++---- be/src/vec/exec/format/orc/vorc_reader.cpp | 16 ++++++++-------- be/src/vec/exec/format/orc/vorc_reader.h | 4 ++-- .../exec/format/parquet/parquet_column_convert.h | 19 +++++++++---------- be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 21 +++++++++++---------- 5 files changed, 37 insertions(+), 34 deletions(-) diff --git a/be/src/vec/exec/format/format_common.h b/be/src/vec/exec/format/format_common.h index 8d1b651c7b0..9e8ce4fe238 100644 --- a/be/src/vec/exec/format/format_common.h +++ b/be/src/vec/exec/format/format_common.h @@ -34,14 +34,17 @@ struct DecimalScaleParams { template <typename DecimalPrimitiveType> static inline constexpr DecimalPrimitiveType get_scale_factor(int32_t n) { - if constexpr (std::is_same_v<DecimalPrimitiveType, Int32>) { + if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal32>) { return common::exp10_i32(n); - } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int64>) { + } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal64>) { return common::exp10_i64(n); - } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int128>) { + } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal128>) { + return common::exp10_i128(n); + } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal128I>) { return common::exp10_i128(n); } else { - return DecimalPrimitiveType(1); + static_assert(!sizeof(DecimalPrimitiveType), + "All types 
must be matched with if constexpr."); } } }; diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 7f016e0f938..aa790d45170 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1281,17 +1281,17 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, case TypeIndex::Int32: return _decode_int32_column<is_filter>(col_name, data_column, cvb, num_values); case TypeIndex::Decimal32: - return _decode_decimal_column<Int32, is_filter>(col_name, data_column, data_type, cvb, - num_values); + return _decode_decimal_column<Decimal32, is_filter>(col_name, data_column, data_type, cvb, + num_values); case TypeIndex::Decimal64: - return _decode_decimal_column<Int64, is_filter>(col_name, data_column, data_type, cvb, - num_values); + return _decode_decimal_column<Decimal64, is_filter>(col_name, data_column, data_type, cvb, + num_values); case TypeIndex::Decimal128: - return _decode_decimal_column<Int128, is_filter>(col_name, data_column, data_type, cvb, - num_values); + return _decode_decimal_column<Decimal128, is_filter>(col_name, data_column, data_type, cvb, + num_values); case TypeIndex::Decimal128I: - return _decode_decimal_column<Int128, is_filter>(col_name, data_column, data_type, cvb, - num_values); + return _decode_decimal_column<Decimal128I, is_filter>(col_name, data_column, data_type, cvb, + num_values); case TypeIndex::Date: return _decode_time_column<VecDateTimeValue, Int64, orc::LongVectorBatch, is_filter>( col_name, data_column, cvb, num_values); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index 0384b0b9793..ca699831d66 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -287,7 +287,7 @@ private: if (scale_params.scale_type != DecimalScaleParams::NOT_INIT) { return; } - auto* decimal_type = 
reinterpret_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>( + auto* decimal_type = reinterpret_cast<DataTypeDecimal<DecimalPrimitiveType>*>( const_cast<IDataType*>(remove_nullable(data_type).get())); auto dest_scale = decimal_type->get_scale(); if (dest_scale > orc_decimal_scale) { @@ -324,7 +324,7 @@ private: auto* cvb_data = data->values.data(); auto& column_data = - static_cast<ColumnDecimal<Decimal<DecimalPrimitiveType>>&>(*data_column).get_data(); + static_cast<ColumnDecimal<DecimalPrimitiveType>&>(*data_column).get_data(); auto origin_size = column_data.size(); column_data.resize(origin_size + num_values); diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index 9273d7a1aa2..65f5270face 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -108,11 +108,11 @@ struct PhysicalTypeTraits<tparquet::Type::INT96> { M(TypeIndex::Float32, Float32, Float32) \ M(TypeIndex::Float64, Float64, Float64) -#define FOR_LOGICAL_DECIMAL_TYPES(M) \ - M(TypeIndex::Decimal32, Decimal32, Int32) \ - M(TypeIndex::Decimal64, Decimal64, Int64) \ - M(TypeIndex::Decimal128, Decimal128, Int128) \ - M(TypeIndex::Decimal128I, Decimal128, Int128) +#define FOR_LOGICAL_DECIMAL_TYPES(M) \ + M(TypeIndex::Decimal32, Decimal32, Decimal32) \ + M(TypeIndex::Decimal64, Decimal64, Decimal64) \ + M(TypeIndex::Decimal128, Decimal128, Decimal128) \ + M(TypeIndex::Decimal128I, Decimal128I, Decimal128I) struct ConvertParams { // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false @@ -174,7 +174,7 @@ struct ConvertParams { return; } auto scale = field_schema->parquet_schema.scale; - auto* decimal_type = static_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>( + auto* decimal_type = static_cast<DataTypeDecimal<DecimalPrimitiveType>*>( const_cast<IDataType*>(remove_nullable(data_type).get())); auto dest_scale = decimal_type->get_scale(); if 
(dest_scale > scale) { @@ -400,9 +400,8 @@ public: dst_col->resize(_convert_params->start_idx + rows); DecimalScaleParams& scale_params = _convert_params->decimal_scale; - auto* data = static_cast<ColumnDecimal<Decimal<DecimalPhysicalType>>*>(dst_col.get()) - ->get_data() - .data(); + auto* data = + static_cast<ColumnDecimal<DecimalPhysicalType>*>(dst_col.get())->get_data().data(); for (int i = 0; i < rows; i++) { ValueCopyType value = src_data[i]; @@ -678,4 +677,4 @@ inline Status get_converter(tparquet::Type::type parquet_physical_type, Primitiv }; // namespace ParquetConvert -}; // namespace doris::vectorized \ No newline at end of file +}; // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index b993370a159..8158fb9e9f1 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -163,7 +163,7 @@ private: break; case TYPE_DECIMALV2: if constexpr (std::is_same_v<CppType, DecimalV2Value>) { - size_t max_precision = max_decimal_precision<Decimal<__int128_t>>(); + size_t max_precision = max_decimal_precision<Decimal128>(); if (col_schema->parquet_schema.precision < 1 || col_schema->parquet_schema.precision > max_precision || col_schema->parquet_schema.scale > max_precision) { @@ -172,18 +172,18 @@ private: int v2_scale = DecimalV2Value::SCALE; if (physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) { min_value = DecimalV2Value( - _decode_binary_decimal<Int128>(col_schema, encoded_min, v2_scale)); + _decode_binary_decimal<Decimal128>(col_schema, encoded_min, v2_scale)); max_value = DecimalV2Value( - _decode_binary_decimal<Int128>(col_schema, encoded_max, v2_scale)); + _decode_binary_decimal<Decimal128>(col_schema, encoded_max, v2_scale)); } else if (physical_type == tparquet::Type::INT32) { - min_value = DecimalV2Value(_decode_primitive_decimal<Int128, Int32>( + min_value = 
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>( col_schema, encoded_min, v2_scale)); - max_value = DecimalV2Value(_decode_primitive_decimal<Int128, Int32>( + max_value = DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>( col_schema, encoded_max, v2_scale)); } else if (physical_type == tparquet::Type::INT64) { - min_value = DecimalV2Value(_decode_primitive_decimal<Int128, Int64>( + min_value = DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>( col_schema, encoded_min, v2_scale)); - max_value = DecimalV2Value(_decode_primitive_decimal<Int128, Int64>( + max_value = DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>( col_schema, encoded_max, v2_scale)); } else { return false; @@ -197,9 +197,10 @@ private: case TYPE_DECIMAL64: [[fallthrough]]; case TYPE_DECIMAL128I: - if constexpr (std::is_same_v<CppType, int32_t> || std::is_same_v<CppType, int64_t> || - std::is_same_v<CppType, __int128_t>) { - size_t max_precision = max_decimal_precision<Decimal<CppType>>(); + if constexpr (std::is_same_v<CppType, Decimal32> || + std::is_same_v<CppType, Decimal64> || + std::is_same_v<CppType, Decimal128I>) { + size_t max_precision = max_decimal_precision<CppType>(); if (col_schema->parquet_schema.precision < 1 || col_schema->parquet_schema.precision > max_precision || col_schema->parquet_schema.scale > max_precision) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org