(doris) branch master updated: [Fix][Opt](parquet-reader) Fix filter push down with decimal types in parquet reader. (#27897)

ashingau Mon, 04 Dec 2023 06:25:52 -0800

This is an automated email from the ASF dual-hosted git repository.

ashingau pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/master by this push:
     new 2b4c4bb4424 [Fix][Opt](parquet-reader) Fix filter push down with 
decimal types in parquet reader. (#27897)
2b4c4bb4424 is described below

commit 2b4c4bb44247fb10165bab5b0c98daae7f269c0e
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Mon Dec 4 22:25:39 2023 +0800

    [Fix][Opt](parquet-reader) Fix filter push down with decimal types in 
parquet reader. (#27897)
    
    Fix the filter push-down bug for decimal types in the parquet reader
    that was introduced by #22842.
---
 be/src/vec/exec/format/format_common.h              | 11 +++++++----
 be/src/vec/exec/format/orc/vorc_reader.cpp          | 16 ++++++++--------
 be/src/vec/exec/format/orc/vorc_reader.h            |  4 ++--
 .../exec/format/parquet/parquet_column_convert.h    | 19 +++++++++----------
 be/src/vec/exec/format/parquet/parquet_pred_cmp.h   | 21 +++++++++++----------
 5 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/be/src/vec/exec/format/format_common.h 
b/be/src/vec/exec/format/format_common.h
index 8d1b651c7b0..9e8ce4fe238 100644
--- a/be/src/vec/exec/format/format_common.h
+++ b/be/src/vec/exec/format/format_common.h
@@ -34,14 +34,17 @@ struct DecimalScaleParams {
 
     template <typename DecimalPrimitiveType>
     static inline constexpr DecimalPrimitiveType get_scale_factor(int32_t n) {
-        if constexpr (std::is_same_v<DecimalPrimitiveType, Int32>) {
+        if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal32>) {
             return common::exp10_i32(n);
-        } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int64>) {
+        } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal64>) {
             return common::exp10_i64(n);
-        } else if constexpr (std::is_same_v<DecimalPrimitiveType, Int128>) {
+        } else if constexpr (std::is_same_v<DecimalPrimitiveType, Decimal128>) 
{
+            return common::exp10_i128(n);
+        } else if constexpr (std::is_same_v<DecimalPrimitiveType, 
Decimal128I>) {
             return common::exp10_i128(n);
         } else {
-            return DecimalPrimitiveType(1);
+            static_assert(!sizeof(DecimalPrimitiveType),
+                          "All types must be matched with if constexpr.");
         }
     }
 };
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 7f016e0f938..aa790d45170 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -1281,17 +1281,17 @@ Status OrcReader::_orc_column_to_doris_column(const 
std::string& col_name,
     case TypeIndex::Int32:
         return _decode_int32_column<is_filter>(col_name, data_column, cvb, 
num_values);
     case TypeIndex::Decimal32:
-        return _decode_decimal_column<Int32, is_filter>(col_name, data_column, 
data_type, cvb,
-                                                        num_values);
+        return _decode_decimal_column<Decimal32, is_filter>(col_name, 
data_column, data_type, cvb,
+                                                            num_values);
     case TypeIndex::Decimal64:
-        return _decode_decimal_column<Int64, is_filter>(col_name, data_column, 
data_type, cvb,
-                                                        num_values);
+        return _decode_decimal_column<Decimal64, is_filter>(col_name, 
data_column, data_type, cvb,
+                                                            num_values);
     case TypeIndex::Decimal128:
-        return _decode_decimal_column<Int128, is_filter>(col_name, 
data_column, data_type, cvb,
-                                                         num_values);
+        return _decode_decimal_column<Decimal128, is_filter>(col_name, 
data_column, data_type, cvb,
+                                                             num_values);
     case TypeIndex::Decimal128I:
-        return _decode_decimal_column<Int128, is_filter>(col_name, 
data_column, data_type, cvb,
-                                                         num_values);
+        return _decode_decimal_column<Decimal128I, is_filter>(col_name, 
data_column, data_type, cvb,
+                                                              num_values);
     case TypeIndex::Date:
         return _decode_time_column<VecDateTimeValue, Int64, 
orc::LongVectorBatch, is_filter>(
                 col_name, data_column, cvb, num_values);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index 0384b0b9793..ca699831d66 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -287,7 +287,7 @@ private:
         if (scale_params.scale_type != DecimalScaleParams::NOT_INIT) {
             return;
         }
-        auto* decimal_type = 
reinterpret_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>(
+        auto* decimal_type = 
reinterpret_cast<DataTypeDecimal<DecimalPrimitiveType>*>(
                 const_cast<IDataType*>(remove_nullable(data_type).get()));
         auto dest_scale = decimal_type->get_scale();
         if (dest_scale > orc_decimal_scale) {
@@ -324,7 +324,7 @@ private:
 
         auto* cvb_data = data->values.data();
         auto& column_data =
-                
static_cast<ColumnDecimal<Decimal<DecimalPrimitiveType>>&>(*data_column).get_data();
+                
static_cast<ColumnDecimal<DecimalPrimitiveType>&>(*data_column).get_data();
         auto origin_size = column_data.size();
         column_data.resize(origin_size + num_values);
 
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h 
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 9273d7a1aa2..65f5270face 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -108,11 +108,11 @@ struct PhysicalTypeTraits<tparquet::Type::INT96> {
     M(TypeIndex::Float32, Float32, Float32) \
     M(TypeIndex::Float64, Float64, Float64)
 
-#define FOR_LOGICAL_DECIMAL_TYPES(M)             \
-    M(TypeIndex::Decimal32, Decimal32, Int32)    \
-    M(TypeIndex::Decimal64, Decimal64, Int64)    \
-    M(TypeIndex::Decimal128, Decimal128, Int128) \
-    M(TypeIndex::Decimal128I, Decimal128, Int128)
+#define FOR_LOGICAL_DECIMAL_TYPES(M)                 \
+    M(TypeIndex::Decimal32, Decimal32, Decimal32)    \
+    M(TypeIndex::Decimal64, Decimal64, Decimal64)    \
+    M(TypeIndex::Decimal128, Decimal128, Decimal128) \
+    M(TypeIndex::Decimal128I, Decimal128I, Decimal128I)
 
 struct ConvertParams {
     // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
@@ -174,7 +174,7 @@ struct ConvertParams {
             return;
         }
         auto scale = field_schema->parquet_schema.scale;
-        auto* decimal_type = 
static_cast<DataTypeDecimal<Decimal<DecimalPrimitiveType>>*>(
+        auto* decimal_type = 
static_cast<DataTypeDecimal<DecimalPrimitiveType>*>(
                 const_cast<IDataType*>(remove_nullable(data_type).get()));
         auto dest_scale = decimal_type->get_scale();
         if (dest_scale > scale) {
@@ -400,9 +400,8 @@ public:
         dst_col->resize(_convert_params->start_idx + rows);
 
         DecimalScaleParams& scale_params = _convert_params->decimal_scale;
-        auto* data = 
static_cast<ColumnDecimal<Decimal<DecimalPhysicalType>>*>(dst_col.get())
-                             ->get_data()
-                             .data();
+        auto* data =
+                
static_cast<ColumnDecimal<DecimalPhysicalType>*>(dst_col.get())->get_data().data();
 
         for (int i = 0; i < rows; i++) {
             ValueCopyType value = src_data[i];
@@ -678,4 +677,4 @@ inline Status get_converter(tparquet::Type::type 
parquet_physical_type, Primitiv
 
 }; // namespace ParquetConvert
 
-}; // namespace doris::vectorized
\ No newline at end of file
+}; // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h 
b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index b993370a159..8158fb9e9f1 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -163,7 +163,7 @@ private:
             break;
         case TYPE_DECIMALV2:
             if constexpr (std::is_same_v<CppType, DecimalV2Value>) {
-                size_t max_precision = 
max_decimal_precision<Decimal<__int128_t>>();
+                size_t max_precision = max_decimal_precision<Decimal128>();
                 if (col_schema->parquet_schema.precision < 1 ||
                     col_schema->parquet_schema.precision > max_precision ||
                     col_schema->parquet_schema.scale > max_precision) {
@@ -172,18 +172,18 @@ private:
                 int v2_scale = DecimalV2Value::SCALE;
                 if (physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
                     min_value = DecimalV2Value(
-                            _decode_binary_decimal<Int128>(col_schema, 
encoded_min, v2_scale));
+                            _decode_binary_decimal<Decimal128>(col_schema, 
encoded_min, v2_scale));
                     max_value = DecimalV2Value(
-                            _decode_binary_decimal<Int128>(col_schema, 
encoded_max, v2_scale));
+                            _decode_binary_decimal<Decimal128>(col_schema, 
encoded_max, v2_scale));
                 } else if (physical_type == tparquet::Type::INT32) {
-                    min_value = 
DecimalV2Value(_decode_primitive_decimal<Int128, Int32>(
+                    min_value = 
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>(
                             col_schema, encoded_min, v2_scale));
-                    max_value = 
DecimalV2Value(_decode_primitive_decimal<Int128, Int32>(
+                    max_value = 
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int32>(
                             col_schema, encoded_max, v2_scale));
                 } else if (physical_type == tparquet::Type::INT64) {
-                    min_value = 
DecimalV2Value(_decode_primitive_decimal<Int128, Int64>(
+                    min_value = 
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>(
                             col_schema, encoded_min, v2_scale));
-                    max_value = 
DecimalV2Value(_decode_primitive_decimal<Int128, Int64>(
+                    max_value = 
DecimalV2Value(_decode_primitive_decimal<Decimal128, Int64>(
                             col_schema, encoded_max, v2_scale));
                 } else {
                     return false;
@@ -197,9 +197,10 @@ private:
         case TYPE_DECIMAL64:
             [[fallthrough]];
         case TYPE_DECIMAL128I:
-            if constexpr (std::is_same_v<CppType, int32_t> || 
std::is_same_v<CppType, int64_t> ||
-                          std::is_same_v<CppType, __int128_t>) {
-                size_t max_precision = 
max_decimal_precision<Decimal<CppType>>();
+            if constexpr (std::is_same_v<CppType, Decimal32> ||
+                          std::is_same_v<CppType, Decimal64> ||
+                          std::is_same_v<CppType, Decimal128I>) {
+                size_t max_precision = max_decimal_precision<CppType>();
                 if (col_schema->parquet_schema.precision < 1 ||
                     col_schema->parquet_schema.precision > max_precision ||
                     col_schema->parquet_schema.scale > max_precision) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris) branch master updated: [Fix][Opt](parquet-reader) Fix filter push down with decimal types in parquet reader. (#27897)

Reply via email to