(arrow-rs) branch main updated: [Variant]: Implement `DataType::Decimal32/Decimal64/Decimal128/Decimal256` support for `cast_to_variant` kernel (#8101)

alamb Wed, 13 Aug 2025 05:52:48 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new 536524b2b1 [Variant]: Implement 
`DataType::Decimal32/Decimal64/Decimal128/Decimal256` support for 
`cast_to_variant` kernel (#8101)
536524b2b1 is described below

commit 536524b2b1fe298de25ba8b6fd8602079325d31b
Author: Liam Bao <[email protected]>
AuthorDate: Wed Aug 13 08:52:38 2025 -0400

    [Variant]: Implement `DataType::Decimal32/Decimal64/Decimal128/Decimal256` 
support for `cast_to_variant` kernel (#8101)
    
    # Which issue does this PR close?
    
    - Closes #8059.
    
    # Rationale for this change
    
    See the linked issue.
    
    # What changes are included in this PR?
    
    Adds a new `decimal_to_variant_decimal!` macro that converts an Arrow
    decimal value to a variant decimal, and uses it to support
    `Decimal32/Decimal64/Decimal128/Decimal256` in `cast_to_variant`.
    
    # Are these changes tested?
    
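    Yes. Unit tests were added for each decimal type, covering positive and
    negative scales as well as overflow values (which are cast to `Variant::Null`).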
    
    # Are there any user-facing changes?
    
    Yes, `cast_to_variant` now supports casting Arrow decimal arrays to variants.
---
 parquet-variant-compute/src/cast_to_variant.rs | 491 ++++++++++++++++++++++++-
 1 file changed, 485 insertions(+), 6 deletions(-)

diff --git a/parquet-variant-compute/src/cast_to_variant.rs 
b/parquet-variant-compute/src/cast_to_variant.rs
index 617e5cfbe5..874b734466 100644
--- a/parquet-variant-compute/src/cast_to_variant.rs
+++ b/parquet-variant-compute/src/cast_to_variant.rs
@@ -18,12 +18,13 @@
 use crate::{VariantArray, VariantArrayBuilder};
 use arrow::array::{Array, AsArray};
 use arrow::datatypes::{
-    BinaryType, BinaryViewType, Float16Type, Float32Type, Float64Type, 
Int16Type, Int32Type,
-    Int64Type, Int8Type, LargeBinaryType, UInt16Type, UInt32Type, UInt64Type, 
UInt8Type,
+    i256, BinaryType, BinaryViewType, Decimal128Type, Decimal256Type, 
Decimal32Type, Decimal64Type,
+    Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, 
Int8Type,
+    LargeBinaryType, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
 };
 use arrow_schema::{ArrowError, DataType};
 use half::f16;
-use parquet_variant::Variant;
+use parquet_variant::{Variant, VariantDecimal16, VariantDecimal4, 
VariantDecimal8};
 
 /// Convert the input array of a specific primitive type to a `VariantArray`
 /// row by row
@@ -71,6 +72,31 @@ macro_rules! cast_conversion_nongeneric {
     }};
 }
 
+/// Convert a decimal value to a `VariantDecimal`
+macro_rules! decimal_to_variant_decimal {
+    ($v:ident, $scale:expr, $value_type:ty, $variant_type:ty) => {
+        if *$scale < 0 {
+            // For negative scale, we need to multiply the value by 10^|scale|
+            // For example: 123 with scale -2 becomes 12300
+            let multiplier = (10 as $value_type).pow((-*$scale) as u32);
+            // Check for overflow
+            if $v > 0 && $v > <$value_type>::MAX / multiplier {
+                return Variant::Null;
+            }
+            if $v < 0 && $v < <$value_type>::MIN / multiplier {
+                return Variant::Null;
+            }
+            <$variant_type>::try_new($v * multiplier, 0)
+                .map(|v| v.into())
+                .unwrap_or(Variant::Null)
+        } else {
+            <$variant_type>::try_new($v, *$scale as u8)
+                .map(|v| v.into())
+                .unwrap_or(Variant::Null)
+        }
+    };
+}
+
 /// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when 
you
 /// need to convert a specific data type
 ///
@@ -148,6 +174,51 @@ pub fn cast_to_variant(input: &dyn Array) -> 
Result<VariantArray, ArrowError> {
         DataType::Float64 => {
             primitive_conversion!(Float64Type, input, builder);
         }
+        DataType::Decimal32(_, scale) => {
+            cast_conversion!(
+                Decimal32Type,
+                as_primitive,
+                |v| decimal_to_variant_decimal!(v, scale, i32, 
VariantDecimal4),
+                input,
+                builder
+            );
+        }
+        DataType::Decimal64(_, scale) => {
+            cast_conversion!(
+                Decimal64Type,
+                as_primitive,
+                |v| decimal_to_variant_decimal!(v, scale, i64, 
VariantDecimal8),
+                input,
+                builder
+            );
+        }
+        DataType::Decimal128(_, scale) => {
+            cast_conversion!(
+                Decimal128Type,
+                as_primitive,
+                |v| decimal_to_variant_decimal!(v, scale, i128, 
VariantDecimal16),
+                input,
+                builder
+            );
+        }
+        DataType::Decimal256(_, scale) => {
+            cast_conversion!(
+                Decimal256Type,
+                as_primitive,
+                |v: i256| {
+                    // Since `i128::MAX` is larger than the max value of 
`VariantDecimal16`,
+                    // any `i256` value that cannot be cast to `i128` is 
unable to be cast to `VariantDecimal16` either.
+                    // Therefore, we can safely convert `i256` to `i128` first 
and process it like `i128`.
+                    if let Some(v) = v.to_i128() {
+                        decimal_to_variant_decimal!(v, scale, i128, 
VariantDecimal16)
+                    } else {
+                        Variant::Null
+                    }
+                },
+                input,
+                builder
+            );
+        }
         DataType::FixedSizeBinary(_) => {
             cast_conversion_nongeneric!(as_fixed_size_binary, |v| v, input, 
builder);
         }
@@ -168,13 +239,29 @@ pub fn cast_to_variant(input: &dyn Array) -> 
Result<VariantArray, ArrowError> {
 mod tests {
     use super::*;
     use arrow::array::{
-        ArrayRef, FixedSizeBinaryBuilder, Float16Array, Float32Array, 
Float64Array,
-        GenericByteBuilder, GenericByteViewBuilder, Int16Array, Int32Array, 
Int64Array, Int8Array,
-        UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+        ArrayRef, Decimal128Array, Decimal256Array, Decimal32Array, 
Decimal64Array,
+        FixedSizeBinaryBuilder, Float16Array, Float32Array, Float64Array, 
GenericByteBuilder,
+        GenericByteViewBuilder, Int16Array, Int32Array, Int64Array, Int8Array, 
UInt16Array,
+        UInt32Array, UInt64Array, UInt8Array,
+    };
+    use arrow_schema::{
+        DECIMAL128_MAX_PRECISION, DECIMAL32_MAX_PRECISION, 
DECIMAL64_MAX_PRECISION,
     };
     use parquet_variant::{Variant, VariantDecimal16};
     use std::{sync::Arc, vec};
 
+    macro_rules! max_unscaled_value {
+        (32, $precision:expr) => {
+            (u32::pow(10, $precision as u32) - 1) as i32
+        };
+        (64, $precision:expr) => {
+            (u64::pow(10, $precision as u32) - 1) as i64
+        };
+        (128, $precision:expr) => {
+            (u128::pow(10, $precision as u32) - 1) as i128
+        };
+    }
+
     #[test]
     fn test_cast_to_variant_fixed_size_binary() {
         let v1 = vec![1, 2];
@@ -482,6 +569,398 @@ mod tests {
         )
     }
 
+    #[test]
+    fn test_cast_to_variant_decimal32() {
+        run_test(
+            Arc::new(
+                Decimal32Array::from(vec![
+                    Some(i32::MIN),
+                    Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION) - 
1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION)), 
// The min of Decimal32 with positive scale that can be cast to VariantDecimal4
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION)), // 
The max of Decimal32 with positive scale that can be cast to VariantDecimal4
+                    Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION) + 
1), // Overflow value will be cast to Null
+                    Some(i32::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL32_MAX_PRECISION, 3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal4::try_new(-max_unscaled_value!(32, 
DECIMAL32_MAX_PRECISION), 3)
+                        .unwrap()
+                        .into(),
+                ),
+                None,
+                Some(VariantDecimal4::try_new(-123, 3).unwrap().into()),
+                Some(VariantDecimal4::try_new(0, 3).unwrap().into()),
+                Some(VariantDecimal4::try_new(123, 3).unwrap().into()),
+                Some(
+                    VariantDecimal4::try_new(max_unscaled_value!(32, 
DECIMAL32_MAX_PRECISION), 3)
+                        .unwrap()
+                        .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal32_negative_scale() {
+        run_test(
+            Arc::new(
+                Decimal32Array::from(vec![
+                    Some(i32::MIN),
+                    Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) 
- 1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 
3)), // The min of Decimal32 with scale -3 that can be cast to VariantDecimal4
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 
3)), // The max of Decimal32 with scale -3 that can be cast to VariantDecimal4
+                    Some(max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) 
+ 1), // Overflow value will be cast to Null
+                    Some(i32::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL32_MAX_PRECISION, -3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal4::try_new(
+                        -max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) 
* 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal4::try_new(-123_000, 0).unwrap().into()),
+                Some(VariantDecimal4::try_new(0, 0).unwrap().into()),
+                Some(VariantDecimal4::try_new(123_000, 0).unwrap().into()),
+                Some(
+                    VariantDecimal4::try_new(
+                        max_unscaled_value!(32, DECIMAL32_MAX_PRECISION - 3) * 
1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal64() {
+        run_test(
+            Arc::new(
+                Decimal64Array::from(vec![
+                    Some(i64::MIN),
+                    Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION) - 
1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION)), 
// The min of Decimal64 with positive scale that can be cast to VariantDecimal8
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION)), // 
The max of Decimal64 with positive scale that can be cast to VariantDecimal8
+                    Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION) + 
1), // Overflow value will be cast to Null
+                    Some(i64::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL64_MAX_PRECISION, 3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal8::try_new(-max_unscaled_value!(64, 
DECIMAL64_MAX_PRECISION), 3)
+                        .unwrap()
+                        .into(),
+                ),
+                None,
+                Some(VariantDecimal8::try_new(-123, 3).unwrap().into()),
+                Some(VariantDecimal8::try_new(0, 3).unwrap().into()),
+                Some(VariantDecimal8::try_new(123, 3).unwrap().into()),
+                Some(
+                    VariantDecimal8::try_new(max_unscaled_value!(64, 
DECIMAL64_MAX_PRECISION), 3)
+                        .unwrap()
+                        .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal64_negative_scale() {
+        run_test(
+            Arc::new(
+                Decimal64Array::from(vec![
+                    Some(i64::MIN),
+                    Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) 
- 1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 
3)), // The min of Decimal64 with scale -3 that can be cast to VariantDecimal8
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 
3)), // The max of Decimal64 with scale -3 that can be cast to VariantDecimal8
+                    Some(max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) 
+ 1), // Overflow value will be cast to Null
+                    Some(i64::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL64_MAX_PRECISION, -3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal8::try_new(
+                        -max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) 
* 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal8::try_new(-123_000, 0).unwrap().into()),
+                Some(VariantDecimal8::try_new(0, 0).unwrap().into()),
+                Some(VariantDecimal8::try_new(123_000, 0).unwrap().into()),
+                Some(
+                    VariantDecimal8::try_new(
+                        max_unscaled_value!(64, DECIMAL64_MAX_PRECISION - 3) * 
1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal128() {
+        run_test(
+            Arc::new(
+                Decimal128Array::from(vec![
+                    Some(i128::MIN),
+                    Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) - 
1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)), 
// The min of Decimal128 with positive scale that can be cast to 
VariantDecimal16
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION)), 
// The max of Decimal128 with positive scale that can be cast to 
VariantDecimal16
+                    Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) + 
1), // Overflow value will be cast to Null
+                    Some(i128::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal16::try_new(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                        3,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal16::try_new(-123, 3).unwrap().into()),
+                Some(VariantDecimal16::try_new(0, 3).unwrap().into()),
+                Some(VariantDecimal16::try_new(123, 3).unwrap().into()),
+                Some(
+                    VariantDecimal16::try_new(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                        3,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal128_negative_scale() {
+        run_test(
+            Arc::new(
+                Decimal128Array::from(vec![
+                    Some(i128::MIN),
+                    Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3) - 1), // Overflow value will be cast to Null
+                    Some(-max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3)), // The min of Decimal128 with scale -3 that can be cast to VariantDecimal16
+                    None,
+                    Some(-123),
+                    Some(0),
+                    Some(123),
+                    Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3)), // The max of Decimal128 with scale -3 that can be cast to VariantDecimal16
+                    Some(max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3) + 1), // Overflow value will be cast to Null
+                    Some(i128::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL128_MAX_PRECISION, -3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal16::try_new(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3) * 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal16::try_new(-123_000, 0).unwrap().into()),
+                Some(VariantDecimal16::try_new(0, 0).unwrap().into()),
+                Some(VariantDecimal16::try_new(123_000, 0).unwrap().into()),
+                Some(
+                    VariantDecimal16::try_new(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) 
* 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal256() {
+        run_test(
+            Arc::new(
+                Decimal256Array::from(vec![
+                    Some(i256::MIN),
+                    Some(i256::from_i128(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) - 
1,
+                    )), // Overflow value will be cast to Null
+                    Some(i256::from_i128(-max_unscaled_value!(
+                        128,
+                        DECIMAL128_MAX_PRECISION
+                    ))), // The min of Decimal256 with positive scale that can 
be cast to VariantDecimal16
+                    None,
+                    Some(i256::from_i128(-123)),
+                    Some(i256::from_i128(0)),
+                    Some(i256::from_i128(123)),
+                    Some(i256::from_i128(max_unscaled_value!(
+                        128,
+                        DECIMAL128_MAX_PRECISION
+                    ))), // The max of Decimal256 with positive scale that can 
be cast to VariantDecimal16
+                    Some(i256::from_i128(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION) + 1,
+                    )), // Overflow value will be cast to Null
+                    Some(i256::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL128_MAX_PRECISION, 3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal16::try_new(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                        3,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal16::try_new(-123, 3).unwrap().into()),
+                Some(VariantDecimal16::try_new(0, 3).unwrap().into()),
+                Some(VariantDecimal16::try_new(123, 3).unwrap().into()),
+                Some(
+                    VariantDecimal16::try_new(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION),
+                        3,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
+    #[test]
+    fn test_cast_to_variant_decimal256_negative_scale() {
+        run_test(
+            Arc::new(
+                Decimal256Array::from(vec![
+                    Some(i256::MIN),
+                    Some(i256::from_i128(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3) - 1,
+                    )), // Overflow value will be cast to Null
+                    Some(i256::from_i128(-max_unscaled_value!(
+                        128,
+                        DECIMAL128_MAX_PRECISION - 3
+                    ))), // The min of Decimal256 with scale -3 that can be 
cast to VariantDecimal16
+                    None,
+                    Some(i256::from_i128(-123)),
+                    Some(i256::from_i128(0)),
+                    Some(i256::from_i128(123)),
+                    Some(i256::from_i128(max_unscaled_value!(
+                        128,
+                        DECIMAL128_MAX_PRECISION - 3
+                    ))), // The max of Decimal256 with scale -3 that can be 
cast to VariantDecimal16
+                    Some(i256::from_i128(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) 
+ 1,
+                    )), // Overflow value will be cast to Null
+                    Some(i256::MAX),
+                ])
+                .with_precision_and_scale(DECIMAL128_MAX_PRECISION, -3)
+                .unwrap(),
+            ),
+            vec![
+                Some(Variant::Null),
+                Some(Variant::Null),
+                Some(
+                    VariantDecimal16::try_new(
+                        -max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 
3) * 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                None,
+                Some(VariantDecimal16::try_new(-123_000, 0).unwrap().into()),
+                Some(VariantDecimal16::try_new(0, 0).unwrap().into()),
+                Some(VariantDecimal16::try_new(123_000, 0).unwrap().into()),
+                Some(
+                    VariantDecimal16::try_new(
+                        max_unscaled_value!(128, DECIMAL128_MAX_PRECISION - 3) 
* 1000,
+                        0,
+                    )
+                    .unwrap()
+                    .into(),
+                ),
+                Some(Variant::Null),
+                Some(Variant::Null),
+            ],
+        )
+    }
+
     /// Converts the given `Array` to a `VariantArray` and tests the conversion
     /// against the expected values. It also tests the handling of nulls by
     /// setting one element to null and verifying the output.

(arrow-rs) branch main updated: [Variant]: Implement `DataType::Decimal32/Decimal64/Decimal128/Decimal256` support for `cast_to_variant` kernel (#8101)

Reply via email to