This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 554cafa140 Implement `DataType::Float16` => `Variant::Float` (#8073)
554cafa140 is described below
commit 554cafa140f0e4a007aa00a411d3f2b63bc6076c
Author: superserious-dev <[email protected]>
AuthorDate: Thu Aug 7 03:35:29 2025 -0700
Implement `DataType::Float16` => `Variant::Float` (#8073)
# Which issue does this PR close?
- Closes #8057
# Rationale for this change
Adds Float16 conversion to the `cast_to_variant` kernel
# What changes are included in this PR?
- a macro that simplifies converting array types that require a cast
- conversion of `DataType::Float16` => `Variant::Float`
# Are these changes tested?
Yes, additional unit tests have been added.
# Are there any user-facing changes?
Yes, this adds a new type conversion to the `cast_to_variant` kernel.
---
parquet-variant-compute/Cargo.toml | 2 +-
parquet-variant-compute/src/cast_to_variant.rs | 50 +++++++++++++++++++++++---
2 files changed, 47 insertions(+), 5 deletions(-)
diff --git a/parquet-variant-compute/Cargo.toml
b/parquet-variant-compute/Cargo.toml
index cc13810a29..0aa926ee7f 100644
--- a/parquet-variant-compute/Cargo.toml
+++ b/parquet-variant-compute/Cargo.toml
@@ -33,6 +33,7 @@ rust-version = { workspace = true }
[dependencies]
arrow = { workspace = true }
arrow-schema = { workspace = true }
+half = { version = "2.1", default-features = false }
parquet-variant = { workspace = true }
parquet-variant-json = { workspace = true }
@@ -49,4 +50,3 @@ arrow = { workspace = true, features = ["test_utils"] }
[[bench]]
name = "variant_kernels"
harness = false
-
diff --git a/parquet-variant-compute/src/cast_to_variant.rs
b/parquet-variant-compute/src/cast_to_variant.rs
index 49bdd30cea..cbd16c589c 100644
--- a/parquet-variant-compute/src/cast_to_variant.rs
+++ b/parquet-variant-compute/src/cast_to_variant.rs
@@ -18,10 +18,11 @@
use crate::{VariantArray, VariantArrayBuilder};
use arrow::array::{Array, AsArray};
use arrow::datatypes::{
- Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
UInt16Type, UInt32Type,
- UInt64Type, UInt8Type,
+ Float16Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
Int8Type, UInt16Type,
+ UInt32Type, UInt64Type, UInt8Type,
};
use arrow_schema::{ArrowError, DataType};
+use half::f16;
use parquet_variant::Variant;
/// Convert the input array of a specific primitive type to a `VariantArray`
@@ -39,6 +40,22 @@ macro_rules! primitive_conversion {
}};
}
+/// Convert the input array to a `VariantArray` row by row,
+/// transforming each element with `cast_fn`
+macro_rules! cast_conversion {
+ ($t:ty, $cast_fn:expr, $input:expr, $builder:expr) => {{
+ let array = $input.as_primitive::<$t>();
+ for i in 0..array.len() {
+ if array.is_null(i) {
+ $builder.append_null();
+ continue;
+ }
+ let cast_value = $cast_fn(array.value(i));
+ $builder.append_variant(Variant::from(cast_value));
+ }
+ }};
+}
+
/// Casts a typed arrow [`Array`] to a [`VariantArray`]. This is useful when
you
/// need to convert a specific data type
///
@@ -92,6 +109,9 @@ pub fn cast_to_variant(input: &dyn Array) ->
Result<VariantArray, ArrowError> {
DataType::UInt64 => {
primitive_conversion!(UInt64Type, input, builder);
}
+ DataType::Float16 => {
+ cast_conversion!(Float16Type, |v: f16| -> f32 { v.into() }, input,
builder);
+ }
DataType::Float32 => {
primitive_conversion!(Float32Type, input, builder);
}
@@ -115,8 +135,8 @@ pub fn cast_to_variant(input: &dyn Array) ->
Result<VariantArray, ArrowError> {
mod tests {
use super::*;
use arrow::array::{
- ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array,
Int64Array, Int8Array,
- UInt16Array, UInt32Array, UInt64Array, UInt8Array,
+ ArrayRef, Float16Array, Float32Array, Float64Array, Int16Array,
Int32Array, Int64Array,
+ Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
};
use parquet_variant::{Variant, VariantDecimal16};
use std::sync::Arc;
@@ -284,6 +304,28 @@ mod tests {
)
}
+ #[test]
+ fn test_cast_to_variant_float16() {
+ run_test(
+ Arc::new(Float16Array::from(vec![
+ Some(f16::MIN),
+ None,
+ Some(f16::from_f32(-1.5)),
+ Some(f16::from_f32(0.0)),
+ Some(f16::from_f32(1.5)),
+ Some(f16::MAX),
+ ])),
+ vec![
+ Some(Variant::Float(f16::MIN.into())),
+ None,
+ Some(Variant::Float(-1.5)),
+ Some(Variant::Float(0.0)),
+ Some(Variant::Float(1.5)),
+ Some(Variant::Float(f16::MAX.into())),
+ ],
+ )
+ }
+
#[test]
fn test_cast_to_variant_float32() {
run_test(