This is an automated email from the ASF dual-hosted git repository.
scovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 183f8c1c53 Add PrimitiveRunBuilder::with_data_type() to customize the values' DataType (#9473)
183f8c1c53 is described below
commit 183f8c1c5361ac5f026d6fbfa8e99a2920dcb652
Author: Bruno <[email protected]>
AuthorDate: Fri Feb 27 14:34:49 2026 +0100
Add PrimitiveRunBuilder::with_data_type() to customize the values' DataType (#9473)
This enables setting a timezone or precision & scale on parameterized
DataType values.
Note: I think the panic is unfortunate, and a try_with_data_type() would
be sensible.
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8042.
# Are these changes tested?
Yes
# Are there any user-facing changes?
- Adds `PrimitiveRunBuilder::with_data_type`.
---
arrow-array/src/builder/primitive_run_builder.rs | 54 +++++++++++++++++++++++-
1 file changed, 52 insertions(+), 2 deletions(-)
diff --git a/arrow-array/src/builder/primitive_run_builder.rs b/arrow-array/src/builder/primitive_run_builder.rs
index 52bdaa6f40..c1dc0d8d7d 100644
--- a/arrow-array/src/builder/primitive_run_builder.rs
+++ b/arrow-array/src/builder/primitive_run_builder.rs
@@ -108,6 +108,20 @@ where
prev_run_end_index: 0,
}
}
+
+ /// Overrides the data type of the values child array.
+ ///
+ /// By default, `V::DATA_TYPE` is used (via [`PrimitiveBuilder`]). This
+ /// allows setting the timezone of a Timestamp, the precision & scale of a
+ /// Decimal, etc.
+ ///
+ /// # Panics
+ ///
+ /// This method panics if `values_builder` rejects `data_type`.
+ pub fn with_data_type(mut self, data_type: arrow_schema::DataType) -> Self {
+ self.values_builder = self.values_builder.with_data_type(data_type);
+ self
+ }
}
impl<R, V> ArrayBuilder for PrimitiveRunBuilder<R, V>
@@ -259,10 +273,12 @@ where
#[cfg(test)]
mod tests {
+ use arrow_schema::DataType;
+
use crate::builder::PrimitiveRunBuilder;
use crate::cast::AsArray;
- use crate::types::{Int16Type, UInt32Type};
- use crate::{Array, UInt32Array};
+ use crate::types::{Decimal128Type, Int16Type, TimestampMicrosecondType, UInt32Type};
+ use crate::{Array, Decimal128Array, TimestampMicrosecondArray, UInt32Array};
#[test]
fn test_primitive_ree_array_builder() {
@@ -310,4 +326,38 @@ mod tests {
&[1, 2, 5, 4, 6, 2]
);
}
+
+ #[test]
+ #[should_panic]
+ fn test_override_data_type_invalid() {
+ PrimitiveRunBuilder::<Int16Type, UInt32Type>::new().with_data_type(DataType::UInt64);
+ }
+
+ #[test]
+ fn test_override_data_type() {
+ // Noop.
+ PrimitiveRunBuilder::<Int16Type, UInt32Type>::new().with_data_type(DataType::UInt32);
+
+ // Setting scale & precision.
+ let mut builder = PrimitiveRunBuilder::<Int16Type, Decimal128Type>::new()
+ .with_data_type(DataType::Decimal128(1, 2));
+ builder.append_value(123);
+ let array = builder.finish();
+ let array = array.downcast::<Decimal128Array>().unwrap();
+ let values = array.values();
+ assert_eq!(values.precision(), 1);
+ assert_eq!(values.scale(), 2);
+
+ // Setting timezone.
+ let mut builder = PrimitiveRunBuilder::<Int16Type, TimestampMicrosecondType>::new()
+ .with_data_type(DataType::Timestamp(
+ arrow_schema::TimeUnit::Microsecond,
+ Some("Europe/Paris".into()),
+ ));
+ builder.append_value(1);
+ let array = builder.finish();
+ let array = array.downcast::<TimestampMicrosecondArray>().unwrap();
+ let values = array.values();
+ assert_eq!(values.timezone(), Some("Europe/Paris"));
+ }
}