This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new de1686ac79 feat: support array indices in VariantPath dot notation
(#9012)
de1686ac79 is described below
commit de1686ac79738793e5dfa067c7c844edbe79864a
Author: Dhanush <[email protected]>
AuthorDate: Sat Dec 27 17:30:39 2025 +0530
feat: support array indices in VariantPath dot notation (#9012)
# Which issue does this PR close?
- Closes #8946
# What changes are included in this PR?
This PR adds support for parsing array indices (e.g. `foo.bar[3]`) in dot
notation via the new `parse_path` helper fn. Currently the parser does not
reject malformed segments: it silently parses them as a Field (e.g. `foo[0`,
`foo0]`, `foo[0][`), while a bare `[0]` is parsed as an Index.
#### Feedback requested
Should the parser apply stricter validation (reporting an error) and reject
malformed segments, or should it keep the current behavior?
# Are these changes tested?
Yes, but only for valid inputs.
# Are there any user-facing changes?
no
---
parquet-variant/src/path.rs | 39 ++++++++++++++++++++++++++++++++++-----
parquet-variant/src/utils.rs | 33 +++++++++++++++++++++++++++++++++
2 files changed, 67 insertions(+), 5 deletions(-)
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index e222c3ac9c..2aeb9df97d 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -16,6 +16,8 @@
// under the License.
use std::{borrow::Cow, ops::Deref};
+use crate::utils::parse_path;
+
/// Represents a qualified path to a potential subfield or index of a variant
/// value.
///
@@ -112,11 +114,7 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for
VariantPath<'a> {
/// Create from &str with support for dot notation
impl<'a> From<&'a str> for VariantPath<'a> {
fn from(path: &'a str) -> Self {
- if path.is_empty() {
- VariantPath::new(vec![])
- } else {
- VariantPath::new(path.split('.').map(Into::into).collect())
- }
+ VariantPath::new(path.split(".").flat_map(parse_path).collect())
}
}
@@ -223,4 +221,35 @@ mod tests {
let path = VariantPath::from_iter([p]);
assert!(!path.is_empty());
}
+
+ #[test]
+ fn test_variant_path_dot_notation_with_array_index() {
+ let path = VariantPath::from("city.store.books[3].title");
+
+ let expected = VariantPath::from("city")
+ .join("store")
+ .join("books")
+ .join(3)
+ .join("title");
+
+ assert_eq!(path, expected);
+ }
+
+ #[test]
+ fn test_variant_path_dot_notation_with_only_array_index() {
+ let path = VariantPath::from("[3]");
+
+ let expected = VariantPath::from(3);
+
+ assert_eq!(path, expected);
+ }
+
+ #[test]
+ fn test_variant_path_dot_notation_with_starting_array_index() {
+ let path = VariantPath::from("[3].title");
+
+ let expected = VariantPath::from(3).join("title");
+
+ assert_eq!(path, expected);
+ }
}
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index d28b8685ba..6accbcb366 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -16,6 +16,7 @@
// under the License.
use std::{array::TryFromSliceError, ops::Range, str};
+use crate::VariantPathElement;
use arrow_schema::ArrowError;
use std::cmp::Ordering;
@@ -149,6 +150,38 @@ pub(crate) fn fits_precision<const N: u32>(n: impl
Into<i64>) -> bool {
n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N)
}
+// Helper fn to parse input segments like foo[0] or foo[0][0]
+#[inline]
+pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec<VariantPathElement<'a>> {
+ if segment.is_empty() {
+ return Vec::new();
+ }
+
+ let mut path_elements = Vec::new();
+ let mut base = segment;
+
+ while let Some(stripped) = base.strip_suffix(']') {
+ let Some(open_pos) = stripped.rfind('[') else {
+ return vec![VariantPathElement::field(segment)];
+ };
+
+ let index_str = &stripped[open_pos + 1..];
+ let Ok(index) = index_str.parse::<usize>() else {
+ return vec![VariantPathElement::field(segment)];
+ };
+
+ path_elements.push(VariantPathElement::index(index));
+ base = &stripped[..open_pos];
+ }
+
+ if !base.is_empty() {
+ path_elements.push(VariantPathElement::field(base));
+ }
+
+ path_elements.reverse();
+ path_elements
+}
+
#[cfg(test)]
mod test {
use super::*;