This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new de1686ac79 feat: support array indices in VariantPath dot notation (#9012)
de1686ac79 is described below

commit de1686ac79738793e5dfa067c7c844edbe79864a
Author: Dhanush <[email protected]>
AuthorDate: Sat Dec 27 17:30:39 2025 +0530

    feat: support array indices in VariantPath dot notation (#9012)
    
    # Which issue does this PR close?
    
    - Closes #8946
    
    # What changes are included in this PR?
    This PR adds support for parsing array indices (e.g. `foo.bar[3]`) in
    dot-notation paths, using a new `parse_path` helper function. Currently
    the parser does not reject malformed segments: it silently parses them
    as a `Field` (e.g. `foo[0`, `foo0]`, `foo[0][`), while a bare `[0]` is
    parsed as an index.
    
    #### Feedback requested
    Should the parser apply stricter validation and reject such segments
    with an error, or should it keep the current behavior?
    
    # Are these changes tested?
    Yes, but only for valid inputs.
    
    # Are there any user-facing changes?
    no
---
 parquet-variant/src/path.rs  | 39 ++++++++++++++++++++++++++++++++++-----
 parquet-variant/src/utils.rs | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index e222c3ac9c..2aeb9df97d 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -16,6 +16,8 @@
 // under the License.
 use std::{borrow::Cow, ops::Deref};
 
+use crate::utils::parse_path;
+
 /// Represents a qualified path to a potential subfield or index of a variant
 /// value.
 ///
@@ -112,11 +114,7 @@ impl<'a> From<Vec<VariantPathElement<'a>>> for VariantPath<'a> {
 /// Create from &str with support for dot notation
 impl<'a> From<&'a str> for VariantPath<'a> {
     fn from(path: &'a str) -> Self {
-        if path.is_empty() {
-            VariantPath::new(vec![])
-        } else {
-            VariantPath::new(path.split('.').map(Into::into).collect())
-        }
+        VariantPath::new(path.split(".").flat_map(parse_path).collect())
     }
 }
 
@@ -223,4 +221,35 @@ mod tests {
         let path = VariantPath::from_iter([p]);
         assert!(!path.is_empty());
     }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_array_index() {
+        let path = VariantPath::from("city.store.books[3].title");
+
+        let expected = VariantPath::from("city")
+            .join("store")
+            .join("books")
+            .join(3)
+            .join("title");
+
+        assert_eq!(path, expected);
+    }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_only_array_index() {
+        let path = VariantPath::from("[3]");
+
+        let expected = VariantPath::from(3);
+
+        assert_eq!(path, expected);
+    }
+
+    #[test]
+    fn test_variant_path_dot_notation_with_starting_array_index() {
+        let path = VariantPath::from("[3].title");
+
+        let expected = VariantPath::from(3).join("title");
+
+        assert_eq!(path, expected);
+    }
 }
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index d28b8685ba..6accbcb366 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -16,6 +16,7 @@
 // under the License.
 use std::{array::TryFromSliceError, ops::Range, str};
 
+use crate::VariantPathElement;
 use arrow_schema::ArrowError;
 
 use std::cmp::Ordering;
@@ -149,6 +150,38 @@ pub(crate) fn fits_precision<const N: u32>(n: impl Into<i64>) -> bool {
     n.into().unsigned_abs().leading_zeros() >= (i64::BITS - N)
 }
 
+// Helper fn to parse input segments like foo[0] or foo[0][0]
+#[inline]
+pub(crate) fn parse_path<'a>(segment: &'a str) -> Vec<VariantPathElement<'a>> {
+    if segment.is_empty() {
+        return Vec::new();
+    }
+
+    let mut path_elements = Vec::new();
+    let mut base = segment;
+
+    while let Some(stripped) = base.strip_suffix(']') {
+        let Some(open_pos) = stripped.rfind('[') else {
+            return vec![VariantPathElement::field(segment)];
+        };
+
+        let index_str = &stripped[open_pos + 1..];
+        let Ok(index) = index_str.parse::<usize>() else {
+            return vec![VariantPathElement::field(segment)];
+        };
+
+        path_elements.push(VariantPathElement::index(index));
+        base = &stripped[..open_pos];
+    }
+
+    if !base.is_empty() {
+        path_elements.push(VariantPathElement::field(base));
+    }
+
+    path_elements.reverse();
+    path_elements
+}
+
 #[cfg(test)]
 mod test {
     use super::*;

Reply via email to