This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6de3881152 Fix RowConverter when FixedSizeList is not the last (#7789)
6de3881152 is described below

commit 6de38811526b960f79ce580cfee817c7a4bbfbc6
Author: Piotr Findeisen <[email protected]>
AuthorDate: Sat Jul 5 13:32:55 2025 +0200

    Fix RowConverter when FixedSizeList is not the last (#7789)
    
    # Which issue does this PR close?
    
    none
    
    # Rationale for this change
    
    `RowConverter` decoding fails when there is a `FixedSizeList` element
    and it's not the last.
    
    # What changes are included in this PR?
    
    Fix `RowConverter` row decoding when there is a `FixedSizeList` element
    and it's not the last.
    
    # Are these changes tested?
    
    yes
---
 arrow-row/src/lib.rs  | 200 +++++++++++++++++++++++++++++++++++++++++++++++++-
 arrow-row/src/list.rs |  10 +--
 2 files changed, 202 insertions(+), 8 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index ee1c117859..d76c51578c 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -690,6 +690,15 @@ impl RowConverter {
                 columns.len()
             )));
         }
+        for colum in columns.iter().skip(1) {
+            if colum.len() != columns[0].len() {
+                return Err(ArrowError::InvalidArgumentError(format!(
+                    "RowConverter columns must all have the same length, 
expected {} got {}",
+                    columns[0].len(),
+                    colum.len()
+                )));
+            }
+        }
 
         let encoders = columns
             .iter()
@@ -758,7 +767,20 @@ impl RowConverter {
         // SAFETY
         // We have validated that the rows came from this [`RowConverter`]
         // and therefore must be valid
-        unsafe { self.convert_raw(&mut rows, validate_utf8) }
+        let result = unsafe { self.convert_raw(&mut rows, validate_utf8) }?;
+
+        if cfg!(test) {
+            for (i, row) in rows.iter().enumerate() {
+                if !row.is_empty() {
+                    return Err(ArrowError::InvalidArgumentError(format!(
+                        "Codecs {codecs:?} did not consume all bytes for row 
{i}, remaining bytes: {row:?}",
+                        codecs = &self.codecs
+                    )));
+                }
+            }
+        }
+
+        Ok(result)
     }
 
     /// Returns an empty [`Rows`] with capacity for `row_capacity` rows with
@@ -2549,6 +2571,182 @@ mod tests {
         assert_eq!(&back[0], &list);
     }
 
+    #[test]
+    fn test_two_fixed_size_lists() {
+        let mut first = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
+        // 0: [100]
+        first.values().append_value(100);
+        first.append(true);
+        // 1: [101]
+        first.values().append_value(101);
+        first.append(true);
+        // 2: [102]
+        first.values().append_value(102);
+        first.append(true);
+        // 3: [null]
+        first.values().append_null();
+        first.append(true);
+        // 4: null
+        first.values().append_null(); // MASKED
+        first.append(false);
+        let first = Arc::new(first.finish()) as ArrayRef;
+        let first_type = first.data_type().clone();
+
+        let mut second = FixedSizeListBuilder::new(UInt8Builder::new(), 1);
+        // 0: [200]
+        second.values().append_value(200);
+        second.append(true);
+        // 1: [201]
+        second.values().append_value(201);
+        second.append(true);
+        // 2: [202]
+        second.values().append_value(202);
+        second.append(true);
+        // 3: [null]
+        second.values().append_null();
+        second.append(true);
+        // 4: null
+        second.values().append_null(); // MASKED
+        second.append(false);
+        let second = Arc::new(second.finish()) as ArrayRef;
+        let second_type = second.data_type().clone();
+
+        let converter = RowConverter::new(vec![
+            SortField::new(first_type.clone()),
+            SortField::new(second_type.clone()),
+        ])
+        .unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
+            .unwrap();
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 2);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &first);
+        back[1].to_data().validate_full().unwrap();
+        assert_eq!(&back[1], &second);
+    }
+
+    #[test]
+    fn test_fixed_size_list_with_variable_width_content() {
+        let mut first = FixedSizeListBuilder::new(
+            StructBuilder::from_fields(
+                vec![
+                    Field::new(
+                        "timestamp",
+                        DataType::Timestamp(TimeUnit::Microsecond, 
Some(Arc::from("UTC"))),
+                        false,
+                    ),
+                    Field::new("offset_minutes", DataType::Int16, false),
+                    Field::new("time_zone", DataType::Utf8, false),
+                ],
+                1,
+            ),
+            1,
+        );
+        // 0: null
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_null();
+        first.values().append(false);
+        first.append(false);
+        // 1: [null]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_null();
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_null();
+        first.values().append(false);
+        first.append(true);
+        // 2: [1970-01-01 00:00:00.000000 UTC]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_value(0);
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_value(0);
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_value("UTC");
+        first.values().append(true);
+        first.append(true);
+        // 3: [2005-09-10 13:30:00.123456 Europe/Warsaw]
+        first
+            .values()
+            .field_builder::<TimestampMicrosecondBuilder>(0)
+            .unwrap()
+            .append_value(1126351800123456);
+        first
+            .values()
+            .field_builder::<Int16Builder>(1)
+            .unwrap()
+            .append_value(120);
+        first
+            .values()
+            .field_builder::<StringBuilder>(2)
+            .unwrap()
+            .append_value("Europe/Warsaw");
+        first.values().append(true);
+        first.append(true);
+        let first = Arc::new(first.finish()) as ArrayRef;
+        let first_type = first.data_type().clone();
+
+        let mut second = StringBuilder::new();
+        second.append_value("somewhere near");
+        second.append_null();
+        second.append_value("Greenwich");
+        second.append_value("Warsaw");
+        let second = Arc::new(second.finish()) as ArrayRef;
+        let second_type = second.data_type().clone();
+
+        let converter = RowConverter::new(vec![
+            SortField::new(first_type.clone()),
+            SortField::new(second_type.clone()),
+        ])
+        .unwrap();
+
+        let rows = converter
+            .convert_columns(&[Arc::clone(&first), Arc::clone(&second)])
+            .unwrap();
+
+        let back = converter.convert_rows(&rows).unwrap();
+        assert_eq!(back.len(), 2);
+        back[0].to_data().validate_full().unwrap();
+        assert_eq!(&back[0], &first);
+        back[1].to_data().validate_full().unwrap();
+        assert_eq!(&back[1], &second);
+    }
+
     fn generate_primitive_array<K>(len: usize, valid_percent: f64) -> 
PrimitiveArray<K>
     where
         K: ArrowPrimitiveType,
diff --git a/arrow-row/src/list.rs b/arrow-row/src/list.rs
index 58fbc71caa..e9dc38e0fb 100644
--- a/arrow-row/src/list.rs
+++ b/arrow-row/src/list.rs
@@ -225,7 +225,6 @@ pub fn encode_fixed_size_list(
                     data[*offset] = 0x01;
                     *offset += 1;
                     for child_idx in (idx * value_length)..(idx + 1) * 
value_length {
-                        //dbg!(child_idx);
                         let row = rows.row(child_idx);
                         let end_offset = *offset + row.as_ref().len();
                         
data[*offset..end_offset].copy_from_slice(row.as_ref());
@@ -233,12 +232,8 @@ pub fn encode_fixed_size_list(
                     }
                 }
                 false => {
-                    let null_sentinels = 1;
-                    //+ value_length; // 1 for self + for values too
-                    for i in 0..null_sentinels {
-                        data[*offset + i] = null_sentinel;
-                    }
-                    *offset += null_sentinels;
+                    data[*offset] = null_sentinel;
+                    *offset += 1;
                 }
             };
         })
@@ -291,6 +286,7 @@ pub unsafe fn decode_fixed_size_list(
                 row_offset = next_offset;
             }
         }
+        *row = &row[row_offset..]; // Update row for the next decoder
     }
 
     let children = converter.convert_raw(&mut child_rows, validate_utf8)?;

Reply via email to