This is an automated email from the ASF dual-hosted git repository.

jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new b1ddc241e9 fix: `Rows` `size` should use `capacity` and not `len` 
(#9044)
b1ddc241e9 is described below

commit b1ddc241e9b985791e7c9e33fbb2f0f3c8ea2e2d
Author: Raz Luvaton <[email protected]>
AuthorDate: Sat Dec 27 02:52:16 2025 +0200

    fix: `Rows` `size` should use `capacity` and not `len` (#9044)
    
    # Which issue does this PR close?
    
    N/A
    
    # Rationale for this change
    
    Because `Rows` owns the data and offsets vectors, `size` should use their
    capacity, not their length, to track the memory that it uses.
    
    # What changes are included in this PR?
    
    Replaced `len` with `capacity` in `Rows::size` and added a test.
    
    # Are these changes tested?
    
    Yes (the new test fails on main and passes with this fix).
    
    # Are there any user-facing changes?
    
    `Rows::size` now returns a more accurate size.
---
 arrow-row/src/lib.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 72a295627e..aa6543485f 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1131,8 +1131,8 @@ impl Rows {
     pub fn size(&self) -> usize {
         // Size of fields is accounted for as part of RowConverter
         std::mem::size_of::<Self>()
-            + self.buffer.len()
-            + self.offsets.len() * std::mem::size_of::<usize>()
+            + self.buffer.capacity()
+            + self.offsets.capacity() * std::mem::size_of::<usize>()
     }
 
     /// Create a [BinaryArray] from the [Rows] data without reallocating the
@@ -4050,4 +4050,47 @@ mod tests {
         // "a" < "z"
         assert!(rows.row(3) < rows.row(1));
     }
+
+    #[test]
+    fn rows_size_should_count_for_capacity() {
+        let row_converter = 
RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
+
+        let empty_rows_size_with_preallocate_rows_and_data = {
+            let rows = row_converter.empty_rows(1000, 1000);
+
+            rows.size()
+        };
+        let empty_rows_size_with_preallocate_rows = {
+            let rows = row_converter.empty_rows(1000, 0);
+
+            rows.size()
+        };
+        let empty_rows_size_with_preallocate_data = {
+            let rows = row_converter.empty_rows(0, 1000);
+
+            rows.size()
+        };
+        let empty_rows_size_without_preallocate = {
+            let rows = row_converter.empty_rows(0, 0);
+
+            rows.size()
+        };
+
+        assert!(
+            empty_rows_size_with_preallocate_rows_and_data > 
empty_rows_size_with_preallocate_rows,
+            "{empty_rows_size_with_preallocate_rows_and_data} should be larger 
than {empty_rows_size_with_preallocate_rows}"
+        );
+        assert!(
+            empty_rows_size_with_preallocate_rows_and_data > 
empty_rows_size_with_preallocate_data,
+            "{empty_rows_size_with_preallocate_rows_and_data} should be larger 
than {empty_rows_size_with_preallocate_data}"
+        );
+        assert!(
+            empty_rows_size_with_preallocate_rows > 
empty_rows_size_without_preallocate,
+            "{empty_rows_size_with_preallocate_rows} should be larger than 
{empty_rows_size_without_preallocate}"
+        );
+        assert!(
+            empty_rows_size_with_preallocate_data > 
empty_rows_size_without_preallocate,
+            "{empty_rows_size_with_preallocate_data} should be larger than 
{empty_rows_size_without_preallocate}"
+        );
+    }
 }

Reply via email to