This is an automated email from the ASF dual-hosted git repository.
jeffreyvo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new b1ddc241e9 fix: `Rows` `size` should use `capacity` and not `len`
(#9044)
b1ddc241e9 is described below
commit b1ddc241e9b985791e7c9e33fbb2f0f3c8ea2e2d
Author: Raz Luvaton <[email protected]>
AuthorDate: Sat Dec 27 02:52:16 2025 +0200
fix: `Rows` `size` should use `capacity` and not `len` (#9044)
# Which issue does this PR close?
N/A
# Rationale for this change
Because `Rows` owns the data and offsets vectors, `size` should use their
capacity (the allocated memory), not their length, when reporting the memory it uses.
# What changes are included in this PR?
Replaced `len` with `capacity` in `Rows::size` and added a test.
# Are these changes tested?
Yes (the new test fails on main and passes with this fix).
# Are there any user-facing changes?
`Rows::size` now reports a more accurate, capacity-based size.
---
arrow-row/src/lib.rs | 47 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 45 insertions(+), 2 deletions(-)
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 72a295627e..aa6543485f 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1131,8 +1131,8 @@ impl Rows {
pub fn size(&self) -> usize {
// Size of fields is accounted for as part of RowConverter
std::mem::size_of::<Self>()
- + self.buffer.len()
- + self.offsets.len() * std::mem::size_of::<usize>()
+ + self.buffer.capacity()
+ + self.offsets.capacity() * std::mem::size_of::<usize>()
}
/// Create a [BinaryArray] from the [Rows] data without reallocating the
@@ -4050,4 +4050,47 @@ mod tests {
// "a" < "z"
assert!(rows.row(3) < rows.row(1));
}
+
+ #[test]
+ fn rows_size_should_count_for_capacity() {
+ let row_converter =
RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap();
+
+ let empty_rows_size_with_preallocate_rows_and_data = {
+ let rows = row_converter.empty_rows(1000, 1000);
+
+ rows.size()
+ };
+ let empty_rows_size_with_preallocate_rows = {
+ let rows = row_converter.empty_rows(1000, 0);
+
+ rows.size()
+ };
+ let empty_rows_size_with_preallocate_data = {
+ let rows = row_converter.empty_rows(0, 1000);
+
+ rows.size()
+ };
+ let empty_rows_size_without_preallocate = {
+ let rows = row_converter.empty_rows(0, 0);
+
+ rows.size()
+ };
+
+ assert!(
+ empty_rows_size_with_preallocate_rows_and_data >
empty_rows_size_with_preallocate_rows,
+ "{empty_rows_size_with_preallocate_rows_and_data} should be larger
than {empty_rows_size_with_preallocate_rows}"
+ );
+ assert!(
+ empty_rows_size_with_preallocate_rows_and_data >
empty_rows_size_with_preallocate_data,
+ "{empty_rows_size_with_preallocate_rows_and_data} should be larger
than {empty_rows_size_with_preallocate_data}"
+ );
+ assert!(
+ empty_rows_size_with_preallocate_rows >
empty_rows_size_without_preallocate,
+ "{empty_rows_size_with_preallocate_rows} should be larger than
{empty_rows_size_without_preallocate}"
+ );
+ assert!(
+ empty_rows_size_with_preallocate_data >
empty_rows_size_without_preallocate,
+ "{empty_rows_size_with_preallocate_data} should be larger than
{empty_rows_size_without_preallocate}"
+ );
+ }
}