comphead commented on code in PR #23192:
URL: https://github.com/apache/datafusion/pull/23192#discussion_r3476595754
##########
datafusion/functions-nested/src/array_compact.rs:
##########
@@ -130,42 +129,49 @@ fn compact_list<O: OffsetSizeTrait>(
field: &Arc<arrow::datatypes::Field>,
) -> Result<ArrayRef> {
let values = list_array.values();
+ // Use logical nulls so element types without a validity buffer
+ // (e.g. NullArray) are still treated as null.
+ let values_null_count = values.logical_null_count();
// Fast path: no nulls in values, return input unchanged
- if values.null_count() == 0 {
+ if values_null_count == 0 {
return Ok(Arc::new(list_array.clone()));
}
+ let values_nulls = values
+ .logical_nulls()
+ .expect("non-zero logical_null_count implies logical_nulls is Some");
+ let list_nulls = list_array.nulls();
+ let list_offsets = list_array.offsets();
let original_data = values.to_data();
- let capacity = original_data.len() - values.null_count();
- let mut offsets = Vec::<O>::with_capacity(list_array.len() + 1);
- offsets.push(O::zero());
+ let capacity = original_data.len() - values_null_count;
+ let mut offsets = OffsetBufferBuilder::<O>::new(list_array.len());
Review Comment:
Thanks @alamb for catching this, placed it back. I believe Claude took a
pattern from other array functions, that currently use `OffsetBufferBuilder`.
I'll check if those existing usages can also be replaced with `Vec`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]