alamb commented on code in PR #21991:
URL: https://github.com/apache/datafusion/pull/21991#discussion_r3178102453
##########
datafusion/functions/src/unicode/reverse.rs:
##########
@@ -103,56 +101,79 @@ fn reverse(args: &[ArrayRef]) -> Result<ArrayRef> {
let len = args[0].len();
match args[0].data_type() {
+ LargeUtf8 => reverse_impl(
+ &args[0].as_string::<i64>(),
+ GenericStringArrayBuilder::<i64>::with_capacity(len, 1024),
+ ),
Utf8 => reverse_impl(
&args[0].as_string::<i32>(),
- StringBuilder::with_capacity(len, 1024),
+ GenericStringArrayBuilder::<i32>::with_capacity(len, 1024),
),
Utf8View => reverse_impl(
&args[0].as_string_view(),
- StringViewBuilder::with_capacity(len),
- ),
- LargeUtf8 => reverse_impl(
- &args[0].as_string::<i64>(),
- LargeStringBuilder::with_capacity(len, 1024),
+ StringViewArrayBuilder::with_capacity(len),
),
_ => unreachable!(
"Reverse can only be applied to Utf8View, Utf8 and LargeUtf8 types"
),
}
}
-fn reverse_impl<'a, StringArrType, StringBuilderType>(
+fn reverse_impl<'a, StringArrType, B>(
string_array: &StringArrType,
- mut array_builder: StringBuilderType,
+ mut array_builder: B,
) -> Result<ArrayRef>
where
StringArrType: StringArrayType<'a>,
- StringBuilderType: StringLikeArrayBuilder,
+ B: BulkNullStringArrayBuilder,
{
+ let item_len = string_array.len();
+ // Null-preserving: reuse the input null buffer as the output null buffer.
+ let nulls = string_array.nulls().cloned();
let mut string_buf = String::new();
let mut byte_buf = Vec::<u8>::new();
- for string in string_array.iter() {
- if let Some(s) = string {
- if s.is_ascii() {
- // reverse bytes directly since ASCII characters are single
bytes
- byte_buf.extend(s.as_bytes());
- byte_buf.reverse();
- // SAFETY: Since the original string was ASCII, reversing the
bytes still results in valid UTF-8.
- let reversed = unsafe {
std::str::from_utf8_unchecked(&byte_buf) };
- array_builder.append_value(reversed);
- byte_buf.clear();
+ if let Some(ref n) = nulls {
+ for i in 0..item_len {
+ if n.is_null(i) {
+ array_builder.append_placeholder();
} else {
- string_buf.extend(s.chars().rev());
- array_builder.append_value(&string_buf);
- string_buf.clear();
+ // SAFETY: `n.is_null(i)` was false in the branch above.
+ let s = unsafe { string_array.value_unchecked(i) };
+ append_reversed(s, &mut array_builder, &mut byte_buf, &mut
string_buf);
}
- } else {
- array_builder.append_null();
+ }
+ } else {
+ for i in 0..item_len {
+ // SAFETY: no null buffer means every index is valid.
+ let s = unsafe { string_array.value_unchecked(i) };
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]