liujiwen-up commented on code in PR #41681:
URL: https://github.com/apache/doris/pull/41681#discussion_r1801173448


##########
be/src/vec/functions/function_string.cpp:
##########
@@ -535,6 +545,135 @@ struct TrimUtil {
         return Status::OK();
     }
 };
+template <bool is_ltrim_in, bool is_rtrim_in, bool trim_single>
+struct TrimInUtil {
+    static Status vector(const ColumnString::Chars& str_data,
+                         const ColumnString::Offsets& str_offsets, const 
StringRef& remove_str,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& 
res_offsets) {
+        const size_t offset_size = str_offsets.size();
+        res_offsets.resize(offset_size);
+        res_data.reserve(str_data.size());
+        bool all_ascii = simd::VStringFunctions::is_ascii(remove_str) &&
+                         simd::VStringFunctions::is_ascii(StringRef(
+                                 reinterpret_cast<const 
char*>(str_data.data()), str_data.size()));
+
+        if (all_ascii) {
+            return impl_vectors_ascii(str_data, str_offsets, remove_str, 
res_data, res_offsets);
+        } else {
+            return impl_vectors_utf8(str_data, str_offsets, remove_str, 
res_data, res_offsets);
+        }
+    }
+
+private:
+    static Status impl_vectors_ascii(const ColumnString::Chars& str_data,
+                                     const ColumnString::Offsets& str_offsets,
+                                     const StringRef& remove_str, 
ColumnString::Chars& res_data,
+                                     ColumnString::Offsets& res_offsets) {
+        const size_t offset_size = str_offsets.size();
+        std::bitset<128> char_lookup;
+        const char* remove_begin = remove_str.data;
+        const char* remove_end = remove_str.data + remove_str.size;
+
+        while (remove_begin < remove_end) {
+            char_lookup.set(static_cast<unsigned char>(*remove_begin));
+            remove_begin += 1;
+        }
+
+        for (size_t i = 0; i < offset_size; ++i) {
+            const char* str_begin = reinterpret_cast<const char*>(
+                    str_data.data() + (i == 0 ? 0 : str_offsets[i - 1]));

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org

Reply via email to