kzh1458003655-web commented on code in PR #59016:
URL: https://github.com/apache/doris/pull/59016#discussion_r2617779084


##########
be/src/vec/functions/function_string.h:
##########
@@ -5261,6 +5261,96 @@ class FunctionCrc32Internal : public IFunction {
     }
 };
 
+class NameLevenshtein {
+public:
+    static constexpr auto name = "levenshtein";
+};
+
+struct LevenshteinImpl {
+    static constexpr auto name = "levenshtein";
+
+    // 必需的类型定义
+    using ResultDataType = DataTypeInt32;
+    using ResultPaddedPODArray = ColumnInt32::Container;
+
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        return std::make_shared<ResultDataType>();
+    }
+
+    // Helper: Get UTF-8 byte length
+    static size_t get_utf8_byte_length(unsigned char byte) {
+        if (byte < 0x80) return 1;
+        if ((byte & 0xE0) == 0xC0) return 2;
+        if ((byte & 0xF0) == 0xE0) return 3;
+        if ((byte & 0xF8) == 0xF0) return 4;
+        return 1;
+    }
+
+    // Helper: Convert UTF-8 byte stream to Unicode code points
+    static void to_utf32(const char* data, size_t size, std::vector<int32_t>& 
out) {
+        out.clear();
+        size_t i = 0;
+        while (i < size) {
+            size_t char_len = get_utf8_byte_length(static_cast<unsigned 
char>(data[i]));
+            if (i + char_len > size) char_len = 1;
+
+            int32_t code_point = 0;
+            for (size_t j = 0; j < char_len; ++j) {
+                code_point = (code_point << 8) | static_cast<unsigned 
char>(data[i + j]);
+            }
+            out.push_back(code_point);
+            i += char_len;
+        }
+    }
+
+    // 核心执行函数: 修正参数类型为 std::string_view
+    static void execute(std::string_view l, std::string_view r, int32_t& res) {
+        // Use thread_local for memory reuse
+        static thread_local std::vector<int32_t> a_code_points;
+        static thread_local std::vector<int32_t> b_code_points;
+
+        // ADMIN REQUIREMENT: Renaming or adding clear comments for these two 
DP arrays
+        static thread_local std::vector<int32_t> prev_row; // Previous row 
distances (i-1)

Review Comment:
   > 为什么要这么做`static thread_local`?这是错误的。
   
   I'm sorry. I wanted to reuse the vectors to reduce repeated heap allocations.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to