This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b2d47d98da512e5b8f11aa38644d8d1c391d1620
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Wed Jun 7 07:27:47 2023 +0800

    [fix](regex) String with Chinese characters matching failed (#20493)
---
 be/src/vec/functions/like.cpp                                       | 5 +++--
 .../sql_functions/string_functions/test_string_function_regexp.out  | 6 ++++++
 .../string_functions/test_string_function_regexp.groovy             | 3 +++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index bcd8262d33..8d56c4f2d1 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -437,8 +437,9 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state,
 Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression,
                                     hs_database_t** database, hs_scratch_t** scratch) {
     hs_compile_error_t* compile_err;
-    auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr,
-                          database, &compile_err);
+    auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8,
+                          HS_MODE_BLOCK, nullptr, database, &compile_err);
+
     if (res != HS_SUCCESS) {
         *database = nullptr;
         if (context) {
diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
index 3c7d8473ae..415b8f2822 100644
--- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
+++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out
@@ -73,6 +73,12 @@ a-b c
 -- !sql --
 a <b> b
 
+-- !sql_utf1 --
+true
+
+-- !sql_utf2 --
+true
+
 -- !sql_regexp_null --
 \N
 \N
diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
index ba4f941a4b..cb80939adf 100644
--- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
+++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy
@@ -63,6 +63,9 @@ suite("test_string_function_regexp") {
     qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");"
     qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');"
 
+    qt_sql_utf1 """ select '皖12345' REGEXP '^[皖][0-9]{5}\$'; """
+    qt_sql_utf2 """ select '皖 12345' REGEXP '^[皖] [0-9]{5}\$'; """
+
     // bug fix
     sql """
         INSERT INTO ${tbName} VALUES


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to