This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0-beta
in repository https://gitbox.apache.org/repos/asf/doris.git
commit b2d47d98da512e5b8f11aa38644d8d1c391d1620 Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Wed Jun 7 07:27:47 2023 +0800 [fix](regex) String with Chinese characters matching failed (#20493) --- be/src/vec/functions/like.cpp | 5 +++-- .../sql_functions/string_functions/test_string_function_regexp.out | 6 ++++++ .../string_functions/test_string_function_regexp.groovy | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp index bcd8262d33..8d56c4f2d1 100644 --- a/be/src/vec/functions/like.cpp +++ b/be/src/vec/functions/like.cpp @@ -437,8 +437,9 @@ Status FunctionLikeBase::regexp_fn_predicate(LikeSearchState* state, Status FunctionLikeBase::hs_prepare(FunctionContext* context, const char* expression, hs_database_t** database, hs_scratch_t** scratch) { hs_compile_error_t* compile_err; - auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY, HS_MODE_BLOCK, nullptr, - database, &compile_err); + auto res = hs_compile(expression, HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8, + HS_MODE_BLOCK, nullptr, database, &compile_err); + if (res != HS_SUCCESS) { *database = nullptr; if (context) { diff --git a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out index 3c7d8473ae..415b8f2822 100644 --- a/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out +++ b/regression-test/data/query_p0/sql_functions/string_functions/test_string_function_regexp.out @@ -73,6 +73,12 @@ a-b c -- !sql -- a <b> b +-- !sql_utf1 -- +true + +-- !sql_utf2 -- +true + -- !sql_regexp_null -- \N \N diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index ba4f941a4b..cb80939adf 
100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -63,6 +63,9 @@ suite("test_string_function_regexp") { qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");" qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');" + qt_sql_utf1 """ select '皖12345' REGEXP '^[皖][0-9]{5}\$'; """ + qt_sql_utf2 """ select '皖 12345' REGEXP '^[皖] [0-9]{5}\$'; """ + // bug fix sql """ INSERT INTO ${tbName} VALUES --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org