This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new c3a4d360368 [Pick 2.1](inverted index) fix tokenize function wrong result when params with space separator (#35997) c3a4d360368 is described below commit c3a4d360368a337b426b57224d3c280b1584e9d4 Author: airborne12 <airborn...@gmail.com> AuthorDate: Thu Jun 6 21:51:15 2024 +0800 [Pick 2.1](inverted index) fix tokenize function wrong result when params with space separator (#35997) Pick from (#32671) --- be/src/vec/functions/function_tokenize.cpp | 2 +- regression-test/suites/inverted_index_p0/test_tokenize.groovy | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp index a1ea2e0be9b..e7dc2debe62 100644 --- a/be/src/vec/functions/function_tokenize.cpp +++ b/be/src/vec/functions/function_tokenize.cpp @@ -38,7 +38,7 @@ namespace doris::vectorized { Status parse(const std::string& str, std::map<std::string, std::string>& result) { boost::regex pattern( - R"delimiter((?:'([^']*)'|"([^"]*)"|([^,]*))\s*=\s*(?:'([^']*)'|"([^"]*)"|([^,]*)))delimiter"); + R"delimiter((?:'([^']*)'|"([^"]*)"|([^, ]*))\s*=\s*(?:'([^']*)'|"([^"]*)"|([^, ]*)))delimiter"); boost::smatch matches; std::string::const_iterator searchStart(str.cbegin()); diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy index a03b2c85a55..8d7e2dac42e 100644 --- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy +++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy @@ -91,6 +91,7 @@ suite("test_tokenize"){ sql "INSERT INTO $indexTblName3 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠'), (4, '陕西省西安市高新区创业大厦A座,我的手机号码是12345678901,邮箱是12345...@qq.com,,ip是1.1.1.1,this information is created automatically.');" qt_sql "SELECT TOKENIZE(c, \"'parser'='chinese','parser_mode'='fine_grained'\") FROM $indexTblName3"; + qt_sql "SELECT 
TOKENIZE(c, \"'parser'='chinese', 'parser_mode'='fine_grained'\") FROM $indexTblName3"; qt_tokenize_sql """SELECT TOKENIZE('GET /images/hm_bg.jpg HTTP/1.0 test:abc=bcd','"parser"="unicode","char_filter_type" = "char_replace","char_filter_pattern" = "._=:,","char_filter_replacement" = " "');""" qt_tokenize_sql """SELECT TOKENIZE('GET /images/hm_bg.jpg HTTP/1.0 test:abc=bcd', '"parser"="unicode","char_filter_type" = "char_replace", "char_filter_pattern" = "._=:,", "char_filter_replacement" = " "');""" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org