This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new c3a4d360368 [Pick 2.1](inverted index) fix tokenize function wrong result when params with space separator (#35997)
c3a4d360368 is described below

commit c3a4d360368a337b426b57224d3c280b1584e9d4
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Thu Jun 6 21:51:15 2024 +0800

    [Pick 2.1](inverted index) fix tokenize function wrong result when params with space separator (#35997)
    
    Pick from (#32671)
---
 be/src/vec/functions/function_tokenize.cpp                    | 2 +-
 regression-test/suites/inverted_index_p0/test_tokenize.groovy | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index a1ea2e0be9b..e7dc2debe62 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -38,7 +38,7 @@ namespace doris::vectorized {
 
 Status parse(const std::string& str, std::map<std::string, std::string>& result) {
     boost::regex pattern(
-            R"delimiter((?:'([^']*)'|"([^"]*)"|([^,]*))\s*=\s*(?:'([^']*)'|"([^"]*)"|([^,]*)))delimiter");
+            R"delimiter((?:'([^']*)'|"([^"]*)"|([^, ]*))\s*=\s*(?:'([^']*)'|"([^"]*)"|([^, ]*)))delimiter");
     boost::smatch matches;
 
     std::string::const_iterator searchStart(str.cbegin());
diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
index a03b2c85a55..8d7e2dac42e 100644
--- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
+++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
@@ -91,6 +91,7 @@ suite("test_tokenize"){
 
     sql "INSERT INTO $indexTblName3 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠'), (4, '陕西省西安市高新区创业大厦A座,我的手机号码是12345678901,邮箱是12345...@qq.com,,ip是1.1.1.1,this information is created automatically.');"
     qt_sql "SELECT TOKENIZE(c, \"'parser'='chinese','parser_mode'='fine_grained'\") FROM $indexTblName3";
+    qt_sql "SELECT TOKENIZE(c, \"'parser'='chinese', 'parser_mode'='fine_grained'\") FROM $indexTblName3";
 
     qt_tokenize_sql """SELECT TOKENIZE('GET /images/hm_bg.jpg HTTP/1.0 test:abc=bcd','"parser"="unicode","char_filter_type" = "char_replace","char_filter_pattern" = "._=:,","char_filter_replacement" = " "');"""
     qt_tokenize_sql """SELECT TOKENIZE('GET /images/hm_bg.jpg HTTP/1.0 test:abc=bcd', '"parser"="unicode","char_filter_type" = "char_replace", "char_filter_pattern" = "._=:,", "char_filter_replacement" = " "');"""


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to