This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 66cb95ffed7 branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555)
66cb95ffed7 is described below

commit 66cb95ffed7c503ab2f8485d08a4139669b53d21
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 27 11:53:36 2024 +0800

    branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555)
    
    Cherry-picked from #44465
    
    Co-authored-by: zzzxl <yangs...@selectdb.com>
---
 be/src/vec/functions/function_tokenize.cpp                    |  6 ++++++
 regression-test/suites/inverted_index_p0/test_tokenize.groovy | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index e7dc2debe62..b1ec177d444 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -140,6 +140,12 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
             }
             inverted_index_ctx.parser_type = get_inverted_index_parser_type_from_string(
                     get_parser_string_from_properties(properties));
+            if (inverted_index_ctx.parser_type == InvertedIndexParserType::PARSER_UNKNOWN) {
+                return Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+                        "unsupported parser type. currently, only 'english', 'chinese', and "
+                        "'unicode' analyzers are supported.");
+            }
+
             inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
             inverted_index_ctx.char_filter_map =
                     get_parser_char_filter_map_from_properties(properties);
diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
index 8d7e2dac42e..4672a39cedb 100644
--- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
+++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+import java.sql.SQLException
 
 suite("test_tokenize"){
     // prepare test table
@@ -98,4 +99,14 @@ suite("test_tokenize"){
 
     qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');"""
     qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode","stopwords" = "none"');"""
+
+    try {
+      sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """
+    } catch (SQLException e) {
+      if (e.message.contains("E-6000")) {
+        log.info("e message: {}", e.message)
+      } else {
+        throw e
+      }
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to