This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 676ce907f93 branch-3.0: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44554)
676ce907f93 is described below

commit 676ce907f935b96ff70a1f220420c6f65092fb5a
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Nov 27 11:52:50 2024 +0800

    branch-3.0: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44554)
    
    Cherry-picked from #44465
    
    Co-authored-by: zzzxl <yangs...@selectdb.com>
---
 be/src/vec/functions/function_tokenize.cpp                    |  6 ++++++
 regression-test/suites/inverted_index_p0/test_tokenize.groovy | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index be0eb5dddc9..5c19ff1283a 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -142,6 +142,12 @@ Status FunctionTokenize::execute_impl(FunctionContext* 
/*context*/, Block& block
             }
             inverted_index_ctx.parser_type = 
get_inverted_index_parser_type_from_string(
                     get_parser_string_from_properties(properties));
+            if (inverted_index_ctx.parser_type == 
InvertedIndexParserType::PARSER_UNKNOWN) {
+                return 
Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
+                        "unsupported parser type. currently, only 'english', 
'chinese', and "
+                        "'unicode' analyzers are supported.");
+            }
+
             inverted_index_ctx.parser_mode = 
get_parser_mode_string_from_properties(properties);
             inverted_index_ctx.char_filter_map =
                     get_parser_char_filter_map_from_properties(properties);
diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
index 8d7e2dac42e..4672a39cedb 100644
--- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy
+++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+import java.sql.SQLException
 
 suite("test_tokenize"){
     // prepare test table
@@ -98,4 +99,14 @@ suite("test_tokenize"){
 
     qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');"""
     qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', 
'"parser"="unicode","stopwords" = "none"');"""
+
+    try {
+      sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """
+    } catch (SQLException e) {
+      if (e.message.contains("E-6000")) {
+        log.info("e message: {}", e.message)
+      } else {
+        throw e
+      }
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to