This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 66cb95ffed7 branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555) 66cb95ffed7 is described below commit 66cb95ffed7c503ab2f8485d08a4139669b53d21 Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> AuthorDate: Wed Nov 27 11:53:36 2024 +0800 branch-2.1: [fix](inverted index) Content Check for Tokenize Function Parser #44465 (#44555) Cherry-picked from #44465 Co-authored-by: zzzxl <yangs...@selectdb.com> --- be/src/vec/functions/function_tokenize.cpp | 6 ++++++ regression-test/suites/inverted_index_p0/test_tokenize.groovy | 11 +++++++++++ 2 files changed, 17 insertions(+) diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp index e7dc2debe62..b1ec177d444 100644 --- a/be/src/vec/functions/function_tokenize.cpp +++ b/be/src/vec/functions/function_tokenize.cpp @@ -140,6 +140,12 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block } inverted_index_ctx.parser_type = get_inverted_index_parser_type_from_string( get_parser_string_from_properties(properties)); + if (inverted_index_ctx.parser_type == InvertedIndexParserType::PARSER_UNKNOWN) { + return Status::Error<doris::ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>( + "unsupported parser type. currently, only 'english', 'chinese', and " + "'unicode' analyzers are supported."); + } + inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties); inverted_index_ctx.char_filter_map = get_parser_char_filter_map_from_properties(properties); diff --git a/regression-test/suites/inverted_index_p0/test_tokenize.groovy b/regression-test/suites/inverted_index_p0/test_tokenize.groovy index 8d7e2dac42e..4672a39cedb 100644 --- a/regression-test/suites/inverted_index_p0/test_tokenize.groovy +++ b/regression-test/suites/inverted_index_p0/test_tokenize.groovy @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +import java.sql.SQLException suite("test_tokenize"){ // prepare test table @@ -98,4 +99,14 @@ suite("test_tokenize"){ qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode"');""" qt_tokenize_sql """SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="unicode","stopwords" = "none"');""" + + try { + sql """ SELECT TOKENIZE('华夏智胜新税股票A', '"parser"="eng"'); """ + } catch (SQLException e) { + if (e.message.contains("E-6000")) { + log.info("e message: {}", e.message) + } else { + throw e + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org