This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new b7f1198a1ff [fix](chinese) fix the issue where the be crashes due to the missing chinese dict (#30712)
b7f1198a1ff is described below

commit b7f1198a1ff116dd802086d669cb2caef1f75a20
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Mon Feb 5 14:07:34 2024 +0800

    [fix](chinese) fix the issue where the be crashes due to the missing chinese dict (#30712)
---
 be/src/clucene                                     |  2 +-
 be/src/common/status.h                             |  1 +
 .../rowset/segment_v2/inverted_index_writer.cpp    | 37 ++++++++++++----------
 be/src/vec/functions/function_tokenize.cpp         | 12 +++++--
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index f4829cc50f3..63ae98a8bc2 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit f4829cc50f32723366026c401fdb0111f15ee537
+Subproject commit 63ae98a8bc280dc4728dca744c3fe06e7a38caf1
diff --git a/be/src/common/status.h b/be/src/common/status.h
index f8b07c58380..e5dcfe4fe81 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -273,6 +273,7 @@ namespace ErrorCode {
     E(INVERTED_INDEX_BUILD_WAITTING, -6008, false); \
     E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009, false); \
     E(INVERTED_INDEX_COMPACTION_ERROR, -6010, false); \
+    E(INVERTED_INDEX_ANALYZER_ERROR, -6011, false); \
     E(KEY_NOT_FOUND, -7000, false); \
     E(KEY_ALREADY_EXISTS, -7001, false); \
     E(ENTRY_NOT_FOUND, -7002, false); \
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 477d52d471f..07bea0c83f3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -244,23 +244,28 @@ public:
     }
 
     Status create_analyzer(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
-        switch (_parser_type) {
-        case InvertedIndexParserType::PARSER_STANDARD:
-        case InvertedIndexParserType::PARSER_UNICODE:
-            analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
-            break;
-        case InvertedIndexParserType::PARSER_ENGLISH:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
-        case InvertedIndexParserType::PARSER_CHINESE:
-            analyzer = create_chinese_analyzer();
-            break;
-        default:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
+        try {
+            switch (_parser_type) {
+            case InvertedIndexParserType::PARSER_STANDARD:
+            case InvertedIndexParserType::PARSER_UNICODE:
+                analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
+                break;
+            case InvertedIndexParserType::PARSER_ENGLISH:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            case InvertedIndexParserType::PARSER_CHINESE:
+                analyzer = create_chinese_analyzer();
+                break;
+            default:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            }
+            setup_analyzer_lowercase(analyzer);
+            return Status::OK();
+        } catch (CLuceneError& e) {
+            return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                    "inverted index create analyzer failed: {}", e.what());
         }
-        setup_analyzer_lowercase(analyzer);
-        return Status::OK();
     }
 
     void setup_analyzer_lowercase(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index 2ecd164a598..1d9edbd7dbd 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -142,8 +142,16 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
             inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
             inverted_index_ctx.char_filter_map =
                     get_parser_char_filter_map_from_properties(properties);
-            auto analyzer =
-                    doris::segment_v2::InvertedIndexReader::create_analyzer(&inverted_index_ctx);
+
+            std::unique_ptr<lucene::analysis::Analyzer> analyzer;
+            try {
+                analyzer = doris::segment_v2::InvertedIndexReader::create_analyzer(
+                        &inverted_index_ctx);
+            } catch (CLuceneError& e) {
+                return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                        "inverted index create analyzer failed: {}", e.what());
+            }
+
             inverted_index_ctx.analyzer = analyzer.get();
             _do_tokenize(*col_left, inverted_index_ctx, *dest_nested_column, dest_offsets,
                          dest_nested_null_map);

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org