This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new b7f1198a1ff [fix](chinese) fix the issue where the be crashes due to the missing chinese dict (#30712)
b7f1198a1ff is described below

commit b7f1198a1ff116dd802086d669cb2caef1f75a20
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Mon Feb 5 14:07:34 2024 +0800

    [fix](chinese) fix the issue where the be crashes due to the missing chinese dict (#30712)
---
 be/src/clucene                                     |  2 +-
 be/src/common/status.h                             |  1 +
 .../rowset/segment_v2/inverted_index_writer.cpp    | 37 ++++++++++++----------
 be/src/vec/functions/function_tokenize.cpp         | 12 +++++--
 4 files changed, 33 insertions(+), 19 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index f4829cc50f3..63ae98a8bc2 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit f4829cc50f32723366026c401fdb0111f15ee537
+Subproject commit 63ae98a8bc280dc4728dca744c3fe06e7a38caf1
diff --git a/be/src/common/status.h b/be/src/common/status.h
index f8b07c58380..e5dcfe4fe81 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -273,6 +273,7 @@ namespace ErrorCode {
     E(INVERTED_INDEX_BUILD_WAITTING, -6008, false);          \
     E(INVERTED_INDEX_NOT_IMPLEMENTED, -6009, false);         \
     E(INVERTED_INDEX_COMPACTION_ERROR, -6010, false);        \
+    E(INVERTED_INDEX_ANALYZER_ERROR, -6011, false);          \
     E(KEY_NOT_FOUND, -7000, false);                          \
     E(KEY_ALREADY_EXISTS, -7001, false);                     \
     E(ENTRY_NOT_FOUND, -7002, false);                        \
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 477d52d471f..07bea0c83f3 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -244,23 +244,28 @@ public:
     }
 
     Status create_analyzer(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
-        switch (_parser_type) {
-        case InvertedIndexParserType::PARSER_STANDARD:
-        case InvertedIndexParserType::PARSER_UNICODE:
-            analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
-            break;
-        case InvertedIndexParserType::PARSER_ENGLISH:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
-        case InvertedIndexParserType::PARSER_CHINESE:
-            analyzer = create_chinese_analyzer();
-            break;
-        default:
-            analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
-            break;
+        try {
+            switch (_parser_type) {
+            case InvertedIndexParserType::PARSER_STANDARD:
+            case InvertedIndexParserType::PARSER_UNICODE:
+                analyzer = std::make_unique<lucene::analysis::standard95::StandardAnalyzer>();
+                break;
+            case InvertedIndexParserType::PARSER_ENGLISH:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            case InvertedIndexParserType::PARSER_CHINESE:
+                analyzer = create_chinese_analyzer();
+                break;
+            default:
+                analyzer = std::make_unique<lucene::analysis::SimpleAnalyzer<char>>();
+                break;
+            }
+            setup_analyzer_lowercase(analyzer);
+            return Status::OK();
+        } catch (CLuceneError& e) {
+            return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                    "inverted index create analyzer failed: {}", e.what());
         }
-        setup_analyzer_lowercase(analyzer);
-        return Status::OK();
     }
 
     void setup_analyzer_lowercase(std::unique_ptr<lucene::analysis::Analyzer>& analyzer) {
diff --git a/be/src/vec/functions/function_tokenize.cpp b/be/src/vec/functions/function_tokenize.cpp
index 2ecd164a598..1d9edbd7dbd 100644
--- a/be/src/vec/functions/function_tokenize.cpp
+++ b/be/src/vec/functions/function_tokenize.cpp
@@ -142,8 +142,16 @@ Status FunctionTokenize::execute_impl(FunctionContext* /*context*/, Block& block
             inverted_index_ctx.parser_mode = get_parser_mode_string_from_properties(properties);
             inverted_index_ctx.char_filter_map =
                     get_parser_char_filter_map_from_properties(properties);
-            auto analyzer =
-                    doris::segment_v2::InvertedIndexReader::create_analyzer(&inverted_index_ctx);
+
+            std::unique_ptr<lucene::analysis::Analyzer> analyzer;
+            try {
+                analyzer = doris::segment_v2::InvertedIndexReader::create_analyzer(
+                        &inverted_index_ctx);
+            } catch (CLuceneError& e) {
+                return Status::Error<doris::ErrorCode::INVERTED_INDEX_ANALYZER_ERROR>(
+                        "inverted index create analyzer failed: {}", e.what());
+            }
+
             inverted_index_ctx.analyzer = analyzer.get();
             _do_tokenize(*col_left, inverted_index_ctx, *dest_nested_column, dest_offsets,
                          dest_nested_null_map);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to