This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e3fb3615ac8 [improvement](inverted index)Avoid parsing ignore_above in add_values and return error status when rowid_conversion is failed. (#32890) (#36200)
e3fb3615ac8 is described below

commit e3fb3615ac84060374d540f0bb01ae787e2dcd68
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Sun Jun 16 09:37:59 2024 +0800

    [improvement](inverted index)Avoid parsing ignore_above in add_values and return error status when rowid_conversion is failed. (#32890) (#36200)
---
 be/src/olap/compaction.cpp                              |  1 +
 be/src/olap/rowset/segment_v2/inverted_index_writer.cpp | 14 ++++++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fcfd3abb522..5ca06457366 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -432,6 +432,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
                                     _tablet->table_id());
                 DCHECK(false) << err_msg;
                 LOG(WARNING) << err_msg;
+                return Status::InternalError(err_msg);
             }
         }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 30fbbeb3e0f..f2c891fefca 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -248,6 +248,9 @@ public:
             // array's inverted index do need create field first
             _doc->setNeedResetFieldData(true);
         }
+        auto ignore_above_value =
+                get_parser_ignore_above_value_from_properties(_index_meta->properties());
+        _ignore_above = std::stoi(ignore_above_value);
         return Status::OK();
     }
 
@@ -374,13 +377,10 @@ public:
                     "field or index writer is null in inverted index writer");
         }
         auto* v = (Slice*)values;
-        auto ignore_above_value =
-                get_parser_ignore_above_value_from_properties(_index_meta->properties());
-        auto ignore_above = std::stoi(ignore_above_value);
         for (int i = 0; i < count; ++i) {
             // only ignore_above UNTOKENIZED strings and empty strings not tokenized
             if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
-                 v->get_size() > ignore_above) ||
+                 v->get_size() > _ignore_above) ||
                 (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
                 RETURN_IF_ERROR(add_null_document());
             } else {
@@ -408,9 +408,6 @@ public:
             LOG(ERROR) << "index writer is null in inverted index writer.";
             return Status::InternalError("index writer is null in inverted index writer");
         }
-        auto ignore_above_value =
-                get_parser_ignore_above_value_from_properties(_index_meta->properties());
-        auto ignore_above = std::stoi(ignore_above_value);
         size_t start_off = 0;
         for (int i = 0; i < count; ++i) {
             // nullmap & value ptr-array may not from offsets[i] because olap_convertor make offsets accumulate from _base_offset which may not is 0, but nullmap & value in this segment is from 0, we only need
@@ -432,7 +429,7 @@ public:
                     }
                     auto* v = (Slice*)((const uint8_t*)value_ptr + j * field_size);
                     if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
-                         v->get_size() > ignore_above) ||
+                         v->get_size() > _ignore_above) ||
                         (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
                         // is here a null value?
                         // TODO. Maybe here has performance problem for large size string.
@@ -685,6 +682,7 @@ private:
     InvertedIndexParserType _parser_type;
     std::wstring _field_name;
     std::unique_ptr<DorisCompoundDirectory> _dir;
+    uint32_t _ignore_above;
 };
 
 Status InvertedIndexColumnWriter::create(const Field* field,

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org