This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new e3fb3615ac8 [improvement](inverted index)Avoid parsing ignore_above in add_values and return error status when rowid_conversion is failed. (#32890) (#36200)
e3fb3615ac8 is described below

commit e3fb3615ac84060374d540f0bb01ae787e2dcd68
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Sun Jun 16 09:37:59 2024 +0800

    [improvement](inverted index)Avoid parsing ignore_above in add_values and return error status when rowid_conversion is failed. (#32890) (#36200)
---
 be/src/olap/compaction.cpp                              |  1 +
 be/src/olap/rowset/segment_v2/inverted_index_writer.cpp | 14 ++++++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index fcfd3abb522..5ca06457366 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -432,6 +432,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
                         _tablet->table_id());
                 DCHECK(false) << err_msg;
                 LOG(WARNING) << err_msg;
+                return Status::InternalError(err_msg);
             }
         }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 30fbbeb3e0f..f2c891fefca 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -248,6 +248,9 @@ public:
             // array's inverted index do need create field first
             _doc->setNeedResetFieldData(true);
         }
+        auto ignore_above_value =
+                get_parser_ignore_above_value_from_properties(_index_meta->properties());
+        _ignore_above = std::stoi(ignore_above_value);
         return Status::OK();
     }
 
@@ -374,13 +377,10 @@ public:
                         "field or index writer is null in inverted index writer");
             }
             auto* v = (Slice*)values;
-            auto ignore_above_value =
-                    get_parser_ignore_above_value_from_properties(_index_meta->properties());
-            auto ignore_above = std::stoi(ignore_above_value);
             for (int i = 0; i < count; ++i) {
                 // only ignore_above UNTOKENIZED strings and empty strings not tokenized
                 if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
-                     v->get_size() > ignore_above) ||
+                     v->get_size() > _ignore_above) ||
                     (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
                     RETURN_IF_ERROR(add_null_document());
                 } else {
@@ -408,9 +408,6 @@ public:
                 LOG(ERROR) << "index writer is null in inverted index writer.";
                 return Status::InternalError("index writer is null in inverted index writer");
             }
-            auto ignore_above_value =
-                    get_parser_ignore_above_value_from_properties(_index_meta->properties());
-            auto ignore_above = std::stoi(ignore_above_value);
             size_t start_off = 0;
             for (int i = 0; i < count; ++i) {
                 // nullmap & value ptr-array may not from offsets[i] because olap_convertor make offsets accumulate from _base_offset which may not is 0, but nullmap & value in this segment is from 0, we only need
@@ -432,7 +429,7 @@ public:
                     }
                     auto* v = (Slice*)((const uint8_t*)value_ptr + j * field_size);
                     if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
-                         v->get_size() > ignore_above) ||
+                         v->get_size() > _ignore_above) ||
                         (_parser_type != InvertedIndexParserType::PARSER_NONE && v->empty())) {
                         // is here a null value?
                         // TODO. Maybe here has performance problem for large size string.
@@ -685,6 +682,7 @@ private:
     InvertedIndexParserType _parser_type;
     std::wstring _field_name;
     std::unique_ptr<DorisCompoundDirectory> _dir;
+    uint32_t _ignore_above;
 };
 
 Status InvertedIndexColumnWriter::create(const Field* field,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to