This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit e2b94586fd8531ed79399c595ffcd2d860559fc0
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Fri Aug 25 21:34:11 2023 +0800

    [fix](invert index) fix the keyword type index length limit (#23503)
---
 be/src/clucene                                          |  2 +-
 be/src/olap/rowset/segment_v2/column_writer.cpp         |  9 +++++----
 be/src/olap/rowset/segment_v2/inverted_index_writer.cpp | 17 ++++++++++++++---
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index fa33b52263..9e60ec666b 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit fa33b522639948f7845b6500910172f55eeae13a
+Subproject commit 9e60ec666b3ccf7dd8b7c3e331ac03ccf87d5845
diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp
index dcb0f89858..2f27df779b 100644
--- a/be/src/olap/rowset/segment_v2/column_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/column_writer.cpp
@@ -510,7 +510,7 @@ Status ScalarColumnWriter::append_nulls(size_t num_rows) {
         _bitmap_index_builder->add_nulls(num_rows);
     }
     if (_opts.inverted_index) {
-        _inverted_index_builder->add_nulls(num_rows);
+        RETURN_IF_ERROR(_inverted_index_builder->add_nulls(num_rows));
     }
     if (_opts.need_bloom_filter) {
         _bloom_filter_index_builder->add_nulls(num_rows);
@@ -545,7 +545,8 @@ Status ScalarColumnWriter::append_data_in_current_page(const uint8_t* data, size
         _bitmap_index_builder->add_values(data, *num_written);
     }
     if (_opts.inverted_index) {
-        _inverted_index_builder->add_values(get_field()->name(), data, *num_written);
+        RETURN_IF_ERROR(
+                _inverted_index_builder->add_values(get_field()->name(), data, *num_written));
     }
     if (_opts.need_bloom_filter) {
         _bloom_filter_index_builder->add_values(data, *num_written);
@@ -934,8 +935,8 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) {
                 auto writer = dynamic_cast<ScalarColumnWriter*>(_item_writer.get());
                 if (writer != nullptr) {
                     //NOTE: use array field name as index field, but item_writer size should be used when moving item_data_ptr
-                    _inverted_index_builder->add_array_values(_item_writer->get_field()->size(),
-                                                              col_cursor, 1);
+                    RETURN_IF_ERROR(_inverted_index_builder->add_array_values(
+                            _item_writer->get_field()->size(), col_cursor, 1));
                 }
             }
         }
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index 1762a60189..e779170d03 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -203,6 +203,17 @@ public:
         return Status::OK();
     }
 
+    Status add_document() {
+        try {
+            _index_writer->addDocument(_doc.get());
+        } catch (const CLuceneError& e) {
+            _dir->deleteDirectory();
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError add_document: {}", e.what());
+        }
+        return Status::OK();
+    }
+
     Status add_nulls(uint32_t count) override {
         _null_bitmap.addRange(_rid, _rid + count);
         _rid += count;
@@ -215,7 +226,7 @@ public:
 
             for (int i = 0; i < count; ++i) {
                 new_fulltext_field(empty_value.c_str(), 0);
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
             }
         }
         return Status::OK();
@@ -261,7 +272,7 @@ public:
             auto* v = (Slice*)values;
             for (int i = 0; i < count; ++i) {
                 new_fulltext_field(v->get_data(), v->get_size());
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
                 ++v;
                 _rid++;
             }
@@ -294,7 +305,7 @@ public:
                 auto value = join(strings, " ");
                 new_fulltext_field(value.c_str(), value.length());
                 _rid++;
-                _index_writer->addDocument(_doc.get());
+                RETURN_IF_ERROR(add_document());
                 values++;
             }
         } else if constexpr (field_is_numeric_type(field_type)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to