This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 32964879f74 [opt](inverted index) Add null document interface to optimize empty string indexing (#28661)
32964879f74 is described below

commit 32964879f74d1b3839e79c9a627fef18248d64a1
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Wed Dec 20 12:11:02 2023 +0800

    [opt](inverted index) Add null document interface to optimize empty string indexing (#28661)
---
 be/src/clucene                                        |  2 +-
 .../olap/rowset/segment_v2/inverted_index_writer.cpp  | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index d6adff12de6..4bd7d450173 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit d6adff12de6a687666edbc77ad8da6ec1ab32b88
+Subproject commit 4bd7d4501739c798c98b30d6350b243942d5f9bc
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index d397910891f..718d46f5e2b 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -230,6 +230,17 @@ public:
         return Status::OK();
     }
 
+    Status add_null_document() {
+        try {
+            _index_writer->addNullDocument(_doc.get());
+        } catch (const CLuceneError& e) {
+            _dir->deleteDirectory();
+            return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+                    "CLuceneError add_null_document: {}", e.what());
+        }
+        return Status::OK();
+    }
+
     Status add_nulls(uint32_t count) override {
         _null_bitmap.addRange(_rid, _rid + count);
         _rid += count;
@@ -242,7 +253,7 @@ public:
 
             for (int i = 0; i < count; ++i) {
                 new_fulltext_field(empty_value.c_str(), 0);
-                RETURN_IF_ERROR(add_document());
+                RETURN_IF_ERROR(add_null_document());
             }
         }
         return Status::OK();
@@ -292,10 +303,11 @@ public:
                     VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
                                << "value length:" << v->get_size() << ", ignore this value";
                     new_fulltext_field(empty_value.c_str(), 0);
+                    RETURN_IF_ERROR(add_null_document());
                 } else {
                     new_fulltext_field(v->get_data(), v->get_size());
+                    RETURN_IF_ERROR(add_document());
                 }
-                RETURN_IF_ERROR(add_document());
                 ++v;
                 _rid++;
             }
@@ -341,11 +353,12 @@ public:
                     VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
                                << "value length:" << value.length() << ", ignore this value";
                     new_fulltext_field(empty_value.c_str(), 0);
+                    RETURN_IF_ERROR(add_null_document());
                 } else {
                     new_fulltext_field(value.c_str(), value.length());
+                    RETURN_IF_ERROR(add_document());
                 }
                 _rid++;
-                RETURN_IF_ERROR(add_document());
             }
         } else if constexpr (field_is_numeric_type(field_type)) {
             for (int i = 0; i < count; ++i) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to