This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch clucene-2.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-2.0 by this push:
     new 3526de75334 [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
3526de75334 is described below

commit 3526de75334f64aea3f299d395c8a460a9054e37
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Fri Nov 1 09:36:17 2024 +0800

    [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
---
 src/core/CLucene/index/IndexWriter.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 10dfd68c60d..dbca5655f55 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1815,6 +1815,17 @@ void IndexWriter::mergeTerms(bool hasProx) {
         }
 
         for (int i = 0; i < numDestIndexes; ++i) {
+            if (dfs[i] == 0) {
+                if (infoStream != nullptr) {
+                    std::string name = lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
+                    std::string field = lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
+                    std::stringstream ss;
+                    ss << "term: " << name << ", field: " << field << ", doc frequency is zero[" << dfs[i] << "], skip it." << "\n";
+                    message(ss.str());
+                }
+                // if doc frequency is 0, it means the term is deleted. So we should not write it.
+                continue;
+            }
             DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
             CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
             CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to