This is an automated email from the ASF dual-hosted git repository. airborne pushed a commit to branch clucene-2.0 in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene-2.0 by this push:
     new 3526de75334 [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
3526de75334 is described below

commit 3526de75334f64aea3f299d395c8a460a9054e37
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Fri Nov 1 09:36:17 2024 +0800

    [fix](index compaction)Skip writing terms with a doc frequency of 0 (#248) (#249)
---
 src/core/CLucene/index/IndexWriter.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 10dfd68c60d..dbca5655f55 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1815,6 +1815,17 @@ void IndexWriter::mergeTerms(bool hasProx) {
         }
 
         for (int i = 0; i < numDestIndexes; ++i) {
+            if (dfs[i] == 0) {
+                if (infoStream != nullptr) {
+                    std::string name = lucene_wcstoutf8string(smallestTerm->text(), smallestTerm->textLength());
+                    std::string field = lucene_wcstoutf8string(smallestTerm->field(), wcslen(smallestTerm->field()));
+                    std::stringstream ss;
+                    ss << "term: " << name << ", field: " << field << ", doc frequency is zero[" << dfs[i] << "], skip it." << "\n";
+                    message(ss.str());
+                }
+                // if doc frequency is 0, it means the term is deleted. So we should not write it.
+                continue;
+            }
             DefaultSkipListWriter *skipListWriter = skipListWriterList[i];
             CL_NS(store)::IndexOutput *freqOutput = freqOutputList[i];
             CL_NS(store)::IndexOutput *proxOutput = proxOutputList[i];

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org