This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
new 9511ebd0df0 [fix](index writer) remove unused deleter code (#328)
9511ebd0df0 is described below
commit 9511ebd0df0c0ed7296c3a41bf14e012dd68c2fd
Author: airborne12 <[email protected]>
AuthorDate: Fri Jul 11 16:21:34 2025 +0800
[fix](index writer) remove unused deleter code (#328)
---
src/core/CLucene/index/IndexWriter.cpp | 184 +++++----------------------------
src/core/CLucene/index/IndexWriter.h | 4 +-
2 files changed, 26 insertions(+), 162 deletions(-)
diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 44b4492df7f..d6d0a28d5dd 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -5,6 +5,7 @@
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
#include "CLucene/_ApiHeader.h"
+#include "CLucene/index/IndexWriter.h"
#include "CLucene/analysis/AnalysisHeader.h"
#include "CLucene/analysis/Analyzers.h"
@@ -15,11 +16,8 @@
#include "CLucene/store/Directory.h"
#include "CLucene/util/Misc.h"
#include "CLucene/util/PFORUtil.h"
-#include "IndexReader.h"
-#include "IndexWriter.h"
-
+#include "CLucene/index/IndexReader.h"
#include "CLucene/index/MergePolicy.h"
-#include "CLucene/search/Similarity.h"
#include "CLucene/store/FSDirectory.h"
#include "CLucene/store/_Lock.h"
#include "CLucene/store/_RAMDirectory.h"
@@ -27,17 +25,18 @@
#include "CLucene/util/PriorityQueue.h"
#include "CLucene/index/CodeMode.h"
#include "CLucene/analysis/standard95/StandardAnalyzer.h"
-#include "MergePolicy.h"
-#include "MergeScheduler.h"
-#include "SDocumentWriter.h"
-#include "_DocumentsWriter.h"
-#include "_IndexFileDeleter.h"
-#include "_SegmentHeader.h"
-#include "_SegmentInfos.h"
-#include "_SegmentMerger.h"
-#include "_SkipListWriter.h"
-#include "_Term.h"
-#include "_TermInfo.h"
+#include "CLucene/index/MergePolicy.h"
+#include "CLucene/index/MergeScheduler.h"
+#include "CLucene/index/SDocumentWriter.h"
+#include "CLucene/index/_DocumentsWriter.h"
+#include "CLucene/index/_IndexFileDeleter.h"
+#include "CLucene/index/_SegmentHeader.h"
+#include "CLucene/index/_SegmentInfos.h"
+#include "CLucene/index/_SegmentMerger.h"
+#include "CLucene/index/_SkipListWriter.h"
+#include "CLucene/index/_Term.h"
+#include "CLucene/index/_TermInfo.h"
+
#include <algorithm>
#include <memory>
#include <assert.h>
@@ -107,7 +106,6 @@ void IndexWriter::deinit(bool releaseWriteLock) throw() {
_CLLDELETE(segmentsToOptimize);
_CLLDELETE(mergeScheduler);
_CLLDELETE(mergePolicy);
- _CLLDELETE(deleter);
_CLLDELETE(docWriter);
if (bOwnsDirectory) _CLLDECDELETE(directory);
delete _internal;
@@ -253,8 +251,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
this->autoCommit = true;
this->segmentInfos = _CLNEW SegmentInfos;
this->mergeGen = 0;
- this->rollbackSegmentInfos = NULL;
- this->deleter = NULL;
this->docWriter = NULL;
this->writeLock = NULL;
this->fieldInfos = NULL;
@@ -295,11 +291,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
}
this->autoCommit = autoCommit;
- if (!autoCommit) {
- rollbackSegmentInfos = segmentInfos->clone();
- } else {
- rollbackSegmentInfos = NULL;
- }
if (analyzer != nullptr) {
if (analyzer->isSDocOpt()) {
docWriter = _CLNEW SDocumentsWriter<char>(directory, this);
@@ -309,11 +300,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
} else {
_CLTHROWA(CL_ERR_IllegalArgument, "IndexWriter::init: Only support
SDocumentsWriter");
}
- // Default deleter (for backwards compatibility) is
- // KeepOnlyLastCommitDeleter:
- deleter = _CLNEW IndexFileDeleter(directory,
- deletionPolicy == NULL ? _CLNEW
KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
- segmentInfos, infoStream, docWriter);
pushMaxBufferedDocs();
@@ -477,7 +463,6 @@ void IndexWriter::setInfoStream(std::ostream *infoStream) {
this->infoStream = infoStream;
setMessageID();
docWriter->setInfoStream(infoStream);
- deleter->setInfoStream(infoStream);
if (infoStream != NULL)
messageState();
}
@@ -588,19 +573,14 @@ void IndexWriter::closeInternal(bool waitForMerges) {
if (infoStream != NULL)
message("close: wrote segments file \"" +
segmentInfos->getCurrentSegmentFileName() + "\"");
- deleter->checkpoint(segmentInfos, true);
-
commitPending = false;
// _CLDELETE(rollbackSegmentInfos);
}
- _CLDELETE(rollbackSegmentInfos);
-
if (infoStream != NULL)
message("at close: " + segString());
_CLDELETE(docWriter);
- deleter->close();
}
if (closeDir)
@@ -689,11 +669,8 @@ bool IndexWriter::flushDocStores() {
si->getDocStoreSegment().compare(docStoreSegment) == 0)
si->setDocStoreIsCompoundFile(false);
}
- deleter->deleteFile(compoundFileName.c_str());
deletePartialSegmentsFile();
})
-
- deleter->checkpoint(segmentInfos, false);
}
}
@@ -799,15 +776,6 @@ void IndexWriter::updateDocument(Term *term, Document *doc, Analyzer *analyzer)
if (!success) {
if (infoStream != NULL)
message(string("hit exception updating document"));
-
- {
- SCOPED_LOCK_MUTEX(this->THIS_LOCK)
- // If docWriter has some aborted files that were
- // never incref'd, then we clean them up here
- const std::vector<std::string> *files =
docWriter->abortedFiles();
- if (files != NULL)
- deleter->deleteNewFiles(*files);
- }
})
if (doFlush)
flush(true, false);
@@ -1033,10 +1001,7 @@ void IndexWriter::startTransaction() {
flush();
// Turn off auto-commit during our local transaction:
autoCommit = false;
- } else
- // We must "protect" our files at this point from
- // deletion in case we need to rollback:
- deleter->incRef(segmentInfos, false);
+ }
}
void IndexWriter::rollbackTransaction() {
@@ -1056,15 +1021,6 @@ void IndexWriter::rollbackTransaction() {
segmentInfos->insert(localRollbackSegmentInfos, true);
_CLDELETE(localRollbackSegmentInfos);
- // Ask deleter to locate unreferenced files we had
- // created & remove them:
- deleter->checkpoint(segmentInfos, false);
-
- if (!autoCommit)
- // Remove the incRef we did in startTransaction:
- deleter->decRef(segmentInfos);
-
- deleter->refresh();
finishMerges(false);
stopMerges = false;
}
@@ -1090,15 +1046,6 @@ void IndexWriter::commitTransaction() {
rollbackTransaction();
})
-
- if (!autoCommit)
- // Remove the incRef we did in startTransaction.
- deleter->decRef(localRollbackSegmentInfos);
-
- _CLDELETE(localRollbackSegmentInfos);
-
- // Give deleter a chance to remove files now:
- deleter->checkpoint(segmentInfos, autoCommit);
}
void IndexWriter::abort() {
@@ -1135,14 +1082,8 @@ void IndexWriter::abort() {
// will always write to a _CLNEW generation ("write
// once").
segmentInfos->clear();
- segmentInfos->insert(rollbackSegmentInfos, false);
docWriter->abort(NULL);
-
- // Ask deleter to locate unreferenced files & remove
- // them:
- deleter->checkpoint(segmentInfos, false);
- deleter->refresh();
}
commitPending = false;
@@ -2248,14 +2189,14 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
// have to fix the "applyDeletesSelectively" logic to
// apply to more than just the last flushed segment
//bool flushDeletes = sdocWriter->hasDeletes();
- bool flushDeletes = docWriter->hasDeletes();
+ //bool flushDeletes = docWriter->hasDeletes();
if (infoStream != NULL) {
message(" flush: segment=" + docWriter->getSegment() +
" docStoreSegment=" + docWriter->getDocStoreSegment() +
" docStoreOffset=" +
Misc::toString(docWriter->getDocStoreOffset()) +
" flushDocs=" + Misc::toString(flushDocs) +
- " flushDeletes=" + Misc::toString(flushDeletes) +
+ //" flushDeletes=" + Misc::toString(flushDeletes) +
" flushDocStores=" + Misc::toString(_flushDocStores) +
" numDocs=" + Misc::toString(numDocs) +
" numBufDelTerms=" +
Misc::toString(docWriter->getNumBufferedDeleteTerms()));
@@ -2290,13 +2231,7 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
// If we are flushing docs, segment must not be NULL:
assert(!segment.empty() || !flushDocs);
- if (flushDocs || flushDeletes) {
-
- SegmentInfos *rollback = NULL;
-
- if (flushDeletes)
- rollback = segmentInfos->clone();
-
+ if (flushDocs) {
bool success = false;
try {
@@ -2323,13 +2258,6 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
segmentInfos->insert(newSegment);
}
- if (flushDeletes)
- // we should be able to change this so we can
- // buffer deletes longer and then flush them to
- // multiple flushed segments, when
- // autoCommit=false
- applyDeletes(flushDocs);
-
doAfterFlush();
checkpoint();
@@ -2339,44 +2267,16 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
if (!success) {
if (infoStream != NULL)
message("hit exception flushing segment " +
segment);
-
- if (flushDeletes) {
-
- // Carefully check if any partial .del files
- // should be removed:
- const int32_t size = rollback->size();
- for (int32_t i = 0; i < size; i++) {
- const string newDelFileName =
segmentInfos->info(i)->getDelFileName();
- const string delFileName =
rollback->info(i)->getDelFileName();
- if (!newDelFileName.empty() &&
newDelFileName.compare(delFileName) != 0)
-
deleter->deleteFile(newDelFileName.c_str());
- }
-
- // Fully replace the segmentInfos since flushed
- // deletes could have changed any of the
- // SegmentInfo instances:
- segmentInfos->clear();
- assert(false);//test me..
- segmentInfos->insert(rollback, false);
-
- } else {
+ {
// Remove segment we added, if any:
if (newSegment != NULL &&
segmentInfos->size() > 0 &&
segmentInfos->info(segmentInfos->size() - 1)
== newSegment)
segmentInfos->remove(segmentInfos->size() - 1);
}
- if (flushDocs) {}
- //sdocWriter->abort(NULL);
+ docWriter->abort(NULL);
deletePartialSegmentsFile();
- deleter->checkpoint(segmentInfos, false);
-
- if (!segment.empty())
- deleter->refresh(segment.c_str());
- } else if (flushDeletes)
- _CLDELETE(rollback);)
-
- deleter->checkpoint(segmentInfos, autoCommit);
+ })
if (flushDocs && mergePolicy->useCompoundFile(segmentInfos,
newSegment)) {
@@ -2392,11 +2292,8 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
if (infoStream != NULL)
message("hit exception creating compound file
for newly flushed segment " + segment);
newSegment->setUseCompoundFile(false);
- deleter->deleteFile((segment + "." +
IndexFileNames::COMPOUND_FILE_EXTENSION).c_str());
deletePartialSegmentsFile();
})
-
- deleter->checkpoint(segmentInfos, autoCommit);
}
ret = true;
@@ -2470,7 +2367,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
assert(_merge->increfDone);
decrefMergeSegments(_merge);
- deleter->refresh(_merge->info->name.c_str());
return false;
}
@@ -2566,7 +2462,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
if (!success) {
if (infoStream != NULL)
message(string("hit exception creating merged deletes
file"));
- deleter->refresh(_merge->info->name.c_str());
})
// Simple optimization: if the doc store we are using
@@ -2607,17 +2502,11 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
segmentInfos->clear();
segmentInfos->insert(rollback, true);
deletePartialSegmentsFile();
- deleter->refresh(_merge->info->name.c_str());
} _CLDELETE(rollback);)
if (_merge->optimize)
segmentsToOptimize->push_back(_merge->info);
- // Must checkpoint before decrefing so any newly
- // referenced files in the _CLNEW merge->info are incref'd
- // first:
- deleter->checkpoint(segmentInfos, autoCommit);
-
decrefMergeSegments(_merge);
return true;
@@ -2631,10 +2520,6 @@ void IndexWriter::decrefMergeSegments(MergePolicy::OneMerge *_merge) {
_merge->increfDone = false;
for (int32_t i = 0; i < numSegmentsToMerge; i++) {
SegmentInfo *previousInfo = sourceSegmentsClone->info(i);
- // Decref all files for this SegmentInfo (this
- // matches the incref in mergeInit):
- if (previousInfo->dir == directory)
- deleter->decRef(previousInfo->files());
}
}
@@ -2679,8 +2564,6 @@ void IndexWriter::merge(MergePolicy::OneMerge *_merge) {
if (infoStream != NULL)
message(string("hit exception during merge"));
addMergeException(_merge);
- if (_merge->info != NULL &&
segmentInfos->indexOf(_merge->info) == -1)
- deleter->refresh(_merge->info->name.c_str());
}
// This merge (and, generally, any change to the
@@ -2870,11 +2753,6 @@ void IndexWriter::_mergeInit(MergePolicy::OneMerge *_merge) {
for (int32_t i = 0; i < end; i++) {
SegmentInfo *si = _merge->segmentsClone->info(i);
-
- // IncRef all files for this segment info to make sure
- // they are not removed while we are trying to merge->
- if (si->dir == directory)
- deleter->incRef(si->files());
}
_merge->increfDone = true;
@@ -2968,7 +2846,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
{
SCOPED_LOCK_MUTEX(this->THIS_LOCK)
addMergeException(_merge);
- deleter->refresh(mergedName.c_str());
}
})
@@ -3019,7 +2896,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
SCOPED_LOCK_MUTEX(this->THIS_LOCK)
if (!skip)
addMergeException(_merge);
- deleter->deleteFile(compoundFileName.c_str());
}
})
@@ -3027,18 +2903,12 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
{
SCOPED_LOCK_MUTEX(this->THIS_LOCK)
- if (skip || segmentInfos->indexOf(_merge->info) == -1 ||
_merge->isAborted()) {
- // Our segment (committed in non-compound
- // format) got merged away while we were
- // building the compound format.
- deleter->deleteFile(compoundFileName.c_str());
- } else {
- success = false;
- try {
+ success = false;
+ try {
_merge->info->setUseCompoundFile(true);
checkpoint();
success = true;
- }
+ }
_CLFINALLY(
if (!success) {
if (infoStream != NULL)
@@ -3048,12 +2918,7 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
addMergeException(_merge);
_merge->info->setUseCompoundFile(false);
deletePartialSegmentsFile();
- deleter->deleteFile(compoundFileName.c_str());
})
-
- // Give deleter a chance to remove files now.
- deleter->checkpoint(segmentInfos, autoCommit);
- }
}
}
}
@@ -3083,7 +2948,6 @@ void IndexWriter::deletePartialSegmentsFile() {
if (infoStream != NULL)
message("now delete partial segments file \"" + segmentFileName +
"\"");
- deleter->deleteFile(segmentFileName.c_str());
}
}
diff --git a/src/core/CLucene/index/IndexWriter.h b/src/core/CLucene/index/IndexWriter.h
index 1fef37e5d7c..33df65cad46 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -230,7 +230,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
bool commitPending; // true if segmentInfos has changes not yet committed
- SegmentInfos* rollbackSegmentInfos; // segmentInfos we will fallback to
if the commit fails
+ //SegmentInfos* rollbackSegmentInfos; // segmentInfos we will fallback
to if the commit fails
SegmentInfos* localRollbackSegmentInfos; // segmentInfos we will
fallback to if the commit fails
bool localAutoCommit; // saved autoCommit during local
transaction
@@ -240,7 +240,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
//typedef SDocumentsWriter<char> SDocumentsWriterType;
//SDocumentsWriterType *sdocWriter;
//lucene::index::SDocumentsWriter<char>* sdocWriter;
- IndexFileDeleter* deleter;
+ //IndexFileDeleter* deleter;
typedef std::vector<SegmentInfo*> SegmentsToOptimizeType;
SegmentsToOptimizeType* segmentsToOptimize; // used by optimize to
note those needing optimization
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]