This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 9511ebd0df0 [fix](index writer) remove unused deleter code (#328)
9511ebd0df0 is described below

commit 9511ebd0df0c0ed7296c3a41bf14e012dd68c2fd
Author: airborne12 <[email protected]>
AuthorDate: Fri Jul 11 16:21:34 2025 +0800

    [fix](index writer) remove unused deleter code (#328)
---
 src/core/CLucene/index/IndexWriter.cpp | 184 +++++----------------------------
 src/core/CLucene/index/IndexWriter.h   |   4 +-
 2 files changed, 26 insertions(+), 162 deletions(-)

diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index 44b4492df7f..d6d0a28d5dd 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -5,6 +5,7 @@
 * the GNU Lesser General Public License, as specified in the COPYING file.
 ------------------------------------------------------------------------------*/
 #include "CLucene/_ApiHeader.h"
+#include "CLucene/index/IndexWriter.h"
 
 #include "CLucene/analysis/AnalysisHeader.h"
 #include "CLucene/analysis/Analyzers.h"
@@ -15,11 +16,8 @@
 #include "CLucene/store/Directory.h"
 #include "CLucene/util/Misc.h"
 #include "CLucene/util/PFORUtil.h"
-#include "IndexReader.h"
-#include "IndexWriter.h"
-
+#include "CLucene/index/IndexReader.h"
 #include "CLucene/index/MergePolicy.h"
-#include "CLucene/search/Similarity.h"
 #include "CLucene/store/FSDirectory.h"
 #include "CLucene/store/_Lock.h"
 #include "CLucene/store/_RAMDirectory.h"
@@ -27,17 +25,18 @@
 #include "CLucene/util/PriorityQueue.h"
 #include "CLucene/index/CodeMode.h"
 #include "CLucene/analysis/standard95/StandardAnalyzer.h"
-#include "MergePolicy.h"
-#include "MergeScheduler.h"
-#include "SDocumentWriter.h"
-#include "_DocumentsWriter.h"
-#include "_IndexFileDeleter.h"
-#include "_SegmentHeader.h"
-#include "_SegmentInfos.h"
-#include "_SegmentMerger.h"
-#include "_SkipListWriter.h"
-#include "_Term.h"
-#include "_TermInfo.h"
+#include "CLucene/index/MergePolicy.h"
+#include "CLucene/index/MergeScheduler.h"
+#include "CLucene/index/SDocumentWriter.h"
+#include "CLucene/index/_DocumentsWriter.h"
+#include "CLucene/index/_IndexFileDeleter.h"
+#include "CLucene/index/_SegmentHeader.h"
+#include "CLucene/index/_SegmentInfos.h"
+#include "CLucene/index/_SegmentMerger.h"
+#include "CLucene/index/_SkipListWriter.h"
+#include "CLucene/index/_Term.h"
+#include "CLucene/index/_TermInfo.h"
+
 #include <algorithm>
 #include <memory>
 #include <assert.h>
@@ -107,7 +106,6 @@ void IndexWriter::deinit(bool releaseWriteLock) throw() {
     _CLLDELETE(segmentsToOptimize);
     _CLLDELETE(mergeScheduler);
     _CLLDELETE(mergePolicy);
-    _CLLDELETE(deleter);
     _CLLDELETE(docWriter);
     if (bOwnsDirectory) _CLLDECDELETE(directory);
     delete _internal;
@@ -253,8 +251,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
     this->autoCommit = true;
     this->segmentInfos = _CLNEW SegmentInfos;
     this->mergeGen = 0;
-    this->rollbackSegmentInfos = NULL;
-    this->deleter = NULL;
     this->docWriter = NULL;
     this->writeLock = NULL;
     this->fieldInfos = NULL;
@@ -295,11 +291,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
         }
 
         this->autoCommit = autoCommit;
-        if (!autoCommit) {
-            rollbackSegmentInfos = segmentInfos->clone();
-        } else {
-            rollbackSegmentInfos = NULL;
-        }
         if (analyzer != nullptr) {
             if (analyzer->isSDocOpt()) {
                 docWriter = _CLNEW SDocumentsWriter<char>(directory, this);
@@ -309,11 +300,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
         } else {
             _CLTHROWA(CL_ERR_IllegalArgument, "IndexWriter::init: Only support 
SDocumentsWriter");
         }
-        // Default deleter (for backwards compatibility) is
-        // KeepOnlyLastCommitDeleter:
-        deleter = _CLNEW IndexFileDeleter(directory,
-                                          deletionPolicy == NULL ? _CLNEW 
KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
-                                          segmentInfos, infoStream, docWriter);
 
         pushMaxBufferedDocs();
 
@@ -477,7 +463,6 @@ void IndexWriter::setInfoStream(std::ostream *infoStream) {
     this->infoStream = infoStream;
     setMessageID();
     docWriter->setInfoStream(infoStream);
-    deleter->setInfoStream(infoStream);
     if (infoStream != NULL)
         messageState();
 }
@@ -588,19 +573,14 @@ void IndexWriter::closeInternal(bool waitForMerges) {
                 if (infoStream != NULL)
                     message("close: wrote segments file \"" + 
segmentInfos->getCurrentSegmentFileName() + "\"");
 
-                deleter->checkpoint(segmentInfos, true);
-
                 commitPending = false;
                 //        _CLDELETE(rollbackSegmentInfos);
             }
-            _CLDELETE(rollbackSegmentInfos);
-
 
             if (infoStream != NULL)
                 message("at close: " + segString());
 
             _CLDELETE(docWriter);
-            deleter->close();
         }
 
         if (closeDir)
@@ -689,11 +669,8 @@ bool IndexWriter::flushDocStores() {
                                 
si->getDocStoreSegment().compare(docStoreSegment) == 0)
                                 si->setDocStoreIsCompoundFile(false);
                         }
-                        deleter->deleteFile(compoundFileName.c_str());
                         deletePartialSegmentsFile();
                     })
-
-            deleter->checkpoint(segmentInfos, false);
         }
     }
 
@@ -799,15 +776,6 @@ void IndexWriter::updateDocument(Term *term, Document *doc, Analyzer *analyzer)
                 if (!success) {
                     if (infoStream != NULL)
                         message(string("hit exception updating document"));
-
-                    {
-                        SCOPED_LOCK_MUTEX(this->THIS_LOCK)
-                        // If docWriter has some aborted files that were
-                        // never incref'd, then we clean them up here
-                        const std::vector<std::string> *files = 
docWriter->abortedFiles();
-                        if (files != NULL)
-                            deleter->deleteNewFiles(*files);
-                    }
                 })
         if (doFlush)
             flush(true, false);
@@ -1033,10 +1001,7 @@ void IndexWriter::startTransaction() {
         flush();
         // Turn off auto-commit during our local transaction:
         autoCommit = false;
-    } else
-        // We must "protect" our files at this point from
-        // deletion in case we need to rollback:
-        deleter->incRef(segmentInfos, false);
+    }
 }
 
 void IndexWriter::rollbackTransaction() {
@@ -1056,15 +1021,6 @@ void IndexWriter::rollbackTransaction() {
     segmentInfos->insert(localRollbackSegmentInfos, true);
     _CLDELETE(localRollbackSegmentInfos);
 
-    // Ask deleter to locate unreferenced files we had
-    // created & remove them:
-    deleter->checkpoint(segmentInfos, false);
-
-    if (!autoCommit)
-        // Remove the incRef we did in startTransaction:
-        deleter->decRef(segmentInfos);
-
-    deleter->refresh();
     finishMerges(false);
     stopMerges = false;
 }
@@ -1090,15 +1046,6 @@ void IndexWriter::commitTransaction() {
 
                 rollbackTransaction();
             })
-
-    if (!autoCommit)
-        // Remove the incRef we did in startTransaction.
-        deleter->decRef(localRollbackSegmentInfos);
-
-    _CLDELETE(localRollbackSegmentInfos);
-
-    // Give deleter a chance to remove files now:
-    deleter->checkpoint(segmentInfos, autoCommit);
 }
 
 void IndexWriter::abort() {
@@ -1135,14 +1082,8 @@ void IndexWriter::abort() {
             // will always write to a _CLNEW generation ("write
             // once").
             segmentInfos->clear();
-            segmentInfos->insert(rollbackSegmentInfos, false);
 
             docWriter->abort(NULL);
-
-            // Ask deleter to locate unreferenced files & remove
-            // them:
-            deleter->checkpoint(segmentInfos, false);
-            deleter->refresh();
         }
 
         commitPending = false;
@@ -2248,14 +2189,14 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
         // have to fix the "applyDeletesSelectively" logic to
         // apply to more than just the last flushed segment
         //bool flushDeletes = sdocWriter->hasDeletes();
-        bool flushDeletes = docWriter->hasDeletes();
+        //bool flushDeletes = docWriter->hasDeletes();
 
         if (infoStream != NULL) {
             message("  flush: segment=" + docWriter->getSegment() +
                     " docStoreSegment=" + docWriter->getDocStoreSegment() +
                     " docStoreOffset=" + 
Misc::toString(docWriter->getDocStoreOffset()) +
                     " flushDocs=" + Misc::toString(flushDocs) +
-                    " flushDeletes=" + Misc::toString(flushDeletes) +
+                    //" flushDeletes=" + Misc::toString(flushDeletes) +
                     " flushDocStores=" + Misc::toString(_flushDocStores) +
                     " numDocs=" + Misc::toString(numDocs) +
                     " numBufDelTerms=" + 
Misc::toString(docWriter->getNumBufferedDeleteTerms()));
@@ -2290,13 +2231,7 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
         // If we are flushing docs, segment must not be NULL:
         assert(!segment.empty() || !flushDocs);
 
-        if (flushDocs || flushDeletes) {
-
-            SegmentInfos *rollback = NULL;
-
-            if (flushDeletes)
-                rollback = segmentInfos->clone();
-
+        if (flushDocs) {
             bool success = false;
 
             try {
@@ -2323,13 +2258,6 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                     segmentInfos->insert(newSegment);
                 }
 
-                if (flushDeletes)
-                    // we should be able to change this so we can
-                    // buffer deletes longer and then flush them to
-                    // multiple flushed segments, when
-                    // autoCommit=false
-                    applyDeletes(flushDocs);
-
                 doAfterFlush();
 
                 checkpoint();
@@ -2339,44 +2267,16 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                     if (!success) {
                         if (infoStream != NULL)
                             message("hit exception flushing segment " + 
segment);
-
-                        if (flushDeletes) {
-
-                            // Carefully check if any partial .del files
-                            // should be removed:
-                            const int32_t size = rollback->size();
-                            for (int32_t i = 0; i < size; i++) {
-                                const string newDelFileName = 
segmentInfos->info(i)->getDelFileName();
-                                const string delFileName = 
rollback->info(i)->getDelFileName();
-                                if (!newDelFileName.empty() && 
newDelFileName.compare(delFileName) != 0)
-                                    
deleter->deleteFile(newDelFileName.c_str());
-                            }
-
-                            // Fully replace the segmentInfos since flushed
-                            // deletes could have changed any of the
-                            // SegmentInfo instances:
-                            segmentInfos->clear();
-                            assert(false);//test me..
-                            segmentInfos->insert(rollback, false);
-
-                        } else {
+                        {
                             // Remove segment we added, if any:
                             if (newSegment != NULL &&
                                 segmentInfos->size() > 0 &&
                                 segmentInfos->info(segmentInfos->size() - 1) 
== newSegment)
                                 segmentInfos->remove(segmentInfos->size() - 1);
                         }
-                        if (flushDocs) {}
-                        //sdocWriter->abort(NULL);
+                        docWriter->abort(NULL);
                         deletePartialSegmentsFile();
-                        deleter->checkpoint(segmentInfos, false);
-
-                        if (!segment.empty())
-                            deleter->refresh(segment.c_str());
-                    } else if (flushDeletes)
-                            _CLDELETE(rollback);)
-
-            deleter->checkpoint(segmentInfos, autoCommit);
+                    })
 
             if (flushDocs && mergePolicy->useCompoundFile(segmentInfos,
                                                           newSegment)) {
@@ -2392,11 +2292,8 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                             if (infoStream != NULL)
                                 message("hit exception creating compound file 
for newly flushed segment " + segment);
                             newSegment->setUseCompoundFile(false);
-                            deleter->deleteFile((segment + "." + 
IndexFileNames::COMPOUND_FILE_EXTENSION).c_str());
                             deletePartialSegmentsFile();
                         })
-
-                deleter->checkpoint(segmentInfos, autoCommit);
             }
 
             ret = true;
@@ -2470,7 +2367,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
 
         assert(_merge->increfDone);
         decrefMergeSegments(_merge);
-        deleter->refresh(_merge->info->name.c_str());
         return false;
     }
 
@@ -2566,7 +2462,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
             if (!success) {
                 if (infoStream != NULL)
                     message(string("hit exception creating merged deletes 
file"));
-                deleter->refresh(_merge->info->name.c_str());
             })
 
     // Simple optimization: if the doc store we are using
@@ -2607,17 +2502,11 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
                 segmentInfos->clear();
                 segmentInfos->insert(rollback, true);
                 deletePartialSegmentsFile();
-                deleter->refresh(_merge->info->name.c_str());
             } _CLDELETE(rollback);)
 
     if (_merge->optimize)
         segmentsToOptimize->push_back(_merge->info);
 
-    // Must checkpoint before decrefing so any newly
-    // referenced files in the _CLNEW merge->info are incref'd
-    // first:
-    deleter->checkpoint(segmentInfos, autoCommit);
-
     decrefMergeSegments(_merge);
 
     return true;
@@ -2631,10 +2520,6 @@ void IndexWriter::decrefMergeSegments(MergePolicy::OneMerge *_merge) {
     _merge->increfDone = false;
     for (int32_t i = 0; i < numSegmentsToMerge; i++) {
         SegmentInfo *previousInfo = sourceSegmentsClone->info(i);
-        // Decref all files for this SegmentInfo (this
-        // matches the incref in mergeInit):
-        if (previousInfo->dir == directory)
-            deleter->decRef(previousInfo->files());
     }
 }
 
@@ -2679,8 +2564,6 @@ void IndexWriter::merge(MergePolicy::OneMerge *_merge) {
                             if (infoStream != NULL)
                                 message(string("hit exception during merge"));
                             addMergeException(_merge);
-                            if (_merge->info != NULL && 
segmentInfos->indexOf(_merge->info) == -1)
-                                deleter->refresh(_merge->info->name.c_str());
                         }
 
                         // This merge (and, generally, any change to the
@@ -2870,11 +2753,6 @@ void IndexWriter::_mergeInit(MergePolicy::OneMerge *_merge) {
 
     for (int32_t i = 0; i < end; i++) {
         SegmentInfo *si = _merge->segmentsClone->info(i);
-
-        // IncRef all files for this segment info to make sure
-        // they are not removed while we are trying to merge->
-        if (si->dir == directory)
-            deleter->incRef(si->files());
     }
 
     _merge->increfDone = true;
@@ -2968,7 +2846,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                 {
                     SCOPED_LOCK_MUTEX(this->THIS_LOCK)
                     addMergeException(_merge);
-                    deleter->refresh(mergedName.c_str());
                 }
             })
 
@@ -3019,7 +2896,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                         SCOPED_LOCK_MUTEX(this->THIS_LOCK)
                         if (!skip)
                             addMergeException(_merge);
-                        deleter->deleteFile(compoundFileName.c_str());
                     }
                 })
 
@@ -3027,18 +2903,12 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
 
             {
                 SCOPED_LOCK_MUTEX(this->THIS_LOCK)
-                if (skip || segmentInfos->indexOf(_merge->info) == -1 || 
_merge->isAborted()) {
-                    // Our segment (committed in non-compound
-                    // format) got merged away while we were
-                    // building the compound format.
-                    deleter->deleteFile(compoundFileName.c_str());
-                } else {
-                    success = false;
-                    try {
+                success = false;
+                try {
                         _merge->info->setUseCompoundFile(true);
                         checkpoint();
                         success = true;
-                    }
+                }
                     _CLFINALLY(
                             if (!success) {
                                 if (infoStream != NULL)
@@ -3048,12 +2918,7 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                                 addMergeException(_merge);
                                 _merge->info->setUseCompoundFile(false);
                                 deletePartialSegmentsFile();
-                                deleter->deleteFile(compoundFileName.c_str());
                             })
-
-                    // Give deleter a chance to remove files now.
-                    deleter->checkpoint(segmentInfos, autoCommit);
-                }
             }
         }
     }
@@ -3083,7 +2948,6 @@ void IndexWriter::deletePartialSegmentsFile() {
         if (infoStream != NULL)
             message("now delete partial segments file \"" + segmentFileName + 
"\"");
 
-        deleter->deleteFile(segmentFileName.c_str());
     }
 }
 
diff --git a/src/core/CLucene/index/IndexWriter.h b/src/core/CLucene/index/IndexWriter.h
index 1fef37e5d7c..33df65cad46 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -230,7 +230,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
 
 
   bool commitPending; // true if segmentInfos has changes not yet committed
-  SegmentInfos* rollbackSegmentInfos;      // segmentInfos we will fallback to 
if the commit fails
+  //SegmentInfos* rollbackSegmentInfos;      // segmentInfos we will fallback 
to if the commit fails
 
   SegmentInfos* localRollbackSegmentInfos;      // segmentInfos we will 
fallback to if the commit fails
   bool localAutoCommit;                // saved autoCommit during local 
transaction
@@ -240,7 +240,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
   //typedef SDocumentsWriter<char> SDocumentsWriterType;
   //SDocumentsWriterType *sdocWriter;
   //lucene::index::SDocumentsWriter<char>* sdocWriter;
-  IndexFileDeleter* deleter;
+  //IndexFileDeleter* deleter;
 
   typedef std::vector<SegmentInfo*> SegmentsToOptimizeType;
   SegmentsToOptimizeType* segmentsToOptimize;           // used by optimize to 
note those needing optimization


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to