This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch clucene-3.0
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene-3.0 by this push:
     new 8d8f92ef8dd branch-3.0: [fix](index reader) fix memory leak  (#334)
8d8f92ef8dd is described below

commit 8d8f92ef8ddd0e50b6fc76f8f6572abaef1b5213
Author: airborne12 <[email protected]>
AuthorDate: Thu Jul 17 22:41:36 2025 +0800

    branch-3.0: [fix](index reader) fix memory leak  (#334)
    
    * [fix](index) fix memory leak in DirectoryIndexReader and improve abort handling (#329)
    
    * [fix](index reader) fix memory leak after exception in infos read (#331)
    
    * [fix](index writer) remove unused deleter code (#328)
---
 src/core/CLucene/index/DirectoryIndexReader.cpp |   8 +-
 src/core/CLucene/index/IndexWriter.cpp          | 184 ++++--------------------
 src/core/CLucene/index/IndexWriter.h            |   4 +-
 src/core/CLucene/index/SDocumentWriter.h        |   4 +-
 4 files changed, 36 insertions(+), 164 deletions(-)

diff --git a/src/core/CLucene/index/DirectoryIndexReader.cpp b/src/core/CLucene/index/DirectoryIndexReader.cpp
index 24f7c105e01..32adf489460 100644
--- a/src/core/CLucene/index/DirectoryIndexReader.cpp
+++ b/src/core/CLucene/index/DirectoryIndexReader.cpp
@@ -148,11 +148,17 @@ CL_NS_DEF(index)
     DirectoryIndexReader* doBody(const char* segmentFileName) {
 
       SegmentInfos* infos = _CLNEW SegmentInfos;
-      infos->read(directory, segmentFileName);
+      try {
+        infos->read(directory, segmentFileName);
+      } catch (CLuceneError& e) {
+        _CLDELETE(infos);
+        throw e;
+      }
 
       DirectoryIndexReader* reader;
 
       if (infos->size() == 0) {
+        _CLDELETE(infos);
         _CLTHROWA(CL_ERR_EmptyIndexSegment, "The number of index segments is 
0, indicating incorrect index generation.");
       } else if (infos->size() == 1) {          // index is optimized
         reader = SegmentReader::get(infos, infos->info(0), readBufferSize_, 
closeDirectory);
diff --git a/src/core/CLucene/index/IndexWriter.cpp b/src/core/CLucene/index/IndexWriter.cpp
index ae03dcb8fed..c26e5c68cb6 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -5,6 +5,7 @@
 * the GNU Lesser General Public License, as specified in the COPYING file.
 
------------------------------------------------------------------------------*/
 #include "CLucene/_ApiHeader.h"
+#include "CLucene/index/IndexWriter.h"
 
 #include "CLucene/analysis/AnalysisHeader.h"
 #include "CLucene/analysis/Analyzers.h"
@@ -14,11 +15,8 @@
 #include "CLucene/store/Directory.h"
 #include "CLucene/util/Misc.h"
 #include "CLucene/util/PFORUtil.h"
-#include "IndexReader.h"
-#include "IndexWriter.h"
-
+#include "CLucene/index/IndexReader.h"
 #include "CLucene/index/MergePolicy.h"
-#include "CLucene/search/Similarity.h"
 #include "CLucene/store/FSDirectory.h"
 #include "CLucene/store/_Lock.h"
 #include "CLucene/store/_RAMDirectory.h"
@@ -26,17 +24,18 @@
 #include "CLucene/util/PriorityQueue.h"
 #include "CLucene/index/CodeMode.h"
 #include "CLucene/analysis/standard95/StandardAnalyzer.h"
-#include "MergePolicy.h"
-#include "MergeScheduler.h"
-#include "SDocumentWriter.h"
-#include "_DocumentsWriter.h"
-#include "_IndexFileDeleter.h"
-#include "_SegmentHeader.h"
-#include "_SegmentInfos.h"
-#include "_SegmentMerger.h"
-#include "_SkipListWriter.h"
-#include "_Term.h"
-#include "_TermInfo.h"
+#include "CLucene/index/MergePolicy.h"
+#include "CLucene/index/MergeScheduler.h"
+#include "CLucene/index/SDocumentWriter.h"
+#include "CLucene/index/_DocumentsWriter.h"
+#include "CLucene/index/_IndexFileDeleter.h"
+#include "CLucene/index/_SegmentHeader.h"
+#include "CLucene/index/_SegmentInfos.h"
+#include "CLucene/index/_SegmentMerger.h"
+#include "CLucene/index/_SkipListWriter.h"
+#include "CLucene/index/_Term.h"
+#include "CLucene/index/_TermInfo.h"
+
 #include <algorithm>
 #include <memory>
 #include <assert.h>
@@ -106,7 +105,6 @@ void IndexWriter::deinit(bool releaseWriteLock) throw() {
     _CLLDELETE(segmentsToOptimize);
     _CLLDELETE(mergeScheduler);
     _CLLDELETE(mergePolicy);
-    _CLLDELETE(deleter);
     _CLLDELETE(docWriter);
     if (bOwnsDirectory) _CLLDECDELETE(directory);
     delete _internal;
@@ -252,8 +250,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
     this->autoCommit = true;
     this->segmentInfos = _CLNEW SegmentInfos;
     this->mergeGen = 0;
-    this->rollbackSegmentInfos = NULL;
-    this->deleter = NULL;
     this->docWriter = NULL;
     this->writeLock = NULL;
     this->fieldInfos = NULL;
@@ -294,11 +290,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
         }
 
         this->autoCommit = autoCommit;
-        if (!autoCommit) {
-            rollbackSegmentInfos = segmentInfos->clone();
-        } else {
-            rollbackSegmentInfos = NULL;
-        }
         if (analyzer != nullptr) {
             if (analyzer->isSDocOpt()) {
                 docWriter = _CLNEW SDocumentsWriter<char>(directory, this);
@@ -308,11 +299,6 @@ void IndexWriter::init(Directory *d, Analyzer *a, const bool create, const bool
         } else {
             _CLTHROWA(CL_ERR_IllegalArgument, "IndexWriter::init: Only support 
SDocumentsWriter");
         }
-        // Default deleter (for backwards compatibility) is
-        // KeepOnlyLastCommitDeleter:
-        deleter = _CLNEW IndexFileDeleter(directory,
-                                          deletionPolicy == NULL ? _CLNEW 
KeepOnlyLastCommitDeletionPolicy() : deletionPolicy,
-                                          segmentInfos, infoStream, docWriter);
 
         pushMaxBufferedDocs();
 
@@ -476,7 +462,6 @@ void IndexWriter::setInfoStream(std::ostream *infoStream) {
     this->infoStream = infoStream;
     setMessageID();
     docWriter->setInfoStream(infoStream);
-    deleter->setInfoStream(infoStream);
     if (infoStream != NULL)
         messageState();
 }
@@ -587,19 +572,14 @@ void IndexWriter::closeInternal(bool waitForMerges) {
                 if (infoStream != NULL)
                     message("close: wrote segments file \"" + 
segmentInfos->getCurrentSegmentFileName() + "\"");
 
-                deleter->checkpoint(segmentInfos, true);
-
                 commitPending = false;
                 //        _CLDELETE(rollbackSegmentInfos);
             }
-            _CLDELETE(rollbackSegmentInfos);
-
 
             if (infoStream != NULL)
                 message("at close: " + segString());
 
             _CLDELETE(docWriter);
-            deleter->close();
         }
 
         if (closeDir)
@@ -688,11 +668,8 @@ bool IndexWriter::flushDocStores() {
                                 
si->getDocStoreSegment().compare(docStoreSegment) == 0)
                                 si->setDocStoreIsCompoundFile(false);
                         }
-                        deleter->deleteFile(compoundFileName.c_str());
                         deletePartialSegmentsFile();
                     })
-
-            deleter->checkpoint(segmentInfos, false);
         }
     }
 
@@ -798,15 +775,6 @@ void IndexWriter::updateDocument(Term *term, Document *doc, Analyzer *analyzer)
                 if (!success) {
                     if (infoStream != NULL)
                         message(string("hit exception updating document"));
-
-                    {
-                        SCOPED_LOCK_MUTEX(this->THIS_LOCK)
-                        // If docWriter has some aborted files that were
-                        // never incref'd, then we clean them up here
-                        const std::vector<std::string> *files = 
docWriter->abortedFiles();
-                        if (files != NULL)
-                            deleter->deleteNewFiles(*files);
-                    }
                 })
         if (doFlush)
             flush(true, false);
@@ -1032,10 +1000,7 @@ void IndexWriter::startTransaction() {
         flush();
         // Turn off auto-commit during our local transaction:
         autoCommit = false;
-    } else
-        // We must "protect" our files at this point from
-        // deletion in case we need to rollback:
-        deleter->incRef(segmentInfos, false);
+    }
 }
 
 void IndexWriter::rollbackTransaction() {
@@ -1055,15 +1020,6 @@ void IndexWriter::rollbackTransaction() {
     segmentInfos->insert(localRollbackSegmentInfos, true);
     _CLDELETE(localRollbackSegmentInfos);
 
-    // Ask deleter to locate unreferenced files we had
-    // created & remove them:
-    deleter->checkpoint(segmentInfos, false);
-
-    if (!autoCommit)
-        // Remove the incRef we did in startTransaction:
-        deleter->decRef(segmentInfos);
-
-    deleter->refresh();
     finishMerges(false);
     stopMerges = false;
 }
@@ -1089,15 +1045,6 @@ void IndexWriter::commitTransaction() {
 
                 rollbackTransaction();
             })
-
-    if (!autoCommit)
-        // Remove the incRef we did in startTransaction.
-        deleter->decRef(localRollbackSegmentInfos);
-
-    _CLDELETE(localRollbackSegmentInfos);
-
-    // Give deleter a chance to remove files now:
-    deleter->checkpoint(segmentInfos, autoCommit);
 }
 
 void IndexWriter::abort() {
@@ -1134,14 +1081,8 @@ void IndexWriter::abort() {
             // will always write to a _CLNEW generation ("write
             // once").
             segmentInfos->clear();
-            segmentInfos->insert(rollbackSegmentInfos, false);
 
             docWriter->abort(NULL);
-
-            // Ask deleter to locate unreferenced files & remove
-            // them:
-            deleter->checkpoint(segmentInfos, false);
-            deleter->refresh();
         }
 
         commitPending = false;
@@ -2116,14 +2057,14 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
         // have to fix the "applyDeletesSelectively" logic to
         // apply to more than just the last flushed segment
         //bool flushDeletes = sdocWriter->hasDeletes();
-        bool flushDeletes = docWriter->hasDeletes();
+        //bool flushDeletes = docWriter->hasDeletes();
 
         if (infoStream != NULL) {
             message("  flush: segment=" + docWriter->getSegment() +
                     " docStoreSegment=" + docWriter->getDocStoreSegment() +
                     " docStoreOffset=" + 
Misc::toString(docWriter->getDocStoreOffset()) +
                     " flushDocs=" + Misc::toString(flushDocs) +
-                    " flushDeletes=" + Misc::toString(flushDeletes) +
+                    //" flushDeletes=" + Misc::toString(flushDeletes) +
                     " flushDocStores=" + Misc::toString(_flushDocStores) +
                     " numDocs=" + Misc::toString(numDocs) +
                     " numBufDelTerms=" + 
Misc::toString(docWriter->getNumBufferedDeleteTerms()));
@@ -2158,13 +2099,7 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
         // If we are flushing docs, segment must not be NULL:
         assert(!segment.empty() || !flushDocs);
 
-        if (flushDocs || flushDeletes) {
-
-            SegmentInfos *rollback = NULL;
-
-            if (flushDeletes)
-                rollback = segmentInfos->clone();
-
+        if (flushDocs) {
             bool success = false;
 
             try {
@@ -2191,13 +2126,6 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                     segmentInfos->insert(newSegment);
                 }
 
-                if (flushDeletes)
-                    // we should be able to change this so we can
-                    // buffer deletes longer and then flush them to
-                    // multiple flushed segments, when
-                    // autoCommit=false
-                    applyDeletes(flushDocs);
-
                 doAfterFlush();
 
                 checkpoint();
@@ -2207,44 +2135,16 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                     if (!success) {
                         if (infoStream != NULL)
                             message("hit exception flushing segment " + 
segment);
-
-                        if (flushDeletes) {
-
-                            // Carefully check if any partial .del files
-                            // should be removed:
-                            const int32_t size = rollback->size();
-                            for (int32_t i = 0; i < size; i++) {
-                                const string newDelFileName = 
segmentInfos->info(i)->getDelFileName();
-                                const string delFileName = 
rollback->info(i)->getDelFileName();
-                                if (!newDelFileName.empty() && 
newDelFileName.compare(delFileName) != 0)
-                                    
deleter->deleteFile(newDelFileName.c_str());
-                            }
-
-                            // Fully replace the segmentInfos since flushed
-                            // deletes could have changed any of the
-                            // SegmentInfo instances:
-                            segmentInfos->clear();
-                            assert(false);//test me..
-                            segmentInfos->insert(rollback, false);
-
-                        } else {
+                        {
                             // Remove segment we added, if any:
                             if (newSegment != NULL &&
                                 segmentInfos->size() > 0 &&
                                 segmentInfos->info(segmentInfos->size() - 1) 
== newSegment)
                                 segmentInfos->remove(segmentInfos->size() - 1);
                         }
-                        if (flushDocs) {}
-                        //sdocWriter->abort(NULL);
+                        docWriter->abort(NULL);
                         deletePartialSegmentsFile();
-                        deleter->checkpoint(segmentInfos, false);
-
-                        if (!segment.empty())
-                            deleter->refresh(segment.c_str());
-                    } else if (flushDeletes)
-                            _CLDELETE(rollback);)
-
-            deleter->checkpoint(segmentInfos, autoCommit);
+                    })
 
             if (flushDocs && mergePolicy->useCompoundFile(segmentInfos,
                                                           newSegment)) {
@@ -2260,11 +2160,8 @@ bool IndexWriter::doFlush(bool _flushDocStores) {
                             if (infoStream != NULL)
                                 message("hit exception creating compound file 
for newly flushed segment " + segment);
                             newSegment->setUseCompoundFile(false);
-                            deleter->deleteFile((segment + "." + 
IndexFileNames::COMPOUND_FILE_EXTENSION).c_str());
                             deletePartialSegmentsFile();
                         })
-
-                deleter->checkpoint(segmentInfos, autoCommit);
             }
 
             ret = true;
@@ -2338,7 +2235,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
 
         assert(_merge->increfDone);
         decrefMergeSegments(_merge);
-        deleter->refresh(_merge->info->name.c_str());
         return false;
     }
 
@@ -2434,7 +2330,6 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
             if (!success) {
                 if (infoStream != NULL)
                     message(string("hit exception creating merged deletes 
file"));
-                deleter->refresh(_merge->info->name.c_str());
             })
 
     // Simple optimization: if the doc store we are using
@@ -2475,17 +2370,11 @@ bool IndexWriter::commitMerge(MergePolicy::OneMerge *_merge) {
                 segmentInfos->clear();
                 segmentInfos->insert(rollback, true);
                 deletePartialSegmentsFile();
-                deleter->refresh(_merge->info->name.c_str());
             } _CLDELETE(rollback);)
 
     if (_merge->optimize)
         segmentsToOptimize->push_back(_merge->info);
 
-    // Must checkpoint before decrefing so any newly
-    // referenced files in the _CLNEW merge->info are incref'd
-    // first:
-    deleter->checkpoint(segmentInfos, autoCommit);
-
     decrefMergeSegments(_merge);
 
     return true;
@@ -2499,10 +2388,6 @@ void IndexWriter::decrefMergeSegments(MergePolicy::OneMerge *_merge) {
     _merge->increfDone = false;
     for (int32_t i = 0; i < numSegmentsToMerge; i++) {
         SegmentInfo *previousInfo = sourceSegmentsClone->info(i);
-        // Decref all files for this SegmentInfo (this
-        // matches the incref in mergeInit):
-        if (previousInfo->dir == directory)
-            deleter->decRef(previousInfo->files());
     }
 }
 
@@ -2547,8 +2432,6 @@ void IndexWriter::merge(MergePolicy::OneMerge *_merge) {
                             if (infoStream != NULL)
                                 message(string("hit exception during merge"));
                             addMergeException(_merge);
-                            if (_merge->info != NULL && 
segmentInfos->indexOf(_merge->info) == -1)
-                                deleter->refresh(_merge->info->name.c_str());
                         }
 
                         // This merge (and, generally, any change to the
@@ -2738,11 +2621,6 @@ void IndexWriter::_mergeInit(MergePolicy::OneMerge *_merge) {
 
     for (int32_t i = 0; i < end; i++) {
         SegmentInfo *si = _merge->segmentsClone->info(i);
-
-        // IncRef all files for this segment info to make sure
-        // they are not removed while we are trying to merge->
-        if (si->dir == directory)
-            deleter->incRef(si->files());
     }
 
     _merge->increfDone = true;
@@ -2836,7 +2714,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                 {
                     SCOPED_LOCK_MUTEX(this->THIS_LOCK)
                     addMergeException(_merge);
-                    deleter->refresh(mergedName.c_str());
                 }
             })
 
@@ -2887,7 +2764,6 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                         SCOPED_LOCK_MUTEX(this->THIS_LOCK)
                         if (!skip)
                             addMergeException(_merge);
-                        deleter->deleteFile(compoundFileName.c_str());
                     }
                 })
 
@@ -2895,18 +2771,12 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
 
             {
                 SCOPED_LOCK_MUTEX(this->THIS_LOCK)
-                if (skip || segmentInfos->indexOf(_merge->info) == -1 || 
_merge->isAborted()) {
-                    // Our segment (committed in non-compound
-                    // format) got merged away while we were
-                    // building the compound format.
-                    deleter->deleteFile(compoundFileName.c_str());
-                } else {
-                    success = false;
-                    try {
+                success = false;
+                try {
                         _merge->info->setUseCompoundFile(true);
                         checkpoint();
                         success = true;
-                    }
+                }
                     _CLFINALLY(
                             if (!success) {
                                 if (infoStream != NULL)
@@ -2916,12 +2786,7 @@ int32_t IndexWriter::mergeMiddle(MergePolicy::OneMerge *_merge) {
                                 addMergeException(_merge);
                                 _merge->info->setUseCompoundFile(false);
                                 deletePartialSegmentsFile();
-                                deleter->deleteFile(compoundFileName.c_str());
                             })
-
-                    // Give deleter a chance to remove files now.
-                    deleter->checkpoint(segmentInfos, autoCommit);
-                }
             }
         }
     }
@@ -2951,7 +2816,6 @@ void IndexWriter::deletePartialSegmentsFile() {
         if (infoStream != NULL)
             message("now delete partial segments file \"" + segmentFileName + 
"\"");
 
-        deleter->deleteFile(segmentFileName.c_str());
     }
 }
 
diff --git a/src/core/CLucene/index/IndexWriter.h b/src/core/CLucene/index/IndexWriter.h
index be19a9a0bff..ea7bebe6bb1 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -227,7 +227,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
 
 
   bool commitPending; // true if segmentInfos has changes not yet committed
-  SegmentInfos* rollbackSegmentInfos;      // segmentInfos we will fallback to 
if the commit fails
+  //SegmentInfos* rollbackSegmentInfos;      // segmentInfos we will fallback 
to if the commit fails
 
   SegmentInfos* localRollbackSegmentInfos;      // segmentInfos we will 
fallback to if the commit fails
   bool localAutoCommit;                // saved autoCommit during local 
transaction
@@ -237,7 +237,7 @@ class CLUCENE_EXPORT IndexWriter:LUCENE_BASE {
   //typedef SDocumentsWriter<char> SDocumentsWriterType;
   //SDocumentsWriterType *sdocWriter;
   //lucene::index::SDocumentsWriter<char>* sdocWriter;
-  IndexFileDeleter* deleter;
+  //IndexFileDeleter* deleter;
 
   typedef std::vector<SegmentInfo*> SegmentsToOptimizeType;
   SegmentsToOptimizeType* segmentsToOptimize;           // used by optimize to 
note those needing optimization
diff --git a/src/core/CLucene/index/SDocumentWriter.h b/src/core/CLucene/index/SDocumentWriter.h
index 36a43fc57b3..a3be503baa3 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -755,7 +755,9 @@ public:
     int32_t getNumBufferedDeleteTerms() override {
         return 0;
     }
-    void abort(AbortException *ae) override {}
+    void abort(AbortException *ae) override {
+        resetPostingsData();
+    }
     void setMaxBufferedDocs(int32_t count) override {
         maxBufferedDocs = count;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to