This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git


The following commit(s) were added to refs/heads/clucene by this push:
     new 1936207ae4 [Feature](term info) add ram used function for term info reader (#216)
1936207ae4 is described below

commit 1936207ae4345e45fcb2b4e6b1ed1a318f8c1b1b
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Tue May 21 12:49:24 2024 +0800

    [Feature](term info) add ram used function for term info reader (#216)
---
 src/core/CLucene/index/IndexReader.cpp       |  4 ++++
 src/core/CLucene/index/IndexReader.h         |  1 +
 src/core/CLucene/index/TermInfosReader.cpp   | 26 +++++++++++++++++++-------
 src/core/CLucene/index/_MultiSegmentReader.h |  8 ++++++++
 src/core/CLucene/index/_SegmentHeader.h      |  4 ++++
 src/core/CLucene/index/_TermInfosReader.h    | 10 ++++++++--
 6 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/src/core/CLucene/index/IndexReader.cpp b/src/core/CLucene/index/IndexReader.cpp
index a63b8d1ad8..5b9f8ad262 100644
--- a/src/core/CLucene/index/IndexReader.cpp
+++ b/src/core/CLucene/index/IndexReader.cpp
@@ -194,6 +194,10 @@ CL_NS_DEF(index)
     _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
   }
 
+  int64_t IndexReader::getTermInfosRAMUsed() const { // Base-class default: always throws CL_ERR_UnsupportedOperation; readers that track term-info RAM override this.
+      _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
+  }
+
   bool IndexReader::isCurrent() {
     _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method.");
   }
diff --git a/src/core/CLucene/index/IndexReader.h b/src/core/CLucene/index/IndexReader.h
index 060c0545ff..4307a0d933 100644
--- a/src/core/CLucene/index/IndexReader.h
+++ b/src/core/CLucene/index/IndexReader.h
@@ -380,6 +380,7 @@ public:
    *  current indexDivisor.
    *  @see #setTermInfosIndexDivisor */
   int32_t getTermInfosIndexDivisor();
+  virtual int64_t getTermInfosRAMUsed() const;
 
   /**
    * Check whether this IndexReader is still using the
diff --git a/src/core/CLucene/index/TermInfosReader.cpp b/src/core/CLucene/index/TermInfosReader.cpp
index 6cf8b42fe4..9044d1d36a 100644
--- a/src/core/CLucene/index/TermInfosReader.cpp
+++ b/src/core/CLucene/index/TermInfosReader.cpp
@@ -129,18 +129,23 @@ void TermInfosReader::close() {
         }
 #endif
     }
+    numBytesUsed -= (sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength);
     //Delete the arrays
-    if (indexTerms) {
+    if (indexTerms != nullptr) {
+        for (int32_t i = 0; i < indexTermsLength; ++i) {
+            numBytesUsed -= indexTerms[i].textLength();
+        }
         delete[] indexTerms;
         indexTerms = NULL;
     }
-    if (indexInfos) {
+    if (indexInfos != nullptr) {
+        numBytesUsed -= sizeof(TermInfo) * indexTermsLength;
         _CLDELETE_ARRAY(indexInfos);
         indexInfos = NULL;
     }
-
     //Delete the arrays
-    if (indexPointers) {
+    if (indexPointers != NULL) {
+        numBytesUsed -= sizeof(int64_t) * indexTermsLength;
         _CLDELETE_ARRAY(indexPointers);
         indexPointers = NULL;
     }
@@ -324,9 +329,6 @@ void TermInfosReader::ensureIndexIsRead() {
     SCOPED_LOCK_MUTEX(THIS_LOCK)
 
     if (indexIsRead) return;
-
-    //https://jira.qianxin-inc.cn/browse/XHBUG-2921
-    //https://jira.qianxin-inc.cn/browse/XHBUG-3053
     if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
 
     try {
@@ -360,6 +362,16 @@ void TermInfosReader::ensureIndexIsRead() {
                 if (!indexEnum->next()) break;
         }
         indexIsRead = true;
+        numBytesUsed = sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength;
+        for (int32_t i = 0; i < indexTermsLength; ++i) {
+            numBytesUsed += indexTerms[i].textLength();
+        }
+        if (indexInfos != NULL) {
+            numBytesUsed += sizeof(TermInfo) * indexTermsLength;
+        }
+        if (indexPointers != NULL) {
+            numBytesUsed += sizeof(int64_t) * indexTermsLength;
+        }
     }
     _CLFINALLY(indexEnum->close();
                //Close and delete the IndexInput is. The close is done by the destructor.
diff --git a/src/core/CLucene/index/_MultiSegmentReader.h b/src/core/CLucene/index/_MultiSegmentReader.h
index ad600824fa..c5f8deeea2 100644
--- a/src/core/CLucene/index/_MultiSegmentReader.h
+++ b/src/core/CLucene/index/_MultiSegmentReader.h
@@ -123,6 +123,14 @@ public:
   const char* getObjectName() const;
 
   IndexVersion getIndexVersion() override;
+
+  int64_t getTermInfosRAMUsed() const override { // Returns the sum of getTermInfosRAMUsed() over every entry in subReaders.
+      int64_t size = 0; // running total across sub-readers
+      for (size_t i = 0; i < subReaders->length; i++) {
+          size += (*subReaders)[i]->getTermInfosRAMUsed(); // virtual call; a sub-reader that keeps the IndexReader default throws CL_ERR_UnsupportedOperation, which propagates out of this loop
+      }
+      return size;
+  }
 };
 
 
diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h
index c1f01e7cec..6bf7d1819b 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -321,6 +321,10 @@ class SegmentReader: public DirectoryIndexReader {
 
   void initialize(SegmentInfo* si, int32_t readBufferSize, bool doOpenStores, bool doingReopen);
 
+  int64_t getTermInfosRAMUsed() const override { // Forwards to tis->getRAMUsed() and returns its value unchanged.
+      return tis->getRAMUsed(); // NOTE(review): tis is dereferenced without a NULL check — confirm it is always set (e.g. not after close) when this is called
+  }
+
   /**
    * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
    * @return TermVectorsReader
diff --git a/src/core/CLucene/index/_TermInfosReader.h b/src/core/CLucene/index/_TermInfosReader.h
index c2b41aca6c..a9a993795e 100644
--- a/src/core/CLucene/index/_TermInfosReader.h
+++ b/src/core/CLucene/index/_TermInfosReader.h
@@ -47,7 +47,9 @@ CL_NS_DEF(index)
                int32_t indexDivisor;
                int32_t totalIndexInterval;
 
-               DEFINE_MUTEX(THIS_LOCK)
+        int64_t numBytesUsed;
+
+        DEFINE_MUTEX(THIS_LOCK)
 
        public:
                /**
@@ -102,7 +104,11 @@ CL_NS_DEF(index)
                
                /** Returns the TermInfo for a Term in the set, or null. */
                TermInfo* get(const Term* term);
-       private:
+
+        int64_t getRAMUsed() const { // Returns the numBytesUsed counter, which ensureIndexIsRead() sets and close() decrements.
+            return numBytesUsed; // NOTE(review): read without THIS_LOCK; ensureIndexIsRead() also adds the TermInfo and int64_t array sizes a second time when those arrays are non-NULL, so this may overstate usage — confirm intent and that the member is initialized in the constructor
+        }
+    private:
                /** Reads the term info index file or .tti file. */
                void ensureIndexIsRead();
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to