This is an automated email from the ASF dual-hosted git repository. jianliangqi pushed a commit to branch clucene in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push: new 1936207ae4 [Feature](term info) add ram used function for term info reader (#216) 1936207ae4 is described below commit 1936207ae4345e45fcb2b4e6b1ed1a318f8c1b1b Author: airborne12 <airborn...@gmail.com> AuthorDate: Tue May 21 12:49:24 2024 +0800 [Feature](term info) add ram used function for term info reader (#216) --- src/core/CLucene/index/IndexReader.cpp | 4 ++++ src/core/CLucene/index/IndexReader.h | 1 + src/core/CLucene/index/TermInfosReader.cpp | 26 +++++++++++++++++++------- src/core/CLucene/index/_MultiSegmentReader.h | 8 ++++++++ src/core/CLucene/index/_SegmentHeader.h | 4 ++++ src/core/CLucene/index/_TermInfosReader.h | 10 ++++++++-- 6 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/core/CLucene/index/IndexReader.cpp b/src/core/CLucene/index/IndexReader.cpp index a63b8d1ad8..5b9f8ad262 100644 --- a/src/core/CLucene/index/IndexReader.cpp +++ b/src/core/CLucene/index/IndexReader.cpp @@ -194,6 +194,10 @@ CL_NS_DEF(index) _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method."); } + int64_t IndexReader::getTermInfosRAMUsed() const { + _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method."); + } + bool IndexReader::isCurrent() { _CLTHROWA(CL_ERR_UnsupportedOperation, "This reader does not support this method."); } diff --git a/src/core/CLucene/index/IndexReader.h b/src/core/CLucene/index/IndexReader.h index 060c0545ff..4307a0d933 100644 --- a/src/core/CLucene/index/IndexReader.h +++ b/src/core/CLucene/index/IndexReader.h @@ -380,6 +380,7 @@ public: * current indexDivisor. * @see #setTermInfosIndexDivisor */ int32_t getTermInfosIndexDivisor(); + virtual int64_t getTermInfosRAMUsed() const; /** * Check whether this IndexReader is still using the diff --git a/src/core/CLucene/index/TermInfosReader.cpp b/src/core/CLucene/index/TermInfosReader.cpp index 6cf8b42fe4..9044d1d36a 100644 --- a/src/core/CLucene/index/TermInfosReader.cpp +++ b/src/core/CLucene/index/TermInfosReader.cpp @@ -129,18 +129,23 @@ void TermInfosReader::close() { } #endif } + numBytesUsed -= (sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength); //Delete the arrays - if (indexTerms) { + if (indexTerms != nullptr) { + for (int32_t i = 0; i < indexTermsLength; ++i) { + numBytesUsed -= indexTerms[i].textLength(); + } delete[] indexTerms; indexTerms = NULL; } - if (indexInfos) { + if (indexInfos != nullptr) { + numBytesUsed -= sizeof(TermInfo) * indexTermsLength; _CLDELETE_ARRAY(indexInfos); indexInfos = NULL; } - //Delete the arrays - if (indexPointers) { + if (indexPointers != NULL) { + numBytesUsed -= sizeof(int64_t) * indexTermsLength; _CLDELETE_ARRAY(indexPointers); indexPointers = NULL; } @@ -324,9 +329,6 @@ void TermInfosReader::ensureIndexIsRead() { SCOPED_LOCK_MUTEX(THIS_LOCK) if (indexIsRead) return; - - //https://jira.qianxin-inc.cn/browse/XHBUG-2921 - //https://jira.qianxin-inc.cn/browse/XHBUG-3053 if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL"); try { @@ -360,6 +362,16 @@ void TermInfosReader::ensureIndexIsRead() { if (!indexEnum->next()) break; } indexIsRead = true; + numBytesUsed = sizeof(Term) * indexTermsLength + sizeof(TermInfo) * indexTermsLength + sizeof(int64_t) * indexTermsLength; + for (int32_t i = 0; i < indexTermsLength; ++i) { + numBytesUsed += indexTerms[i].textLength(); + } + if (indexInfos != NULL) { + numBytesUsed += sizeof(TermInfo) * indexTermsLength; + } + if (indexPointers != NULL) { + numBytesUsed += sizeof(int64_t) * indexTermsLength; + } } _CLFINALLY(indexEnum->close(); //Close and delete the IndexInput is. The close is done by the destructor. diff --git a/src/core/CLucene/index/_MultiSegmentReader.h b/src/core/CLucene/index/_MultiSegmentReader.h index ad600824fa..c5f8deeea2 100644 --- a/src/core/CLucene/index/_MultiSegmentReader.h +++ b/src/core/CLucene/index/_MultiSegmentReader.h @@ -123,6 +123,14 @@ public: const char* getObjectName() const; IndexVersion getIndexVersion() override; + + int64_t getTermInfosRAMUsed() const override { + int64_t size = 0; + for (size_t i = 0; i < subReaders->length; i++) { + size += (*subReaders)[i]->getTermInfosRAMUsed(); + } + return size; + } }; diff --git a/src/core/CLucene/index/_SegmentHeader.h b/src/core/CLucene/index/_SegmentHeader.h index c1f01e7cec..6bf7d1819b 100644 --- a/src/core/CLucene/index/_SegmentHeader.h +++ b/src/core/CLucene/index/_SegmentHeader.h @@ -321,6 +321,10 @@ class SegmentReader: public DirectoryIndexReader { void initialize(SegmentInfo* si, int32_t readBufferSize, bool doOpenStores, bool doingReopen); + int64_t getTermInfosRAMUsed() const override { + return tis->getRAMUsed(); + } + /** * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. * @return TermVectorsReader diff --git a/src/core/CLucene/index/_TermInfosReader.h b/src/core/CLucene/index/_TermInfosReader.h index c2b41aca6c..a9a993795e 100644 --- a/src/core/CLucene/index/_TermInfosReader.h +++ b/src/core/CLucene/index/_TermInfosReader.h @@ -47,7 +47,9 @@ CL_NS_DEF(index) int32_t indexDivisor; int32_t totalIndexInterval; - DEFINE_MUTEX(THIS_LOCK) + int64_t numBytesUsed; + + DEFINE_MUTEX(THIS_LOCK) public: /** @@ -102,7 +104,11 @@ CL_NS_DEF(index) /** Returns the TermInfo for a Term in the set, or null. */ TermInfo* get(const Term* term); - private: + + int64_t getRAMUsed() const { + return numBytesUsed; + } + private: /** Reads the term info index file or .tti file. */ void ensureIndexIsRead(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org