[doris-thirdparty] branch clucene updated: [Optimize](search) Optimize multiple terms conjunction query (#117)

jianliangqi Mon, 04 Sep 2023 01:38:43 -0700

This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git



The following commit(s) were added to refs/heads/clucene by this push:
     new e15a89a5 [Optimize](search) Optimize multiple terms conjunction query 
(#117)
e15a89a5 is described below

commit e15a89a5627b1a2e88ef924bee6e0a3c6c6495a3
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Mon Sep 4 16:38:32 2023 +0800

    [Optimize](search) Optimize multiple terms conjunction query (#117)
---
 .../CLucene/analysis/jieba/ChineseTokenizer.cpp    |   5 +-
 src/core/CLucene/document/Field.h                  |   7 +
 src/core/CLucene/index/CodeMode.h                  |   3 +-
 src/core/CLucene/index/DocRange.h                  |   7 +-
 src/core/CLucene/index/FieldInfos.cpp              |  31 +-
 src/core/CLucene/index/IndexReader.cpp             |   4 +
 src/core/CLucene/index/IndexReader.h               |   3 +
 src/core/CLucene/index/IndexVersion.h              |   8 +
 src/core/CLucene/index/IndexWriter.cpp             |  53 ++--
 src/core/CLucene/index/MultiSegmentReader.cpp      |  24 +-
 src/core/CLucene/index/SDocumentWriter.cpp         |  50 ++-
 src/core/CLucene/index/SDocumentWriter.h           |   1 +
 src/core/CLucene/index/SegmentReader.cpp           |   5 +
 src/core/CLucene/index/SegmentTermDocs.cpp         | 348 +++++++++------------
 src/core/CLucene/index/Terms.h                     |   4 +
 src/core/CLucene/index/_FieldInfos.h               |   8 +
 src/core/CLucene/index/_MultiSegmentReader.h       |   4 +
 src/core/CLucene/index/_SegmentHeader.h            |  69 ++++
 src/core/CLucene/search/query/DcoIdSetIterator.h   |  16 +
 src/core/CLucene/search/query/TermIterator.h       |  51 +++
 src/core/CMakeLists.txt                            |   2 +
 21 files changed, 435 insertions(+), 268 deletions(-)

diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp 
b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
index ef126c97..c72e2451 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
@@ -50,8 +50,9 @@ CL_NS(analysis)::Token 
*ChineseTokenizer::next(lucene::analysis::Token *token) {
         dataLen = tokens_text.size();
     }
     if (bufferIndex < dataLen) {
-        auto token_text = tokens_text[bufferIndex++];
-        token->setNoCopy(token_text.data(), 0, token_text.size());
+        std::string& token_text = tokens_text[bufferIndex++];
+        size_t size = std::min(token_text.size(), 
static_cast<size_t>(LUCENE_MAX_WORD_LEN));
+        token->setNoCopy(token_text.data(), 0, size);
         return token;
     }
     return nullptr;
diff --git a/src/core/CLucene/document/Field.h 
b/src/core/CLucene/document/Field.h
index 970d06ee..23c0ad17 100644
--- a/src/core/CLucene/document/Field.h
+++ b/src/core/CLucene/document/Field.h
@@ -9,6 +9,8 @@
 
 #include "CLucene/util/Array.h"
 #include "CLucene/util/Equators.h"
+#include "CLucene/index/IndexVersion.h"
+
 /*
 Fieldable reading:
 
https://issues.apache.org/jira/browse/LUCENE-1219?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-
 tabpanel&focusedCommentId=12578199#action_12578199
@@ -310,6 +312,9 @@ public:
        virtual const char* getObjectName() const;
        static const char* getClassName();
 
+       void setIndexVersion(IndexVersion indexVersion) { indexVersion_ = 
indexVersion; }
+       IndexVersion getIndexVersion() { return indexVersion_; }
+
 protected:
        /**
        * Set configs using XOR. This resets all the settings
@@ -327,6 +332,8 @@ protected:
        const TCHAR* _name;
        uint32_t config;
        float_t boost;
+
+       IndexVersion indexVersion_ = IndexVersion::kV1;
 };
 CL_NS_END
 #endif
diff --git a/src/core/CLucene/index/CodeMode.h 
b/src/core/CLucene/index/CodeMode.h
index 5ac3718f..3c39e94e 100644
--- a/src/core/CLucene/index/CodeMode.h
+++ b/src/core/CLucene/index/CodeMode.h
@@ -4,7 +4,8 @@ CL_NS_DEF(index)
 
 enum class CodeMode { 
   kDefault = 0,
-  kPfor = 1
+  kPfor = 1,
+  kRange = 2
 };
 
 CL_NS_END
\ No newline at end of file
diff --git a/src/core/CLucene/index/DocRange.h 
b/src/core/CLucene/index/DocRange.h
index 81b27047..ef7906a2 100644
--- a/src/core/CLucene/index/DocRange.h
+++ b/src/core/CLucene/index/DocRange.h
@@ -15,15 +15,16 @@ enum class DocRangeType {
 
 class DocRange {
  public:
-  DocRange() : doc_many(PFOR_BLOCK_SIZE + 3), freq_many(PFOR_BLOCK_SIZE + 3) {}
+  DocRange() = default;
+  ~DocRange() = default;
 
  public:
   DocRangeType type_ = DocRangeType::kNone;
 
   uint32_t doc_many_size_ = 0;
   uint32_t freq_many_size_ = 0;
-  std::vector<uint32_t> doc_many;
-  std::vector<uint32_t> freq_many;
+  std::vector<uint32_t>* doc_many = nullptr;
+  std::vector<uint32_t>* freq_many = nullptr;
 
   std::pair<uint32_t, uint32_t> doc_range;
 };
\ No newline at end of file
diff --git a/src/core/CLucene/index/FieldInfos.cpp 
b/src/core/CLucene/index/FieldInfos.cpp
index b3260e6d..00e0c427 100644
--- a/src/core/CLucene/index/FieldInfos.cpp
+++ b/src/core/CLucene/index/FieldInfos.cpp
@@ -103,6 +103,17 @@ bool FieldInfos::hasProx() {
        return false;
 }
 
+IndexVersion FieldInfos::getIndexVersion() {
+       int numFields = byNumber.size();
+       for (int i = 0; i < numFields; i++) {
+               FieldInfo* fi = fieldInfo(i);
+               if (fi->indexVersion_ > IndexVersion::kV0) {
+                       return fi->indexVersion_;
+               }
+       }
+       return IndexVersion::kV0;
+}
+
 void FieldInfos::addIndexed(const TCHAR** names, const bool storeTermVectors, 
const bool storePositionWithTermVector,
                                                        const bool 
storeOffsetWithTermVector) {
        size_t i = 0;
@@ -228,8 +239,16 @@ void FieldInfos::write(IndexOutput* output) const{
                if (fi->storePayloads) bits |= STORE_PAYLOADS;
                if (fi->hasProx) bits |= TERM_FREQ_AND_POSITIONS;
 
+               if (fi->getIndexVersion() > IndexVersion::kV0) {
+                       bits |= 0x80;
+               }
+
                output->writeString(fi->name,_tcslen(fi->name));
                output->writeByte(bits);
+
+               if (fi->getIndexVersion() > IndexVersion::kV0) {
+                       
output->writeVInt(static_cast<int32_t>(fi->getIndexVersion()));
+               }
        }
 }
 
@@ -239,7 +258,7 @@ void FieldInfos::read(IndexInput* input) {
        bool 
isIndexed,storeTermVector,storePositionsWithTermVector,storeOffsetWithTermVector,omitNorms,hasProx,storePayloads;
        for (int32_t i = 0; i < size; ++i){
            TCHAR* name = input->readString(); //we could read name into a 
string buffer, but we can't be sure what the maximum field length will be.
-               bits = input->readByte();
+                       bits = input->readByte();
                isIndexed = (bits & IS_INDEXED) != 0;
                storeTermVector = (bits & STORE_TERMVECTOR) != 0;
                storePositionsWithTermVector = (bits & 
STORE_POSITIONS_WITH_TERMVECTOR) != 0;
@@ -247,8 +266,14 @@ void FieldInfos::read(IndexInput* input) {
                omitNorms = (bits & OMIT_NORMS) != 0;
                        storePayloads = (bits & STORE_PAYLOADS) != 0;
                        hasProx = (bits & TERM_FREQ_AND_POSITIONS) != 0;
-   
-               addInternal(name, isIndexed, storeTermVector, 
storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, hasProx, 
storePayloads);
+
+               FieldInfo* fi = addInternal(name, isIndexed, storeTermVector, 
storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, hasProx, 
storePayloads);
+                       if ((bits & 0x80) == 0) {
+                               fi->setIndexVersion(IndexVersion::kV0);
+                       } else {
+                               IndexVersion indexVersion = 
(IndexVersion)input->readVInt();
+                               fi->setIndexVersion(indexVersion);
+                       } 
                _CLDELETE_CARRAY(name);
        }
 }
diff --git a/src/core/CLucene/index/IndexReader.cpp 
b/src/core/CLucene/index/IndexReader.cpp
index 91ca18ed..0fb2fe59 100644
--- a/src/core/CLucene/index/IndexReader.cpp
+++ b/src/core/CLucene/index/IndexReader.cpp
@@ -529,4 +529,8 @@ CL_NS(store)::Directory* IndexReader::getDirectory() {
                _internal->closeCallbacks.put(callback, parameter);
        }
 
+IndexVersion IndexReader::getIndexVersion() {
+  _CLTHROWA(CL_ERR_UnsupportedOperation, "IndexReader This reader does not 
support this method.");
+}
+
 CL_NS_END
diff --git a/src/core/CLucene/index/IndexReader.h 
b/src/core/CLucene/index/IndexReader.h
index 9079025a..defb6e75 100644
--- a/src/core/CLucene/index/IndexReader.h
+++ b/src/core/CLucene/index/IndexReader.h
@@ -11,6 +11,7 @@
 #include "CLucene/util/Array.h"
 #include "CLucene/util/VoidList.h"
 #include "CLucene/LuceneThreads.h"
+#include "CLucene/index/IndexVersion.h"
 
 CL_CLASS_DEF(store,Directory)
 CL_CLASS_DEF(store,LuceneLock)
@@ -676,6 +677,8 @@ public:
        */
        void addCloseCallback(CloseCallback callback, void* parameter);
 
+  virtual IndexVersion getIndexVersion();
+
   friend class SegmentReader;
   friend class MultiReader;
   friend class IndexWriter;
diff --git a/src/core/CLucene/index/IndexVersion.h 
b/src/core/CLucene/index/IndexVersion.h
new file mode 100644
index 00000000..448b1872
--- /dev/null
+++ b/src/core/CLucene/index/IndexVersion.h
@@ -0,0 +1,8 @@
+#pragma once
+
+enum class IndexVersion {
+  kV0 = 0,
+  kV1 = 1,
+
+  kNone
+};
\ No newline at end of file
diff --git a/src/core/CLucene/index/IndexWriter.cpp 
b/src/core/CLucene/index/IndexWriter.cpp
index 24ac2b08..6abd2716 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -280,10 +280,10 @@ void IndexWriter::init(Directory *d, Analyzer *a, const 
bool create, const bool
             if (analyzer->isSDocOpt()) {
                 docWriter = _CLNEW SDocumentsWriter<char>(directory, this);
             } else {
-                docWriter = _CLNEW DocumentsWriter(directory, this);
+                _CLTHROWA(CL_ERR_IllegalArgument, "IndexWriter::init: Only 
support SDocumentsWriter");
             }
         } else {
-            docWriter = _CLNEW DocumentsWriter(directory, this);
+            _CLTHROWA(CL_ERR_IllegalArgument, "IndexWriter::init: Only support 
SDocumentsWriter");
         }
         // Default deleter (for backwards compatibility) is
         // KeepOnlyLastCommitDeleter:
@@ -1702,14 +1702,13 @@ void IndexWriter::mergeTerms(bool hasProx) {
                 }
 
                 if ((++df % skipInterval) == 0) {
+                    freqOut->writeByte((char)CodeMode::kPfor);
+                    freqOut->writeVInt(docDeltaBuffer.size());
+                    encode(freqOut, docDeltaBuffer, true);
                     if (hasProx) {
-                        freqOut->writeByte((char)CodeMode::kPfor);
-                        freqOut->writeVInt(docDeltaBuffer.size());
-                        // doc
-                        encode(freqOut, docDeltaBuffer, true);
-                        // freq
                         encode(freqOut, freqBuffer, false);
                     }
+
                     skipWriter->setSkipData(lastDoc, false, -1);
                     skipWriter->bufferSkip(df);
                 }
@@ -1717,8 +1716,8 @@ void IndexWriter::mergeTerms(bool hasProx) {
                 assert(destDocId > lastDoc || df == 1);
                 lastDoc = destDocId;
 
+                docDeltaBuffer.push_back(destDocId);
                 if (hasProx) {
-                    // position
                     int32_t lastPosition = 0;
                     for (int32_t i = 0; i < descPositions.size(); i++) {
                         int32_t position = descPositions[i];
@@ -1726,15 +1725,7 @@ void IndexWriter::mergeTerms(bool hasProx) {
                         proxOut->writeVInt(delta);
                         lastPosition = position;
                     }
-
-                    docDeltaBuffer.push_back(destDocId);
                     freqBuffer.push_back(destFreq);
-                } else {
-                    docDeltaBuffer.push_back(destDocId);
-                    if (docDeltaBuffer.size() == PFOR_BLOCK_SIZE) {
-                        freqOut->writeVInt(docDeltaBuffer.size());
-                        encode(freqOut, docDeltaBuffer, true);
-                    }
                 }
 
                 smi = match[destDoc->srcIdx];
@@ -1775,7 +1766,7 @@ void IndexWriter::mergeTerms(bool hasProx) {
                 proxPointer = proxPointers[i];
             }
 
-            if (hasProx) {
+            {
                 auto& docDeltaBuffer = docDeltaBuffers[i];
                 auto& freqBuffer = freqBuffers[i];
 
@@ -1783,26 +1774,24 @@ void IndexWriter::mergeTerms(bool hasProx) {
                 freqOutput->writeVInt(docDeltaBuffer.size());
                 uint32_t lastDoc = 0;
                 for (int32_t i = 0; i < docDeltaBuffer.size(); i++) {
-                    uint32_t newDocCode = (docDeltaBuffer[i] - lastDoc) << 1;
-                    lastDoc = docDeltaBuffer[i];
-                    uint32_t freq = freqBuffer[i];
-                    if (1 == freq) {
-                        freqOutput->writeVInt(newDocCode | 1);
+                    uint32_t curDoc = docDeltaBuffer[i];
+                    if (hasProx) {
+                        uint32_t newDocCode = (docDeltaBuffer[i] - lastDoc) << 
1;
+                        lastDoc = curDoc;
+                        uint32_t freq = freqBuffer[i];
+                        if (1 == freq) {
+                            freqOutput->writeVInt(newDocCode | 1);
+                        } else {
+                            freqOutput->writeVInt(newDocCode);
+                            freqOutput->writeVInt(freq);
+                        }
                     } else {
-                        freqOutput->writeVInt(newDocCode);
-                        freqOutput->writeVInt(freq);
+                        freqOutput->writeVInt(curDoc - lastDoc);
+                        lastDoc = curDoc;
                     }
                 }
                 docDeltaBuffer.resize(0);
                 freqBuffer.resize(0);
-            } else {
-                freqOutput->writeVInt(docDeltaBuffers[i].size());
-                uint32_t lDoc = 0;
-                for (auto &docDelta: docDeltaBuffers[i]) {
-                    freqOutput->writeVInt(docDelta - lDoc);
-                    lDoc = docDelta;
-                }
-                docDeltaBuffers[i].resize(0);
             }
             
             int64_t skipPointer = skipListWriter->writeSkip(freqOutput);
diff --git a/src/core/CLucene/index/MultiSegmentReader.cpp 
b/src/core/CLucene/index/MultiSegmentReader.cpp
index 01438080..d4e8c8ea 100644
--- a/src/core/CLucene/index/MultiSegmentReader.cpp
+++ b/src/core/CLucene/index/MultiSegmentReader.cpp
@@ -485,6 +485,12 @@ const char* MultiSegmentReader::getObjectName() const{
   return getClassName();
 }
 
+IndexVersion MultiSegmentReader::getIndexVersion() {
+       for (size_t i = 0; i < subReaders->length; i++) {
+               return (*subReaders)[i]->getIndexVersion();
+       }
+       _CLTHROWA(CL_ERR_IllegalState, "MultiSegmentReader::getIndexVersion 
index open failed.");
+}
 
 
 
@@ -540,6 +546,14 @@ TermPositions* MultiTermDocs::__asTermPositions(){
   return NULL;
 }
 
+int32_t MultiTermDocs::docFreq() {
+       int32_t docFreq = 0;
+       for (size_t i = 0; i < readerTermDocs->length; i++) {
+               docFreq += readerTermDocs->values[i]->docFreq();
+       }
+       return docFreq;
+}
+
 int32_t MultiTermDocs::doc() const {
   CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was 
called");
   return base + current->doc();
@@ -577,6 +591,12 @@ void MultiTermDocs::seek( Term* tterm) {
        base = 0;
        pointer = 0;
        current = NULL;
+
+       for (int32_t i = 0; i < readerTermDocs->length; i++) {
+               termDocs(i);
+       }
+       base = starts[pointer];
+       current = termDocs(pointer++);
 }
 
 bool MultiTermDocs::next() {
@@ -628,8 +648,8 @@ bool MultiTermDocs::readRange(DocRange* docRange) {
                        current = nullptr;
                } else {
                        if (docRange->type_ == DocRangeType::kMany) {
-                               auto begin = docRange->doc_many.begin();
-                               auto end = docRange->doc_many.begin() + 
docRange->doc_many_size_;
+                               auto begin = docRange->doc_many->begin();
+                               auto end = docRange->doc_many->begin() + 
docRange->doc_many_size_;
                                std::transform(begin, end, begin, 
[this](int32_t val) { return val + base; });
                        } else if (docRange->type_ == DocRangeType::kRange) {
                                docRange->doc_range.first += base;
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp 
b/src/core/CLucene/index/SDocumentWriter.cpp
index 5d6de917..c76b31fb 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -166,6 +166,7 @@ void SDocumentsWriter<T>::ThreadState::init(Document *doc, 
int32_t doc_id) {
         FieldInfo *fi = _parent->fieldInfos->add(field->name(), 
field->isIndexed(), field->isTermVectorStored(),
                                                  
field->isStorePositionWithTermVector(), field->isStoreOffsetWithTermVector(),
                                                  field->getOmitNorms(), 
!field->getOmitTermFreqAndPositions(), false);
+        fi->setIndexVersion(field->getIndexVersion());
         if (fi->isIndexed && !fi->omitNorms) {
             // Maybe grow our buffered norms
             if (_parent->norms.length <= fi->number) {
@@ -240,6 +241,7 @@ void SDocumentsWriter<T>::ThreadState::init(Document *doc, 
int32_t doc_id) {
         fp->docFields.values[fp->fieldCount++] = field;
     }
     _parent->hasProx_ = _parent->fieldInfos->hasProx();
+    _parent->indexVersion_ = _parent->fieldInfos->getIndexVersion();
 }
 
 template<typename T>
@@ -1172,12 +1174,10 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
         while (numToMerge > 0) {
 
             if ((++df % skipInterval) == 0) {
+                freqOut->writeByte((char)CodeMode::kPfor);
+                freqOut->writeVInt(docDeltaBuffer.size());
+                encode(freqOut, docDeltaBuffer, true);
                 if (hasProx_) {
-                    freqOut->writeByte((char)CodeMode::kPfor);
-                    freqOut->writeVInt(docDeltaBuffer.size());
-                    // doc
-                    encode(freqOut, docDeltaBuffer, true);
-                    // freq
                     encode(freqOut, freqBuffer, false);
                 }
 
@@ -1205,22 +1205,14 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
             // changing the format to match Lucene's segment
             // format.
 
+            docDeltaBuffer.push_back(doc);
             if (hasProx_) {
-                // position
                 for (int32_t j = 0; j < termDocFreq; j++) {
                     const int32_t code = prox.readVInt();
                     assert(0 == (code & 1));
                     proxOut->writeVInt(code >> 1);
                 }
-
-                docDeltaBuffer.push_back(doc);
                 freqBuffer.push_back(termDocFreq);
-            } else {
-                docDeltaBuffer.push_back(doc);
-                if (docDeltaBuffer.size() == PFOR_BLOCK_SIZE) {
-                    freqOut->writeVInt(docDeltaBuffer.size());
-                    encode(freqOut, docDeltaBuffer, true);
-                }
             }
 
             if (!minState->nextDoc()) {
@@ -1253,13 +1245,14 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
 
         // Done merging this term
         {
-            if (hasProx_) {
-                freqOut->writeByte((char)CodeMode::kDefault);
-                freqOut->writeVInt(docDeltaBuffer.size());
-                uint32_t lastDoc = 0;
-                for (int32_t i = 0; i < docDeltaBuffer.size(); i++) {
-                    uint32_t newDocCode = (docDeltaBuffer[i] - lastDoc) << 1;
-                    lastDoc = docDeltaBuffer[i];
+            freqOut->writeByte((char)CodeMode::kDefault);
+            freqOut->writeVInt(docDeltaBuffer.size());
+            uint32_t lastDoc = 0;
+            for (int32_t i = 0; i < docDeltaBuffer.size(); i++) {
+                uint32_t curDoc = docDeltaBuffer[i];
+                if (hasProx_) {
+                    uint32_t newDocCode = (curDoc - lastDoc) << 1;
+                    lastDoc = curDoc;
                     uint32_t freq = freqBuffer[i];
                     if (1 == freq) {
                         freqOut->writeVInt(newDocCode | 1);
@@ -1267,18 +1260,13 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
                         freqOut->writeVInt(newDocCode);
                         freqOut->writeVInt(freq);
                     }
+                } else {
+                    freqOut->writeVInt(curDoc - lastDoc);
+                    lastDoc = curDoc;
                 }
-                docDeltaBuffer.resize(0);
-                freqBuffer.resize(0);
-            } else {
-                freqOut->writeVInt(docDeltaBuffer.size());
-                uint32_t lDoc = 0;
-                for (auto& docDelta : docDeltaBuffer) {
-                    freqOut->writeVInt(docDelta - lDoc);
-                    lDoc = docDelta;
-                }
-                docDeltaBuffer.resize(0);
             }
+            docDeltaBuffer.resize(0);
+            freqBuffer.resize(0);
         }
         
         int64_t skipPointer = skipListWriter->writeSkip(freqOut);
diff --git a/src/core/CLucene/index/SDocumentWriter.h 
b/src/core/CLucene/index/SDocumentWriter.h
index 5163c2a1..7e250be3 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -54,6 +54,7 @@ private:
     std::ostream* infoStream{};
     int64_t ramBufferSize;
     bool hasProx_ = false;
+    IndexVersion indexVersion_ = IndexVersion::kV1;
 
 public:
     class FieldMergeState;
diff --git a/src/core/CLucene/index/SegmentReader.cpp 
b/src/core/CLucene/index/SegmentReader.cpp
index 0b53de82..e0ec0c14 100644
--- a/src/core/CLucene/index/SegmentReader.cpp
+++ b/src/core/CLucene/index/SegmentReader.cpp
@@ -1123,4 +1123,9 @@ bool SegmentReader::normsClosed() {
     }
     return true;
 }
+
+IndexVersion SegmentReader::getIndexVersion() {
+    return _fieldInfos->getIndexVersion();
+}
+
 CL_NS_END
diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp 
b/src/core/CLucene/index/SegmentTermDocs.cpp
index 765f6424..9108f1df 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -21,7 +21,8 @@ CL_NS_DEF(index)
 SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : 
parent(_parent), freqStream(_parent->freqStream->clone()),
                                                                  count(0), 
df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), 
skipInterval(_parent->tis->getSkipInterval()),
                                                                  
maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), 
freqBasePointer(0), proxBasePointer(0),
-                                                                 
skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0) {
+                                                                 
skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), 
indexVersion_(_parent->_fieldInfos->getIndexVersion()),
+                                                                 
hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, 
indexVersion_) {
     CND_CONDITION(_parent != NULL, "Parent is NULL");
     memset(docs,0,PFOR_BLOCK_SIZE*sizeof(int32_t));
     memset(freqs,0,PFOR_BLOCK_SIZE*sizeof(int32_t));
@@ -35,6 +36,10 @@ TermPositions *SegmentTermDocs::__asTermPositions() {
     return NULL;
 }
 
+int32_t SegmentTermDocs::docFreq() {
+    return df;
+}
+
 void SegmentTermDocs::seek(Term *term) {
     TermInfo *ti = parent->tis->get(term);
     seek(ti, term);
@@ -63,7 +68,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) {
     count = 0;
     FieldInfo *fi = parent->_fieldInfos->fieldInfo(term->field());
     currentFieldStoresPayloads = (fi != NULL) ? fi->storePayloads : false;
-    hasProx = (fi != nullptr) && fi->hasProx;
+    // hasProx = (fi != nullptr) && fi->hasProx;
     if (ti == NULL) {
         df = 0;
     } else {// punt case
@@ -90,141 +95,39 @@ int32_t SegmentTermDocs::freq() const {
 }
 
 bool SegmentTermDocs::next()  {
-    pointer++;
-    if (pointer >= pointerMax) {
-        pointerMax = SegmentTermDocs::read(docs, freqs, PFOR_BLOCK_SIZE);    
// refill buffer
-        if (pointerMax != 0) {
-            pointer = 0;
-        } else {
-            // NOTE: do not close here, try to close by upper caller or 
destruct.
-            //SegmentTermDocs::close();                          // close 
stream
-            _doc = LUCENE_INT32_MAX_SHOULDBE;            // set to sentinel 
value
-            return false;
-        }
+    if (count == df) {
+        _doc = LUCENE_INT32_MAX_SHOULDBE;
+        return false;
     }
-    _doc = docs[pointer];
-    _freq = freqs[pointer];
-    return true;
-}
 
-/*bool SegmentTermDocs::next() {
-    while (true) {
-        if (count == df)
-            return false;
+    _doc = buffer_.getDoc();
+    if (hasProx) {
+        _freq = buffer_.getFreq();
+    }
 
-        uint32_t docCode = freqStream->readVInt();
-        _doc += docCode >> 1;  //unsigned shift
-        if ((docCode & 1) != 0)// if low bit is set
-            _freq = 1;         // _freq is one
-        else
-            _freq = freqStream->readVInt();// else read _freq
-        count++;
+    count++;
 
-        if ((deletedDocs == NULL) || (_doc >= 0 && deletedDocs->get(_doc) == 
false))
-            break;
-        skippingDoc();
-    }
     return true;
-}*/
+}
 
 int32_t SegmentTermDocs::read(int32_t *docs, int32_t *freqs, int32_t length) {
     int32_t i = 0;
+    
+    if (count == df) {
+        return i;
+    }
 
-    if (hasProx) {
-        if (count == df) {
-            return i;
-        }
-
-        char mode = freqStream->readByte();
-
-        uint32_t arraySize = freqStream->readVInt();
-        std::vector<uint32_t> _docs(arraySize);
-        std::vector<uint32_t> _freqs(arraySize);
-
-        if (mode == (char)CodeMode::kDefault) {
-            uint32_t docDelta = 0;
-            for (uint32_t i = 0; i < arraySize; i++) {
-                uint32_t docCode = freqStream->readVInt();
-                docDelta += (docCode >> 1);
-                _docs[i] = docDelta;
-                if ((docCode & 1) != 0) {
-                    _freqs[i] = 1;
-                } else {
-                    _freqs[i] = freqStream->readVInt();
-                }
-            }
-        } else {
-            // NOTE: Pad arraySize from 511 to 512 for alignment since the 
first block size is 511, and add one more extra space to prevent overflow.
-            auto paddingSize = (arraySize / PFOR_BLOCK_SIZE) * PFOR_BLOCK_SIZE 
+ PFOR_BLOCK_SIZE;
-            _docs.resize(paddingSize + 1);
-            _freqs.resize(paddingSize + 1);
-            {
-                uint32_t SerializedSize = freqStream->readVInt();
-                std::vector<uint8_t> buf(SerializedSize + PFOR_BLOCK_SIZE);
-                freqStream->readBytes(buf.data(), SerializedSize);
-                P4DEC(buf.data(), arraySize, _docs.data());
-            }
-            {
-                uint32_t SerializedSize = freqStream->readVInt();
-                std::vector<uint8_t> buf(SerializedSize + PFOR_BLOCK_SIZE);
-                freqStream->readBytes(buf.data(), SerializedSize);
-                P4NZDEC(buf.data(), arraySize, _freqs.data());
-            }
-        }
+    while (i < length && count < df) {
+        _doc = buffer_.getDoc();
+        docs[i] = _doc;
 
-        while (i < arraySize && count < df) {
-            _doc = _docs[i];
-            _freq = _freqs[i];
-            count++;
-            if (deletedDocs == NULL || (_doc >= 0 && !deletedDocs->get(_doc))) 
{
-                docs[i] = _doc;
-                freqs[i] = _freq;
-                i++;
-            }
+        if (hasProx) {
+            _freq = buffer_.getFreq();
+            freqs[i] = _freq;
         }
 
-    } else {
-        //todo: one optimization would be to get the pointer buffer for ram or 
mmap dirs
-        //and iterate over them instead of using readByte() intensive 
functions.
-            if (i < length && count < df) {
-                uint32_t arraySize = freqStream->readVInt();
-                if (arraySize < PFOR_BLOCK_SIZE) {
-                    int32_t _docDelta{0};
-                    while (i < length && count < df && i < arraySize) {
-                        // manually inlined call to next() for speed
-                        uint32_t docCode = freqStream->readVInt();
-                        _docDelta += docCode;
-                        _doc = _docDelta;
-                        _freq = 1;         // _freq is one
-                        count++;
-
-                        if (deletedDocs == NULL || (_doc >= 0 && 
!deletedDocs->get(_doc))) {
-                            docs[i] = _doc;
-                            freqs[i] = _freq;
-                            i++;
-                        }
-                    }
-                } else {
-                // need one more space for decode, otherwise will buffer 
overflow when decoding
-                    std::vector<uint32_t> arr(arraySize + 1);
-                    uint32_t serializedSize = freqStream->readVInt();
-                // need more space for decode, otherwise will buffer overflow 
when decoding
-                    std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
-                    freqStream->readBytes(buf.data(), serializedSize);
-                    P4DEC(buf.data(), arraySize, arr.data());
-
-                    while (i < length && count < df && i < arraySize) {
-                        _doc = arr[i];
-                        _freq = 1;// _freq is one
-                        if (deletedDocs == NULL || (_doc >= 0 && 
!deletedDocs->get(_doc))) {
-                            docs[i] = _doc;
-                            freqs[i] = _freq;
-                            i++;
-                        }
-                        count++;
-                }
-            }
-        }
+        count++;
+        i++;
     }
 
     return i;
@@ -235,75 +138,17 @@ bool SegmentTermDocs::readRange(DocRange* docRange) {
         return false;
     }
 
-    if (hasProx) {
-        char mode = freqStream->readByte();
-        uint32_t arraySize = freqStream->readVInt();
-        if (mode == (char)CodeMode::kDefault) {
-            uint32_t docDelta = 0;
-            for (uint32_t i = 0; i < arraySize; i++) {
-                uint32_t docCode = freqStream->readVInt();
-                docDelta += (docCode >> 1);
-                docRange->doc_many[i] = docDelta;
-                if ((docCode & 1) != 0) {
-                    docRange->freq_many[i] = 1;
-                } else {
-                    docRange->freq_many[i] = freqStream->readVInt();
-                }
-            }
-            docRange->type_ = DocRangeType::kMany;
-            docRange->doc_many_size_ = arraySize;
-            docRange->freq_many_size_ = arraySize;
-        } else {
-            {
-                uint32_t SerializedSize = freqStream->readVInt();
-                std::vector<uint8_t> buf(SerializedSize + PFOR_BLOCK_SIZE);
-                freqStream->readBytes(buf.data(), SerializedSize);
-                P4DEC(buf.data(), arraySize, docRange->doc_many.data());
-            }
-            {
-                uint32_t SerializedSize = freqStream->readVInt();
-                std::vector<uint8_t> buf(SerializedSize + PFOR_BLOCK_SIZE);
-                freqStream->readBytes(buf.data(), SerializedSize);
-                P4NZDEC(buf.data(), arraySize, docRange->freq_many.data());
-            }
-            docRange->type_ = DocRangeType::kMany;
-            docRange->doc_many_size_ = arraySize;
-            docRange->freq_many_size_ = arraySize;
-        }
-        count += arraySize;
-    } else {
-        uint32_t arraySize = freqStream->readVInt();
-        if (arraySize < PFOR_BLOCK_SIZE) {
-            uint32_t docDelta = 0;
-            for (uint32_t i = 0; i < arraySize; i++) {
-                uint32_t docCode = freqStream->readVInt();
-                docDelta += docCode;
-                docRange->doc_many[i] = docDelta;
-            }
-            docRange->type_ = DocRangeType::kMany;
-            docRange->doc_many_size_ = arraySize;
-        } else {
-            {
-                uint32_t serializedSize = freqStream->readVInt();
-                std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
-                freqStream->readBytes(buf.data(), serializedSize);
-                P4DEC(buf.data(), arraySize, docRange->doc_many.data());
-            }
-            docRange->type_ = DocRangeType::kMany;
-            docRange->doc_many_size_ = arraySize;
-        }
-        count += arraySize;
-    }
+    buffer_.readRange(docRange);
 
-    if (docRange->type_ == DocRangeType::kMany) {
-        if (docRange->doc_many_size_ > 0) {
-            uint32_t start = docRange->doc_many[0];
-            uint32_t end = docRange->doc_many[docRange->doc_many_size_ - 1];
-            if ((end - start) == docRange->doc_many_size_ - 1) {
-                docRange->doc_range.first = start;
-                docRange->doc_range.second = start + docRange->doc_many_size_;
-                docRange->type_ = DocRangeType::kRange;
-            }
+    count += docRange->doc_many_size_;
+
+    if (docRange->doc_many_size_ > 0) {
+        uint32_t start = (*docRange->doc_many)[0];
+        uint32_t end = (*docRange->doc_many)[docRange->doc_many_size_ - 1];
+        if ((end - start) == docRange->doc_many_size_ - 1) {
+            docRange->doc_range.first = start;
+            docRange->doc_range.second = start + docRange->doc_many_size_;
+            docRange->type_ = DocRangeType::kRange;
         }
     }
 
@@ -329,7 +174,7 @@ bool SegmentTermDocs::skipTo(const int32_t target) {
 
             _doc = skipListReader->getDoc();
             count = newCount;
-            pointer = pointerMax;
+            buffer_.refill();
         }
     }
 
@@ -341,5 +186,120 @@ bool SegmentTermDocs::skipTo(const int32_t target) {
     return true;
 }
 
+// Rewinds both read cursors and decodes the next block from the stream,
+// picking the decoder that matches the index format version. The number of
+// decoded entries becomes the new block size.
+void TermDocsBuffer::refill() {
+    cur_freq_ = 0;
+    cur_doc_ = 0;
+
+    const bool isV1 = (indexVersion_ == IndexVersion::kV1);
+    size_ = isV1 ? refillV1() : refillV0();
+}
+
+// Decodes the next block and publishes it through docRange as a kMany range.
+//
+// docRange is pointed at this buffer's own vectors rather than given a copy,
+// so its contents are overwritten by the next decode. Frequencies are only
+// published when the field stores them. The getDoc()/getFreq() cursors and
+// size_ are left untouched.
+void TermDocsBuffer::readRange(DocRange* docRange) {
+    const int32_t decoded = (indexVersion_ == IndexVersion::kV1) ? refillV1() : refillV0();
+
+    docRange->type_ = DocRangeType::kMany;
+    docRange->doc_many = &docs_;
+    docRange->doc_many_size_ = decoded;
+
+    if (!hasProx_) {
+        return;
+    }
+    docRange->freq_many = &freqs_;
+    docRange->freq_many_size_ = decoded;
+}
+
+// Decodes the next block of a V0-format posting list into docs_ (and into
+// freqs_ when the field stores frequencies) and returns the number of entries
+// the block announced.
+//
+// Block layout as consumed here:
+//   hasProx_:  [mode byte][count VInt], then either
+//                kPfor    - a length-prefixed PFOR payload for the docs
+//                           followed by one for the freqs, or
+//                kDefault - per entry one VInt whose low bit flags "freq is 1"
+//                           and whose remaining bits are added to a running
+//                           doc id; when the flag is clear an explicit freq
+//                           VInt follows.
+//   !hasProx_: [count VInt], then plain VInt doc deltas when count is below
+//              PFOR_BLOCK_SIZE, otherwise one length-prefixed PFOR payload.
+//
+// Throws CL_ERR_CorruptIndex when the block announces more entries than the
+// buffers can hold, or (hasProx_ only) carries an unrecognised mode byte.
+// Without these checks a damaged stream would write past the end of the
+// buffers or report entries that were never decoded.
+int32_t TermDocsBuffer::refillV0() {
+    if (hasProx_) {
+        const char mode = freqStream_->readByte();
+        const uint32_t arraySize = freqStream_->readVInt();
+        if (arraySize > docs_.size() || arraySize > freqs_.size()) {
+            _CLTHROWA(CL_ERR_CorruptIndex,
+                      "TermDocsBuffer::refillV0: block size exceeds buffer capacity.");
+        }
+
+        if (mode == (char)CodeMode::kPfor) {
+            {
+                const uint32_t serializedSize = freqStream_->readVInt();
+                // The scratch buffer carries PFOR_BLOCK_SIZE spare bytes so the
+                // decoder cannot read past the end of the payload.
+                std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
+                freqStream_->readBytes(buf.data(), serializedSize);
+                P4DEC(buf.data(), arraySize, docs_.data());
+            }
+            {
+                const uint32_t serializedSize = freqStream_->readVInt();
+                std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
+                freqStream_->readBytes(buf.data(), serializedSize);
+                P4NZDEC(buf.data(), arraySize, freqs_.data());
+            }
+        } else if (mode == (char)CodeMode::kDefault) {
+            uint32_t docDelta = 0;
+            for (uint32_t i = 0; i < arraySize; i++) {
+                const uint32_t docCode = freqStream_->readVInt();
+                docDelta += (docCode >> 1);
+                docs_[i] = docDelta;
+                if ((docCode & 1) != 0) {
+                    freqs_[i] = 1;
+                } else {
+                    freqs_[i] = freqStream_->readVInt();
+                }
+            }
+        } else {
+            _CLTHROWA(CL_ERR_CorruptIndex,
+                      "TermDocsBuffer::refillV0: unknown block code mode.");
+        }
+        return static_cast<int32_t>(arraySize);
+    }
+
+    // No frequencies stored: the block has no mode byte, the encoding is
+    // implied by the entry count alone.
+    const uint32_t arraySize = freqStream_->readVInt();
+    if (arraySize > docs_.size()) {
+        _CLTHROWA(CL_ERR_CorruptIndex,
+                  "TermDocsBuffer::refillV0: block size exceeds buffer capacity.");
+    }
+
+    if (arraySize < PFOR_BLOCK_SIZE) {
+        uint32_t docDelta = 0;
+        for (uint32_t i = 0; i < arraySize; i++) {
+            docDelta += freqStream_->readVInt();
+            docs_[i] = docDelta;
+        }
+    } else {
+        const uint32_t serializedSize = freqStream_->readVInt();
+        std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
+        freqStream_->readBytes(buf.data(), serializedSize);
+        P4DEC(buf.data(), arraySize, docs_.data());
+    }
+    return static_cast<int32_t>(arraySize);
+}
+
+// Decodes the next block of a V1-format posting list into docs_ (and into
+// freqs_ when the field stores frequencies) and returns the number of entries
+// the block announced.
+//
+// Every V1 block starts with [mode byte][count VInt], whether or not
+// frequencies are stored:
+//   kPfor    - a length-prefixed PFOR payload for the docs, followed by one
+//              for the freqs only when hasProx_ is set.
+//   kDefault - per entry one VInt. With hasProx_ its low bit flags
+//              "freq is 1" and the remaining bits are added to a running doc
+//              id, with an explicit freq VInt following when the flag is
+//              clear. Without hasProx_ the whole VInt is the doc delta.
+//
+// Throws CL_ERR_CorruptIndex when the block announces more entries than the
+// buffers can hold or carries an unrecognised mode byte. Without these checks
+// a damaged stream would write past the end of the buffers or report entries
+// that were never decoded.
+int32_t TermDocsBuffer::refillV1() {
+    const char mode = freqStream_->readByte();
+    const uint32_t arraySize = freqStream_->readVInt();
+    if (arraySize > docs_.size() || (hasProx_ && arraySize > freqs_.size())) {
+        _CLTHROWA(CL_ERR_CorruptIndex,
+                  "TermDocsBuffer::refillV1: block size exceeds buffer capacity.");
+    }
+
+    if (mode == (char)CodeMode::kPfor) {
+        {
+            const uint32_t serializedSize = freqStream_->readVInt();
+            // The scratch buffer carries PFOR_BLOCK_SIZE spare bytes so the
+            // decoder cannot read past the end of the payload.
+            std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
+            freqStream_->readBytes(buf.data(), serializedSize);
+            P4DEC(buf.data(), arraySize, docs_.data());
+        }
+        if (hasProx_) {
+            const uint32_t serializedSize = freqStream_->readVInt();
+            std::vector<uint8_t> buf(serializedSize + PFOR_BLOCK_SIZE);
+            freqStream_->readBytes(buf.data(), serializedSize);
+            P4NZDEC(buf.data(), arraySize, freqs_.data());
+        }
+    } else if (mode == (char)CodeMode::kDefault) {
+        uint32_t docDelta = 0;
+        for (uint32_t i = 0; i < arraySize; i++) {
+            const uint32_t docCode = freqStream_->readVInt();
+            if (hasProx_) {
+                docDelta += (docCode >> 1);
+                docs_[i] = docDelta;
+                if ((docCode & 1) != 0) {
+                    freqs_[i] = 1;
+                } else {
+                    freqs_[i] = freqStream_->readVInt();
+                }
+            } else {
+                docDelta += docCode;
+                docs_[i] = docDelta;
+            }
+        }
+    } else {
+        _CLTHROWA(CL_ERR_CorruptIndex,
+                  "TermDocsBuffer::refillV1: unknown block code mode.");
+    }
+    return static_cast<int32_t>(arraySize);
+}
 
 CL_NS_END
diff --git a/src/core/CLucene/index/Terms.h b/src/core/CLucene/index/Terms.h
index 4d701ca7..620105fd 100644
--- a/src/core/CLucene/index/Terms.h
+++ b/src/core/CLucene/index/Terms.h
@@ -82,6 +82,10 @@ public:
     *  No dynamic casting is required and no RTTI data is needed to do this
     */
        virtual TermPositions* __asTermPositions()=0;
+
+       /**
+       * Base-class default: always throws CL_ERR_UnsupportedOperation.
+       * Implementations that can report a value override this method
+       * (presumably the number of documents containing the current term -
+       * confirm against the overriding classes).
+       */
+       virtual int32_t docFreq() {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "TermDocs::docFreq does 
not support this method.");
+       }
 };
 
 
diff --git a/src/core/CLucene/index/_FieldInfos.h 
b/src/core/CLucene/index/_FieldInfos.h
index 414dcc90..ed142c44 100644
--- a/src/core/CLucene/index/_FieldInfos.h
+++ b/src/core/CLucene/index/_FieldInfos.h
@@ -8,6 +8,8 @@
 #define _lucene_index_FieldInfos_
 
 #include "CLucene/store/Directory.h"
+#include "CLucene/index/IndexVersion.h"
+
 
 CL_CLASS_DEF(document,Document)
 CL_CLASS_DEF(document,Field)
@@ -33,6 +35,7 @@ class FieldInfo :LUCENE_BASE{
 
        bool omitNorms; // omit norms associated with indexed fields
        bool hasProx = false;
+       IndexVersion indexVersion_ = IndexVersion::kV1;
 
        bool storePayloads; // whether this field stores payloads together with 
term positions
 
@@ -67,6 +70,10 @@ class FieldInfo :LUCENE_BASE{
        * @memory - caller is responsible for deleting the returned object
        */
        FieldInfo* clone();
+
+public:
+       // Plain accessors for indexVersion_, which starts out as
+       // IndexVersion::kV1 unless a caller stores a different value here.
+       void setIndexVersion(IndexVersion indexVersion) { indexVersion_ = 
indexVersion; }
+       IndexVersion getIndexVersion() { return indexVersion_; }
 };
 
 /** Access to the Field Info file that describes document fields and whether or
@@ -126,6 +133,7 @@ public:
        void addIndexed(const TCHAR** names, const bool storeTermVectors, const 
bool storePositionWithTermVector, const bool storeOffsetWithTermVector);
 
        bool hasProx();
+       IndexVersion getIndexVersion();
 
        /**
        * Assumes the fields are not storing term vectors.
diff --git a/src/core/CLucene/index/_MultiSegmentReader.h 
b/src/core/CLucene/index/_MultiSegmentReader.h
index c8460273..46b32caa 100644
--- a/src/core/CLucene/index/_MultiSegmentReader.h
+++ b/src/core/CLucene/index/_MultiSegmentReader.h
@@ -119,6 +119,8 @@ public:
 
   static const char* getClassName();
   const char* getObjectName() const;
+
+  IndexVersion getIndexVersion() override;
 };
 
 
@@ -159,6 +161,8 @@ public:
   void close();
 
   virtual TermPositions* __asTermPositions();
+
+  int32_t docFreq() override;
 };
 
 
diff --git a/src/core/CLucene/index/_SegmentHeader.h 
b/src/core/CLucene/index/_SegmentHeader.h
index 8cbb8959..e177f576 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -25,10 +25,67 @@
 #include "DirectoryIndexReader.h"
 #include "_SkipListReader.h"
 #include "CLucene/util/_ThreadLocal.h"
+#include "CLucene/index/IndexVersion.h"
 
 CL_NS_DEF(index)
 class SegmentReader;
 
+// Decoded-block buffer in front of a posting stream.
+//
+// refill() decodes one block from freqStream_ into docs_ (and freqs_), and
+// getDoc()/getFreq() hand the entries out one at a time, each through its own
+// cursor, refilling when that cursor reaches the end of the block.
+// readRange() decodes a block and exposes the buffers to the caller directly.
+//
+// The class declares no destructor on purpose: it owns nothing beyond the two
+// vectors, which release themselves, and freqStream_ is only borrowed.
+class TermDocsBuffer {
+public:
+  // freqStream is borrowed: this class never closes or deletes it.
+  // Both buffers are pre-sized to PFOR_BLOCK_SIZE + 3 entries
+  // (NOTE(review): the 3 extra slots look like slack for the block decoder -
+  // confirm against the PFOR implementation).
+  TermDocsBuffer(CL_NS(store)::IndexInput* freqStream, bool hasProx,
+                 IndexVersion indexVersion)
+      : docs_(PFOR_BLOCK_SIZE + 3),
+        freqs_(PFOR_BLOCK_SIZE + 3),
+        freqStream_(freqStream),
+        hasProx_(hasProx),
+        indexVersion_(indexVersion) {
+  }
+
+  // Returns the next doc id, decoding a new block first when the current one
+  // is used up.
+  // NOTE(review): if refill() yields an empty block this still returns
+  // docs_[0]; callers appear to be expected to stop before reading past the
+  // last posting - confirm.
+  int32_t getDoc() {
+    if (cur_doc_ >= size_) {
+      refill();
+    }
+    return docs_[cur_doc_++];
+  }
+
+  // Returns the next frequency, decoding a new block first when the frequency
+  // cursor has reached the end of the current one.
+  int32_t getFreq() {
+    if (cur_freq_ >= size_) {
+      refill();
+    }
+    return freqs_[cur_freq_++];
+  }
+
+  // Resets both cursors and decodes the next block.
+  void refill();
+  // Decodes the next block and points docRange at the internal buffers.
+  void readRange(DocRange* docRange);
+
+private:
+  // Format-specific block decoders; each returns the decoded entry count.
+  int32_t refillV0();
+  int32_t refillV1();
+
+  // Number of entries in the block currently held by docs_/freqs_.
+  uint32_t size_ = 0;
+
+  // Index of the next entry getDoc() will return.
+  uint32_t cur_doc_ = 0;
+  std::vector<uint32_t> docs_;
+
+  // Index of the next entry getFreq() will return.
+  uint32_t cur_freq_ = 0;
+  std::vector<uint32_t> freqs_;
+
+  // Source stream; not owned.
+  CL_NS(store)::IndexInput* freqStream_ = nullptr;
+
+  bool hasProx_ = false;
+  IndexVersion indexVersion_ = IndexVersion::kV0;
+};
+
 class SegmentTermDocs:public virtual TermDocs {
 protected:
   const SegmentReader* parent;
@@ -57,6 +114,7 @@ private:
 protected:
   bool currentFieldStoresPayloads;
   bool hasProx = false;
+  IndexVersion indexVersion_ = IndexVersion::kV0; 
 
 public:
   ///\param Parent must be a segment reader
@@ -82,9 +140,18 @@ public:
 
   virtual TermPositions* __asTermPositions();
 
+  int32_t docFreq() override;
+
 protected:
   virtual void skippingDoc(){}
   virtual void skipProx(const int64_t /*proxPointer*/, const int32_t 
/*payloadLength*/){}
+
+private:
+  bool readRangeV0(DocRange* docRange);
+  bool readRangeV1(DocRange* docRange);
+
+private:
+  TermDocsBuffer buffer_;
 };
 
 
@@ -439,6 +506,8 @@ private:
   void startCommit();
   void rollbackCommit();
 
+  IndexVersion getIndexVersion() override;
+
   //allow various classes to access the internals of this. this allows us to 
have
   //a more tight idea of the package
   friend class IndexReader;
diff --git a/src/core/CLucene/search/query/DcoIdSetIterator.h 
b/src/core/CLucene/search/query/DcoIdSetIterator.h
new file mode 100644
index 00000000..88aa4313
--- /dev/null
+++ b/src/core/CLucene/search/query/DcoIdSetIterator.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "CLucene/index/DocRange.h"
+
+// Abstract cursor over a set of document ids. Every operation is pure
+// virtual, so all behaviour, including what is returned once the set is
+// exhausted, is defined by the implementing class.
+class DocIdSetIterator {
+public:
+  DocIdSetIterator() = default;
+  virtual ~DocIdSetIterator() = default;
+
+  // Id of the document the cursor currently stands on.
+  virtual int32_t docID() = 0;
+  // Moves to the following document and returns its id.
+  virtual int32_t nextDoc() = 0;
+  // Moves forward relative to `target` and returns the id landed on
+  // (presumably the first id >= target - confirm with the implementations).
+  virtual int32_t advance(int32_t target) = 0;
+
+  // Document frequency reported by the underlying source.
+  virtual int32_t docFreq() const = 0;
+  // Bulk read into docRange; the meaning of the bool result is left to the
+  // implementation.
+  virtual bool readRange(DocRange* docRange) const = 0;
+};
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermIterator.h 
b/src/core/CLucene/search/query/TermIterator.h
new file mode 100644
index 00000000..e0cf23a4
--- /dev/null
+++ b/src/core/CLucene/search/query/TermIterator.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "CLucene/search/query/DcoIdSetIterator.h"
+#include "CLucene/index/Terms.h"
+
+#include <limits.h>
+
+CL_NS_USE(index)
+
+// Adapts a TermDocs cursor to the DocIdSetIterator interface.
+//
+// The wrapped TermDocs is borrowed, never deleted here. INT_MAX is the value
+// handed back when the cursor cannot move any further. Apart from isEmpty(),
+// every member dereferences the pointer without checking it.
+class TermIterator : public DocIdSetIterator {
+public:
+  TermIterator() = default;
+  TermIterator(TermDocs* termDocs) : termDocs_(termDocs) {}
+
+  virtual ~TermIterator() = default;
+
+  // True when no TermDocs has been attached.
+  bool isEmpty() {
+    return nullptr == termDocs_;
+  }
+
+  // Current doc id, capped at INT_MAX.
+  int32_t docID() override {
+    const uint32_t current = termDocs_->doc();
+    if (current >= INT_MAX) {
+      return INT_MAX;
+    }
+    return current;
+  }
+
+  // Steps to the next posting; INT_MAX once there is none left.
+  int32_t nextDoc() override {
+    if (!termDocs_->next()) {
+      return INT_MAX;
+    }
+    return termDocs_->doc();
+  }
+
+  // Skips forward via TermDocs::skipTo; INT_MAX when the skip fails.
+  int32_t advance(int32_t target) override {
+    if (!termDocs_->skipTo(target)) {
+      return INT_MAX;
+    }
+    return termDocs_->doc();
+  }
+
+  // Forwards to TermDocs::docFreq.
+  int32_t docFreq() const override {
+    const int32_t frequency = termDocs_->docFreq();
+    return frequency;
+  }
+
+  // Forwards to TermDocs::readRange.
+  bool readRange(DocRange* docRange) const override {
+    const bool filled = termDocs_->readRange(docRange);
+    return filled;
+  }
+
+private:
+  TermDocs* termDocs_ = nullptr;
+};
\ No newline at end of file
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e2264120..e1c13305 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -199,6 +199,8 @@ SET(clucene_core_Files
     ./CLucene/search/spans/SpanWeight.cpp
     ./CLucene/search/spans/SpanWeight.h
     ./CLucene/search/spans/TermSpans.cpp
+    ./CLucene/search/query/DcoIdSetIterator.h
+    ./CLucene/search/query/TermIterator.h
     )
 
 #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means 
rebuilding them for the core)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris-thirdparty] branch clucene updated: [Optimize](search) Optimizemultiple terms conjunction query (#117)

Reply via email to