(doris-thirdparty) branch clucene updated: [opt](inverted index) implement block max WAND algorithm with BM25 similarity for query optimization (#374)

yangsiyu Wed, 14 Jan 2026 18:48:51 -0800

This is an automated email from the ASF dual-hosted git repository.

yangsiyu pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git



The following commit(s) were added to refs/heads/clucene by this push:
     new ac9475a6a61 [opt](inverted index) implement block max WAND algorithm 
with BM25 similarity for query optimization (#374)
ac9475a6a61 is described below

commit ac9475a6a61d37117fb44d0ed9d0bc1b5b57f86f
Author: zzzxl <[email protected]>
AuthorDate: Thu Jan 15 10:48:04 2026 +0800

    [opt](inverted index) implement block max WAND algorithm with BM25 
similarity for query optimization (#374)
---
 src/core/CLucene/index/BlockMaxBM25Similarity.cpp |  78 ++++++++++
 src/core/CLucene/index/BlockMaxBM25Similarity.h   |  43 ++++++
 src/core/CLucene/index/DocRange.h                 |  32 ++---
 src/core/CLucene/index/IndexWriter.cpp            |  53 ++++++-
 src/core/CLucene/index/IndexWriter.h              |   3 +-
 src/core/CLucene/index/MultiSegmentReader.cpp     |  15 +-
 src/core/CLucene/index/SDocumentWriter.cpp        |  69 +++++++--
 src/core/CLucene/index/SDocumentWriter.h          |  13 +-
 src/core/CLucene/index/SegmentTermDocs.cpp        |  90 +++++++++---
 src/core/CLucene/index/SkipListReader.cpp         |  15 ++
 src/core/CLucene/index/SkipListWriter.cpp         |   4 +-
 src/core/CLucene/index/Terms.h                    |  20 ++-
 src/core/CLucene/index/_MultiSegmentReader.h      |   3 +-
 src/core/CLucene/index/_SegmentHeader.h           |  10 +-
 src/core/CLucene/index/_SkipListReader.h          |  10 +-
 src/core/CLucene/index/_SkipListWriter.h          |   6 +-
 src/core/CMakeLists.txt                           |   1 +
 src/test/index/TestReadRange.cpp                  | 165 ++++++++++++++++++++--
 src/test/posting/TestBlockMaxScoreV3.cpp          |   4 +-
 19 files changed, 535 insertions(+), 99 deletions(-)

diff --git a/src/core/CLucene/index/BlockMaxBM25Similarity.cpp 
b/src/core/CLucene/index/BlockMaxBM25Similarity.cpp
new file mode 100644
index 00000000000..4dfbda9b5c1
--- /dev/null
+++ b/src/core/CLucene/index/BlockMaxBM25Similarity.cpp
@@ -0,0 +1,78 @@
+//
+// BlockMaxBM25Similarity - 用于 Block Max Score 计算的 BM25 相似度类实现
+//
+// 与 doris::segment_v2::BM25Similarity 保持一致的实现方式。
+//
+
+#include "BlockMaxBM25Similarity.h"
+
+#include <cstddef>
+#include <limits>
+
+namespace lucene::index {
+
+const int32_t BlockMaxBM25Similarity::MAX_INT32 = 
std::numeric_limits<int32_t>::max();
+const uint32_t BlockMaxBM25Similarity::MAX_INT4 = 
long_to_int4(static_cast<uint64_t>(MAX_INT32));
+const int32_t BlockMaxBM25Similarity::NUM_FREE_VALUES = 255 - 
static_cast<int>(MAX_INT4);
+
+std::vector<float> BlockMaxBM25Similarity::LENGTH_TABLE = []() {
+    std::vector<float> table(256);
+    for (int32_t i = 0; i < 256; i++) {
+        table[i] = byte4_to_int(static_cast<uint8_t>(i));
+    }
+    return table;
+}();
+
+BlockMaxBM25Similarity::BlockMaxBM25Similarity(float avgdl) : _avgdl(avgdl), 
_cache(256) {
+    compute_cache();
+}
+
+void BlockMaxBM25Similarity::compute_cache() {
+    for (size_t i = 0; i < 256; i++) {
+        _cache[i] = 1.0F / (_k1 * ((1 - _b) + _b * LENGTH_TABLE[i] / _avgdl));
+    }
+}
+
+int32_t BlockMaxBM25Similarity::number_of_leading_zeros(uint64_t value) {
+    if (value == 0) {
+        return 64;
+    }
+    return __builtin_clzll(value);
+}
+
+uint32_t BlockMaxBM25Similarity::long_to_int4(uint64_t i) {
+    if (i > std::numeric_limits<uint64_t>::max()) {
+        throw;
+    }
+
+    int32_t numBits = 64 - number_of_leading_zeros(i);
+    if (numBits < 4) {
+        return static_cast<uint32_t>(i);
+    } else {
+        int32_t shift = numBits - 4;
+        uint32_t encoded = static_cast<uint32_t>(i >> shift) & 0x07;
+        return encoded | ((shift + 1) << 3);
+    }
+}
+
+uint64_t BlockMaxBM25Similarity::int4_to_long(uint32_t i) {
+    uint64_t bits = i & 0x07;
+    int32_t shift = (i >> 3) - 1;
+
+    if (shift < 0) {
+        return bits;
+    } else {
+        return (bits | 0x08) << shift;
+    }
+}
+
+float BlockMaxBM25Similarity::byte4_to_int(uint8_t b) {
+    if (b < NUM_FREE_VALUES) {
+        return static_cast<float>(b);
+    } else {
+        uint64_t decoded = NUM_FREE_VALUES + int4_to_long(b - NUM_FREE_VALUES);
+        return static_cast<float>(decoded);
+    }
+}
+
+} // namespace lucene::index
diff --git a/src/core/CLucene/index/BlockMaxBM25Similarity.h 
b/src/core/CLucene/index/BlockMaxBM25Similarity.h
new file mode 100644
index 00000000000..23f77fe0897
--- /dev/null
+++ b/src/core/CLucene/index/BlockMaxBM25Similarity.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <vector>
+
+namespace lucene::index {
+
+class BlockMaxBM25Similarity {
+public:
+    explicit BlockMaxBM25Similarity(float avgdl);
+    ~BlockMaxBM25Similarity() = default;
+
+    float tf_factor(int32_t freq, uint8_t norm_byte) const {
+        float norm_inverse = _cache[norm_byte];
+        return 1.0F - 1.0F / (1.0F + static_cast<float>(freq) * norm_inverse);
+    }
+
+    float avgdl() const { return _avgdl; }
+
+private:
+    void compute_cache();
+
+    static int32_t number_of_leading_zeros(uint64_t value);
+    static uint32_t long_to_int4(uint64_t i);
+    static uint64_t int4_to_long(uint32_t i);
+    static float byte4_to_int(uint8_t b);
+
+    static const int32_t MAX_INT32;
+    static const uint32_t MAX_INT4;
+    static const int32_t NUM_FREE_VALUES;
+
+    static std::vector<float> LENGTH_TABLE;
+
+    float _k1 = 1.2F;
+    float _b = 0.75F;
+    float _avgdl = 0.0F;
+
+    std::vector<float> _cache;
+};
+using BlockMaxBM25SimilarityPtr = std::shared_ptr<BlockMaxBM25Similarity>;
+
+} // namespace lucene::index
\ No newline at end of file
diff --git a/src/core/CLucene/index/DocRange.h 
b/src/core/CLucene/index/DocRange.h
index 726144b7fe8..bd3d40a7420 100644
--- a/src/core/CLucene/index/DocRange.h
+++ b/src/core/CLucene/index/DocRange.h
@@ -7,29 +7,27 @@
 #include "CLucene/CLConfig.h"
 
 enum class DocRangeType {
-  kMany = 0,
-  kRange = 1,
+    kMany = 0,
+    kRange = 1,
 
-  kNone
+    kNone
 };
 
 class DocRange {
- public:
-  DocRange() = default;
-  ~DocRange() = default;
+public:
+    DocRange() = default;
+    ~DocRange() = default;
 
- public:
-  DocRangeType type_ = DocRangeType::kNone;
+public:
+    DocRangeType type_ = DocRangeType::kNone;
 
-  uint32_t doc_many_size_ = 0;
-  uint32_t freq_many_size_ = 0;
-  uint32_t norm_many_size_ = 0;
+    uint32_t doc_many_size_ = 0;
+    uint32_t freq_many_size_ = 0;
+    uint32_t norm_many_size_ = 0;
 
-  std::vector<uint32_t>* doc_many = nullptr;
-  std::vector<uint32_t>* freq_many = nullptr;
-  std::vector<uint32_t>* norm_many = nullptr;
+    std::vector<uint32_t>* doc_many = nullptr;
+    std::vector<uint32_t>* freq_many = nullptr;
+    std::vector<uint32_t>* norm_many = nullptr;
 
-  std::pair<uint32_t, uint32_t> doc_range;
-
-  bool need_positions = false;
+    std::pair<uint32_t, uint32_t> doc_range;
 };
\ No newline at end of file
diff --git a/src/core/CLucene/index/IndexWriter.cpp 
b/src/core/CLucene/index/IndexWriter.cpp
index 490782f144b..7ad04859e61 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -1327,6 +1327,9 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
             }
         }
 
+        // Build destFieldNormsMapValues from srcFieldNormsMapValues using 
_trans_vec
+        std::vector<std::map<std::wstring, std::vector<uint8_t>>> 
destFieldNormsMapValues(
+                numDestIndexes);
         if (hasNorms) {
             for (int srcIndex = 0; srcIndex < numIndices; srcIndex++) {
                 auto reader = readers[srcIndex];
@@ -1351,6 +1354,24 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
                     }
                 }
             }
+
+            // Build destFieldNormsMapValues
+            for (int destIdx = 0; destIdx < numDestIndexes; destIdx++) {
+                for (const auto& [fieldName, _] : srcFieldNormsMapValues[0]) {
+                    
destFieldNormsMapValues[destIdx][fieldName].resize(dest_index_docs[destIdx], 0);
+                }
+            }
+            for (int srcIdx = 0; srcIdx < numIndices; srcIdx++) {
+                for (const auto& [fieldName, srcNorms] : 
srcFieldNormsMapValues[srcIdx]) {
+                    for (size_t srcDocId = 0; srcDocId < srcNorms.size(); 
srcDocId++) {
+                        auto [destIdx, destDocId] = 
_trans_vec[srcIdx][srcDocId];
+                        if (destIdx != UINT32_MAX && destDocId != UINT32_MAX) {
+                            
destFieldNormsMapValues[destIdx][fieldName][destDocId] =
+                                    srcNorms[srcDocId];
+                        }
+                    }
+                }
+            }
         }
 
         /// write fields and create files writers
@@ -1406,7 +1427,7 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
         }
 
         /// merge terms
-        mergeTerms(hasProx, indexVersion);
+        mergeTerms(hasProx, hasNorms, indexVersion, destFieldNormsMapValues);
 
         /// merge norms if have
         if (hasNorms){
@@ -1643,7 +1664,8 @@ protected:
 
 };
 
-void IndexWriter::mergeTerms(bool hasProx, IndexVersion indexVersion) {
+void IndexWriter::mergeTerms(bool hasProx, bool hasNorms, IndexVersion 
indexVersion,
+        const std::vector<std::map<std::wstring, std::vector<uint8_t>>>& 
destFieldNormsMapValues) {
     auto queue = _CLNEW SegmentMergeQueue(readers.size());
     auto numSrcIndexes = readers.size();
     //std::vector<TermPositions *> postingsList(numSrcIndexes);
@@ -1794,7 +1816,32 @@ void IndexWriter::mergeTerms(bool hasProx, IndexVersion 
indexVersion) {
                         PforUtil::encodePos(proxOut, posBuffer);
                     }
 
-                    skipWriter->setSkipData(lastDoc, false, -1);
+                    int32_t maxBlockFreq = -1;
+                    int32_t minBlockNorm = -1;
+
+                    if (hasProx && hasNorms) {
+                        auto normsIt = 
destFieldNormsMapValues[destIdx].find(smallestTerm->field());
+                        const std::vector<uint8_t>* destNorms =
+                                (normsIt != 
destFieldNormsMapValues[destIdx].end())
+                                        ? &normsIt->second
+                                        : nullptr;
+
+                        for (size_t i = 0; i < docDeltaBuffer.size(); ++i) {
+                            int32_t freq = freqBuffer[i];
+                            maxBlockFreq = std::max(maxBlockFreq, freq);
+                            if (destNorms) {
+                                uint32_t docId = docDeltaBuffer[i];
+                                if (docId < destNorms->size()) {
+                                    uint8_t normByte = (*destNorms)[docId];
+                                    if (minBlockNorm == -1 || normByte < 
minBlockNorm) {
+                                        minBlockNorm = normByte;
+                                    }
+                                }
+                            }
+                        }
+                    }
+
+                    skipWriter->setSkipData(lastDoc, false, -1, maxBlockFreq, 
minBlockNorm);
                     skipWriter->bufferSkip(df);
                 }
 
diff --git a/src/core/CLucene/index/IndexWriter.h 
b/src/core/CLucene/index/IndexWriter.h
index e8c5fbd0ec3..de1bab67bde 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -328,7 +328,8 @@ public:
     // write fields info file
     void writeFields(lucene::store::Directory* d, std::string segment);
     // merge terms and write files
-    void mergeTerms(bool hasProx, IndexVersion indexVersion);
+    void mergeTerms(bool hasProx, bool hasNorms, IndexVersion indexVersion,
+            const std::vector<std::map<std::wstring, std::vector<uint8_t>>>& 
destFieldNormsMapValues);
     // merge norms and write files
     void mergeNorms(
             std::vector<uint32_t> dest_index_docs,
diff --git a/src/core/CLucene/index/MultiSegmentReader.cpp 
b/src/core/CLucene/index/MultiSegmentReader.cpp
index 73428860f1a..c102808b4a9 100644
--- a/src/core/CLucene/index/MultiSegmentReader.cpp
+++ b/src/core/CLucene/index/MultiSegmentReader.cpp
@@ -270,6 +270,10 @@ const ArrayBase<IndexReader*>* 
MultiSegmentReader::getSubReaders() const{
   return subReaders;
 }
 
+const int32_t* MultiSegmentReader::getStarts() const{
+  return starts;
+}
+
 bool MultiSegmentReader::document(int32_t n, CL_NS(document)::Document& doc, 
const FieldSelector* fieldSelector){
        ensureOpen();
   int32_t i = readerIndex(n);                    // find segment num
@@ -771,17 +775,6 @@ bool MultiTermDocs::skipTo(const int32_t target) {
        }
 }
 
-void MultiTermDocs::skipToBlock(const int32_t target) {
-    while (pointer < subReaders->length && target >= starts[pointer]) {
-        base = starts[pointer];
-        current = termDocs(pointer++);
-    }
-    
-    if (current != NULL) {
-        current->skipToBlock(target - base);
-    }
-}
-
 void MultiTermDocs::close() {
 //Func - Closes all MultiTermDocs managed by this instance
 //Pre  - true
diff --git a/src/core/CLucene/index/SDocumentWriter.cpp 
b/src/core/CLucene/index/SDocumentWriter.cpp
index e85d96dbd81..39ea1720d05 100644
--- a/src/core/CLucene/index/SDocumentWriter.cpp
+++ b/src/core/CLucene/index/SDocumentWriter.cpp
@@ -986,8 +986,8 @@ void SDocumentsWriter<T>::writeNorms(const std::string 
&segmentName, int32_t tot
                     v = 0;
                 } else {
                     normsOut->writeLong(n->total_term_count_);
-                    v = n->out.getFilePointer();
-                    n->out.writeTo(normsOut);
+                    v = n->norms_buffer.size();
+                    normsOut->writeBytes(n->norms_buffer.data(), 
n->norms_buffer.size());
                     n->reset();
                 }
                 if (v < totalNumDoc)
@@ -1032,6 +1032,21 @@ void 
SDocumentsWriter<T>::writeSegment(std::vector<std::string> &flushedFiles) {
     std::sort(allFields.begin(), allFields.end(), 
ThreadState::FieldData::sort);
     const int32_t numAllFields = allFields.size();
 
+    if (hasProx_) {
+        const int32_t numFieldInfos = fieldInfos->size();
+        for (int32_t fieldIdx = 0; fieldIdx < numFieldInfos; fieldIdx++) {
+            if (fieldIdx >= static_cast<int32_t>(norms.length)) {
+                continue;
+            }
+            BufferedNorms* n = norms[fieldIdx];
+            if (n != nullptr && numDocsInRAM > 0) {
+                float avgdl =
+                        static_cast<float>(n->total_term_count_) / 
static_cast<float>(numDocsInRAM);
+                n->createBM25Similarity(avgdl);
+            }
+        }
+    }
+
     skipListWriter = _CLNEW DefaultSkipListWriter(termsOut->skipInterval, 
termsOut->maxSkipLevels,
                                                   numDocsInRAM, freqOut, 
proxOut, indexVersion_);
 
@@ -1136,8 +1151,8 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
                                      STermInfosWriter<T> *termsOut,
                                      IndexOutput *freqOut,
                                      IndexOutput *proxOut) {
-
-    const int32_t fieldNumber = (*fields)[0]->fieldInfo->number;
+    const FieldInfo* fieldInfo = (*fields)[0]->fieldInfo;
+    const int32_t fieldNumber = fieldInfo->number;
     int32_t numFields = fields->length;
     // In Doris, field number is always 1 by now.
     assert(numFields == 1);
@@ -1161,6 +1176,13 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
     const int32_t skipInterval = termsOut->skipInterval;
     auto currentFieldStorePayloads = false;
 
+    // Block Max WAND: Pre-compute if scoring info is available
+    BufferedNorms* blockMaxNorms = nullptr;
+    if (fieldNumber < static_cast<int32_t>(norms.length)) {
+        blockMaxNorms = norms[fieldNumber];
+    }
+    const bool enableBlockMaxWand = hasProx_ && (blockMaxNorms != nullptr && 
!fieldInfo->omitNorms);
+
     ValueArray<FieldMergeState *> termStates(numFields);
 
     while (numFields > 0) {
@@ -1205,12 +1227,39 @@ void 
SDocumentsWriter<T>::appendPostings(ArrayBase<typename ThreadState::FieldDa
         while (numToMerge > 0) {
 
             if ((++df % skipInterval) == 0) {
+                int32_t maxBlockFreq = -1;
+                int32_t maxBlockNorm = -1;
+                
+                if (enableBlockMaxWand) {
+                    const auto& nb = blockMaxNorms->norms_buffer;
+                    auto* bm25 = blockMaxNorms->bm25_similarity_.get();
+                    float maxTFFactor = -1.0F;
+                    
+                    if (bm25 != nullptr) {
+                        for (size_t i = 0; i < docDeltaBuffer.size(); ++i) {
+                            int32_t freq = freqBuffer[i];
+                            int32_t docId = docDeltaBuffer[i];
+                            
+                            if (static_cast<size_t>(docId) < nb.size()) {
+                                uint8_t normByte = nb[docId];
+                                float tf_factor = bm25->tf_factor(freq, 
normByte);
+                                
+                                if (tf_factor > maxTFFactor) {
+                                    maxTFFactor = tf_factor;
+                                    maxBlockFreq = freq;
+                                    maxBlockNorm = normByte;
+                                }
+                            }
+                        }
+                    }
+                }
+
                 PforUtil::pfor_encode(freqOut, docDeltaBuffer, freqBuffer, 
hasProx_);
                 if (hasProx_ && indexVersion_ >= IndexVersion::kV2) {
                     PforUtil::encodePos(proxOut, posBuffer);
                 }
 
-                skipListWriter->setSkipData(lastDoc, 
currentFieldStorePayloads, lastPayloadLength);
+                skipListWriter->setSkipData(lastDoc, 
currentFieldStorePayloads, lastPayloadLength, maxBlockFreq, maxBlockNorm);
                 skipListWriter->bufferSkip(df);
             }
 
@@ -1425,24 +1474,22 @@ SDocumentsWriter<T>::BufferedNorms::BufferedNorms() {
 }
 template<typename T>
 void SDocumentsWriter<T>::BufferedNorms::add(float_t norm) {
-    uint8_t b = search::Similarity::encodeNorm(norm);
-    out.writeByte(b);
-    upto++;
+    _CLTHROWA(CL_ERR_UnsupportedOperation, 
"SDocumentsWriter::BufferedNorms::add(float_t) not supported");
 }
 
 template<typename T>
 void SDocumentsWriter<T>::BufferedNorms::add(int32_t norm) {
     total_term_count_ += norm;
     uint8_t b = search::Similarity::encodeNorm(norm);
-    out.writeByte(b);
+    norms_buffer.push_back(b);
     upto++;
 }
 
 template<typename T>
 void SDocumentsWriter<T>::BufferedNorms::reset() {
-    out.reset();
     upto = 0;
     total_term_count_ = 0;
+    norms_buffer.clear();
 }
 template<typename T>
 void SDocumentsWriter<T>::BufferedNorms::fill(int32_t docID) {
@@ -1452,7 +1499,7 @@ void SDocumentsWriter<T>::BufferedNorms::fill(int32_t 
docID) {
     // varying different fields, because we are not
     // storing the norms sparsely (see LUCENE-830)
     if (upto < docID) {
-        fillBytes(&out, defaultNorm, docID - upto);
+        norms_buffer.insert(norms_buffer.end(), docID - upto, defaultNorm);
         upto = docID;
     }
 }
diff --git a/src/core/CLucene/index/SDocumentWriter.h 
b/src/core/CLucene/index/SDocumentWriter.h
index 5fc8247b7c4..5d475a19df7 100644
--- a/src/core/CLucene/index/SDocumentWriter.h
+++ b/src/core/CLucene/index/SDocumentWriter.h
@@ -14,6 +14,7 @@
 #include "_FieldInfos.h"
 #include "_TermInfo.h"
 #include "_TermInfosWriter.h"
+#include "BlockMaxBM25Similarity.h"
 
 CL_CLASS_DEF(analysis, Analyzer)
 CL_CLASS_DEF(analysis, Token)
@@ -83,16 +84,22 @@ public:
    * to a partial segment. */
     class BufferedNorms {
     public:
-        CL_NS(store)::RAMOutputStream out;
-        int32_t upto{};
-
         BufferedNorms();
+        ~BufferedNorms() = default;
+
         void add(float_t norm);
         void add(int32_t norm);
         void reset();
         void fill(int32_t docID);
 
+        void createBM25Similarity(float avgdl) {
+            bm25_similarity_ = 
std::make_unique<lucene::index::BlockMaxBM25Similarity>(avgdl);
+        }
+
+        int32_t upto {};
         int64_t total_term_count_ = 0;
+        std::vector<uint8_t> norms_buffer;
+        std::unique_ptr<lucene::index::BlockMaxBM25Similarity> 
bm25_similarity_;
     };
     template<typename T2>
     class BlockPool {
diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp 
b/src/core/CLucene/index/SegmentTermDocs.cpp
index 2e2fd8a5192..58aa3c38dad 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -144,7 +144,7 @@ bool SegmentTermDocs::next()  {
 
 int32_t SegmentTermDocs::read(int32_t *docs, int32_t *freqs, int32_t length) {
     int32_t i = 0;
-    
+
     if (count == df) {
         return i;
     }
@@ -197,24 +197,46 @@ bool SegmentTermDocs::readRange(DocRange* docRange) {
     }
 
     buffer_.readRange(docRange);
+
     count += docRange->doc_many_size_;
 
-    if (docRange->need_positions && hasProx && docRange->doc_many_size_ > 0 && 
df >= skipInterval) {
+    if (docRange->doc_many_size_ > 0) {
+        uint32_t start = (*docRange->doc_many)[0];
+        uint32_t end = (*docRange->doc_many)[docRange->doc_many_size_ - 1];
+        if ((end - start) == docRange->doc_many_size_ - 1) {
+            docRange->doc_range.first = start;
+            docRange->doc_range.second = start + docRange->doc_many_size_;
+            docRange->type_ = DocRangeType::kRange;
+        }
+    }
+
+    return true;
+}
+
+bool SegmentTermDocs::readBlock(DocRange* docRange) {
+    if (count >= df) {
+        return false;
+    }
+
+    buffer_.readRange(docRange);
+    count += docRange->doc_many_size_;
+
+    if (docRange->doc_many_size_ > 0 && df >= skipInterval) {
         if (skipListReader == nullptr) {
-            skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), 
maxSkipLevels, skipInterval, indexVersion_);
+            skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), 
maxSkipLevels,
+                                                          skipInterval, 
indexVersion_);
             skipListReader->setIoContext(io_ctx_);
         }
         if (!haveSkipped) {
-            skipListReader->init(skipPointer, freqBasePointer, 
proxBasePointer, df, hasProx, currentFieldStoresPayloads);
+            skipListReader->init(skipPointer, freqBasePointer, 
proxBasePointer, df, hasProx,
+                                 currentFieldStoresPayloads);
             haveSkipped = true;
         }
-        
+
         uint32_t firstDoc = (*docRange->doc_many)[0];
-        int32_t skippedCount = skipListReader->skipTo(firstDoc);
+        skipListReader->skipTo(firstDoc);
 
-        if (skipListReader->getDoc() >= 0) {
-            skipProx(skipListReader->getProxPointer(), 
skipListReader->getPayloadLength());
-        }
+        skipProx(skipListReader->getProxPointer(), 
skipListReader->getPayloadLength());
     }
 
     if (docRange->doc_many_size_ > 0) {
@@ -263,30 +285,32 @@ bool SegmentTermDocs::skipTo(const int32_t target) {
     return true;
 }
 
-void SegmentTermDocs::skipToBlock(const int32_t target) {
+bool SegmentTermDocs::skipToBlock(const int32_t target) {
     if (df >= skipInterval) {
         if (skipListReader == NULL) {
-            skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), 
maxSkipLevels, skipInterval, indexVersion_);
+            skipListReader = _CLNEW DefaultSkipListReader(freqStream->clone(), 
maxSkipLevels,
+                                                          skipInterval, 
indexVersion_);
             skipListReader->setIoContext(io_ctx_);
         }
 
         if (!haveSkipped) {
-            skipListReader->init(skipPointer, freqBasePointer, 
proxBasePointer, df, hasProx, currentFieldStoresPayloads);
+            skipListReader->init(skipPointer, freqBasePointer, 
proxBasePointer, df, hasProx,
+                                 currentFieldStoresPayloads);
             haveSkipped = true;
         }
 
-        int32_t newCount = skipListReader->skipTo(target);
-        if (newCount > count) {
-            freqStream->seek(skipListReader->getFreqPointer());
-            skipProx(skipListReader->getProxPointer(), 
skipListReader->getPayloadLength());
-
-            _doc = skipListReader->getDoc();
-            count = newCount;
-            // Note: We do NOT call buffer_.refill() here.
-            // The caller will use readRange() to read the next block.
+        int32_t currentLastDoc = skipListReader->getLastDocInBlock();
+        if (target <= currentLastDoc && haveSkipped) {
+            return false;
         }
+        int32_t newCount = skipListReader->skipTo(target);
+        freqStream->seek(skipListReader->getFreqPointer());
+        skipProx(skipListReader->getProxPointer(), 
skipListReader->getPayloadLength());
+        _doc = skipListReader->getDoc();
+        count = newCount;
+        return true;
     }
-    // If df < skipInterval, nothing to skip. Caller will use readRange() 
sequentially.
+    return false;
 }
 
 void TermDocsBuffer::refill() {
@@ -409,4 +433,26 @@ void TermDocsBuffer::refillNorm(int32_t size) {
         }
     }
 }
+
+int32_t SegmentTermDocs::getMaxBlockFreq() {
+    if (skipListReader != nullptr) {
+        return skipListReader->getMaxBlockFreq();
+    }
+    return -1;
+}
+
+int32_t SegmentTermDocs::getMaxBlockNorm() {
+    if (skipListReader != nullptr) {
+        return skipListReader->getMaxBlockNorm();
+    }
+    return -1;
+}
+
+int32_t SegmentTermDocs::getLastDocInBlock() {
+    if (skipListReader != nullptr) {
+        return skipListReader->getLastDocInBlock();
+    }
+    return -1;
+}
+
 CL_NS_END
diff --git a/src/core/CLucene/index/SkipListReader.cpp 
b/src/core/CLucene/index/SkipListReader.cpp
index 426e8d12317..5da80263edc 100644
--- a/src/core/CLucene/index/SkipListReader.cpp
+++ b/src/core/CLucene/index/SkipListReader.cpp
@@ -63,6 +63,13 @@ int32_t MultiLevelSkipListReader::getDoc() const {
        return lastDoc;
 }
 
+int32_t MultiLevelSkipListReader::getLastDocInBlock() const {
+       if (skipDoc[0] == 0 && haveSkipped && numberOfSkipLevels > 0) {
+               const_cast<MultiLevelSkipListReader*>(this)->loadNextSkip(0);;
+       }
+       return skipDoc[0];
+}
+
 int32_t MultiLevelSkipListReader::skipTo(const int32_t target) {
        if (!haveSkipped) {
                // first time, load skip levels
@@ -111,6 +118,7 @@ bool MultiLevelSkipListReader::loadNextSkip(const int32_t 
level) {
                // this skip list is exhausted
                skipDoc[level] = LUCENE_INT32_MAX_SHOULDBE;
                if (numberOfSkipLevels > level) numberOfSkipLevels = level;
+               onSkipExhausted(level);
                return false;
        }
 
@@ -364,6 +372,13 @@ int32_t DefaultSkipListReader::readSkipData(const int32_t 
level, CL_NS(store)::I
        return delta;
 }
 
+void DefaultSkipListReader::onSkipExhausted(const int32_t level) {
+       if (level == 0) {
+               maxBlockFreq = -1;
+               maxBlockNorm = -1;
+       }
+}
+
 int32_t DefaultSkipListReader::getMaxBlockFreq() const {
        return maxBlockFreq;
 }
diff --git a/src/core/CLucene/index/SkipListWriter.cpp 
b/src/core/CLucene/index/SkipListWriter.cpp
index 72b3be27980..63aed4e3443 100644
--- a/src/core/CLucene/index/SkipListWriter.cpp
+++ b/src/core/CLucene/index/SkipListWriter.cpp
@@ -118,8 +118,8 @@ void DefaultSkipListWriter::resetSkip() {
     if (hasProx) {
         Arrays<int64_t>::fill(lastSkipProxPointer, numberOfSkipLevels, 
proxOutput->getFilePointer());
         if (indexVersion == IndexVersion::kV4) {
-            maxBlockFreq = 0;
-            maxBlockNorm = 0;
+            maxBlockFreq = -1;
+            maxBlockNorm = -1;
         }
     }
     Arrays<int64_t>::fill(lastSkipFreqPointer, numberOfSkipLevels, 
freqOutput->getFilePointer());
diff --git a/src/core/CLucene/index/Terms.h b/src/core/CLucene/index/Terms.h
index aefe508e5d0..dbde2d0c4d0 100644
--- a/src/core/CLucene/index/Terms.h
+++ b/src/core/CLucene/index/Terms.h
@@ -64,6 +64,9 @@ public:
        virtual int32_t read(int32_t* docs, int32_t* freqs, int32_t length)=0;
         virtual int32_t read(int32_t* docs, int32_t* freqs, int32_t* norms, 
int32_t length)=0;
        virtual bool readRange(DocRange* docRange) = 0;
+       virtual bool readBlock(DocRange* docRange) {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "readBlock is not 
supported for multi-segment readers");
+       }
 
        // Skips entries to the first beyond the current whose document number 
is
        // greater than or equal to <i>target</i>. <p>Returns true iff there is 
such
@@ -79,10 +82,9 @@ public:
        // Some implementations are considerably more efficient than that.
        virtual bool skipTo(const int32_t target)=0;
 
-       // Skip to the block containing the target document using skip list.
-       // This is an optimization that positions the stream for subsequent 
readRange calls.
-       // Unlike skipTo, this does not scan to find the exact document.
-       virtual void skipToBlock(const int32_t target) {}
+       virtual bool skipToBlock(const int32_t target) {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "skipToBlock is not 
supported for multi-segment readers");
+       }
 
        // Frees associated resources.
        virtual void close() = 0;
@@ -103,6 +105,16 @@ public:
         virtual int32_t docNorm() {
            return 0;
        }
+
+       virtual int32_t getMaxBlockFreq() {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "getMaxBlockFreq is not 
supported for multi-segment readers");
+       }
+       virtual int32_t getMaxBlockNorm() {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "getMaxBlockNorm is not 
supported for multi-segment readers");
+       }
+       virtual int32_t getLastDocInBlock() {
+               _CLTHROWA(CL_ERR_UnsupportedOperation, "getLastDocInBlock is 
not supported for multi-segment readers");
+       }
 };
 
 
diff --git a/src/core/CLucene/index/_MultiSegmentReader.h 
b/src/core/CLucene/index/_MultiSegmentReader.h
index f33ed5f4a07..02f718a66ce 100644
--- a/src/core/CLucene/index/_MultiSegmentReader.h
+++ b/src/core/CLucene/index/_MultiSegmentReader.h
@@ -120,6 +120,7 @@ public:
   int32_t getTermInfosIndexDivisor();
 
   const CL_NS(util)::ArrayBase<IndexReader*>* getSubReaders() const;
+  const int32_t* getStarts() const;
 
   friend class MultiReader;
   friend class SegmentReader;
@@ -176,8 +177,6 @@ public:
    /* A Possible future optimization could skip entire segments */
   bool skipTo(const int32_t target);
 
-  /** Skip to the block containing target using skip list. */
-  void skipToBlock(const int32_t target) override;
 
   void close();
 
diff --git a/src/core/CLucene/index/_SegmentHeader.h 
b/src/core/CLucene/index/_SegmentHeader.h
index 8686eb23a15..80ab2331265 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -236,18 +236,22 @@ public:
   virtual int32_t read(int32_t* docs, int32_t* freqs,int32_t* norms, int32_t 
length);
 
   bool readRange(DocRange* docRange) override;
+  bool readBlock(DocRange* docRange) override;
 
   /** Optimized implementation. */
   virtual bool skipTo(const int32_t target);
 
-    /** Skip to the block containing target using skip list. */
-    void skipToBlock(const int32_t target) override;
+  bool skipToBlock(const int32_t target) override;
 
   virtual TermPositions* __asTermPositions();
 
   void setLoadStats(bool load_stats) override;
   void setIoContext(const void* io_ctx) override;
 
+  int32_t getMaxBlockFreq() override;
+  int32_t getMaxBlockNorm() override;
+  int32_t getLastDocInBlock() override;
+
   int32_t docFreq() override;
 
   int32_t docNorm() override;
@@ -351,7 +355,7 @@ private:
   int32_t freq() const{ return SegmentTermDocs::freq(); }
   int32_t norm() const{ return SegmentTermDocs::norm(); }
   bool skipTo(const int32_t target){ return SegmentTermDocs::skipTo(target); }
-    void skipToBlock(const int32_t target) { 
SegmentTermDocs::skipToBlock(target); }
+  bool skipToBlock(const int32_t target) { return 
SegmentTermDocs::skipToBlock(target); }
 
 private:
   IndexVersion indexVersion_ = IndexVersion::kV0; 
diff --git a/src/core/CLucene/index/_SkipListReader.h 
b/src/core/CLucene/index/_SkipListReader.h
index f7562e2f9f2..723aa7c2207 100644
--- a/src/core/CLucene/index/_SkipListReader.h
+++ b/src/core/CLucene/index/_SkipListReader.h
@@ -65,6 +65,8 @@ public:
        *  has skipped.  */
        int32_t getDoc() const;
 
+       int32_t getLastDocInBlock() const;
+
        /** Skips entries to the first beyond the current whose document number 
is
        *  greater than or equal to <i>target</i>. Returns the current doc 
count.
        */
@@ -79,6 +81,8 @@ protected:
        /** Seeks the skip entry on the given level */
        virtual void seekChild(const int32_t level);
 
+       virtual void onSkipExhausted(const int32_t level) {}
+
        void close();
 
        /** initializes the reader */
@@ -158,8 +162,8 @@ private:
        int64_t lastProxPointer{0};
        int32_t lastPayloadLength;
 
-       int32_t maxBlockFreq = 0;
-       int32_t maxBlockNorm = 0;
+       int32_t maxBlockFreq = -1;
+       int32_t maxBlockNorm = -1;
 
 public:
        DefaultSkipListReader(CL_NS(store)::IndexInput* _skipStream, const 
int32_t maxSkipLevels, const int32_t _skipInterval, IndexVersion indexVersion);
@@ -191,6 +195,8 @@ protected:
 
        void setLastSkipData(const int32_t level);
 
+       void onSkipExhausted(const int32_t level) override;
+
        int32_t readSkipData(const int32_t level, CL_NS(store)::IndexInput* 
_skipStream);
 };
 
diff --git a/src/core/CLucene/index/_SkipListWriter.h 
b/src/core/CLucene/index/_SkipListWriter.h
index c32351bd47c..92d1c2f3cd6 100644
--- a/src/core/CLucene/index/_SkipListWriter.h
+++ b/src/core/CLucene/index/_SkipListWriter.h
@@ -110,8 +110,8 @@ private:
   bool hasProx = false;
   IndexVersion indexVersion = IndexVersion::kV0;
 
-  int32_t maxBlockFreq = 0;
-  int32_t maxBlockNorm = 0;
+  int32_t maxBlockFreq = -1;
+  int32_t maxBlockNorm = -1;
   
 protected:
 
@@ -128,7 +128,7 @@ public:
     /**
    * Sets the values for the current skip data.
    */
-    void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength, 
int32_t maxBlockFreq = 0, int32_t maxBlockNorm = 0);
+    void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength, 
int32_t maxBlockFreq = -1, int32_t maxBlockNorm = -1);
     void resetSkip();
 };
 
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 0a19a2d278f..fd7f7079802 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -103,6 +103,7 @@ SET(clucene_core_Files
     ./CLucene/index/DocumentsWriter.cpp
     ./CLucene/index/SDocumentWriter.cpp
     ./CLucene/index/SDocumentWriter.h
+    ./CLucene/index/BlockMaxBM25Similarity.cpp
     ./CLucene/index/DocumentsWriterThreadState.cpp
     ./CLucene/index/SegmentTermVector.cpp
     ./CLucene/index/TermVectorReader.cpp
diff --git a/src/test/index/TestReadRange.cpp b/src/test/index/TestReadRange.cpp
index 0542dbb065f..ea2bf8edd7a 100644
--- a/src/test/index/TestReadRange.cpp
+++ b/src/test/index/TestReadRange.cpp
@@ -128,11 +128,11 @@ static TermDocsResult readWithNext(TermDocs* termDocs) {
     return result;
 }
 
-// Read using readRange() method
-static TermDocsResult readWithRange(TermDocs* termDocs) {
+// Read using readBlock() method
+static TermDocsResult readWithBlock(TermDocs* termDocs) {
     TermDocsResult result;
     DocRange docRange;
-    while (termDocs->readRange(&docRange)) {
+    while (termDocs->readBlock(&docRange)) {
         for (uint32_t i = 0; i < docRange.doc_many_size_; ++i) {
             result.docs.push_back((*docRange.doc_many)[i]);
             if (docRange.freq_many && i < docRange.freq_many_size_) {
@@ -167,13 +167,12 @@ static TermPositionsResult 
readPositionsWithNext(TermPositions* termPos) {
     return result;
 }
 
-// Read positions using readRange() and nextDeltaPosition() method
-static TermPositionsResult readPositionsWithRange(TermPositions* termPos) {
+// Read positions using readBlock() and nextDeltaPosition() method
+static TermPositionsResult readPositionsWithBlock(TermPositions* termPos) {
     TermPositionsResult result;
     DocRange docRange;
-    docRange.need_positions = true;
 
-    while (termPos->readRange(&docRange)) {
+    while (termPos->readBlock(&docRange)) {
         for (uint32_t i = 0; i < docRange.doc_many_size_; ++i) {
             result.docs.push_back((*docRange.doc_many)[i]);
             int32_t freq = 1;
@@ -296,7 +295,7 @@ void TestReadRangeBasic(CuTest* tc) {
             // Read with readRange()
             TermDocs* termDocs2 = reader->termDocs();
             termDocs2->seek(term);
-            TermDocsResult result2 = readWithRange(termDocs2);
+            TermDocsResult result2 = readWithBlock(termDocs2);
             termDocs2->close();
             _CLDELETE(termDocs2);
 
@@ -366,7 +365,7 @@ void TestReadRangePositions(CuTest* tc) {
             // Read with readRange()/nextDeltaPosition()
             TermPositions* termPos2 = reader->termPositions();
             termPos2->seek(term);
-            TermPositionsResult result2 = readPositionsWithRange(termPos2);
+            TermPositionsResult result2 = readPositionsWithBlock(termPos2);
             termPos2->close();
             _CLDELETE(termPos2);
 
@@ -642,7 +641,7 @@ void TestReadRangeLargeDataset(CuTest* tc) {
             // Read with readRange()
             TermDocs* termDocs2 = reader->termDocs();
             termDocs2->seek(term);
-            TermDocsResult result2 = readWithRange(termDocs2);
+            TermDocsResult result2 = readWithBlock(termDocs2);
             termDocs2->close();
             _CLDELETE(termDocs2);
 
@@ -712,7 +711,7 @@ void TestReadRangeVersions(CuTest* tc) {
                 // Read with readRange()
                 TermDocs* termDocs2 = reader->termDocs();
                 termDocs2->seek(term);
-                TermDocsResult result2 = readWithRange(termDocs2);
+                TermDocsResult result2 = readWithBlock(termDocs2);
                 termDocs2->close();
                 _CLDELETE(termDocs2);
 
@@ -768,7 +767,7 @@ void TestReadRangeEdgeCases(CuTest* tc) {
 
         TermDocs* termDocs2 = reader->termDocs();
         termDocs2->seek(term);
-        TermDocsResult result2 = readWithRange(termDocs2);
+        TermDocsResult result2 = readWithBlock(termDocs2);
         termDocs2->close();
         _CLDELETE(termDocs2);
 
@@ -831,7 +830,7 @@ void TestReadRangeEdgeCases(CuTest* tc) {
 
         TermDocs* termDocs2 = reader->termDocs();
         termDocs2->seek(term);
-        TermDocsResult result2 = readWithRange(termDocs2);
+        TermDocsResult result2 = readWithBlock(termDocs2);
         termDocs2->close();
         _CLDELETE(termDocs2);
 
@@ -848,6 +847,145 @@ void TestReadRangeEdgeCases(CuTest* tc) {
     std::cout << "\nTestReadRangeEdgeCases success" << std::endl;
 }
 
+//=============================================================================
+// Test: Block-based interfaces (readBlock, skipToBlock, getMaxBlockFreq,
+//       getMaxBlockNorm, getLastDocInBlock) working together
+//=============================================================================
+void TestBlockBasedInterfaces(CuTest* tc) {
+    std::srand(getDaySeed());
+    std::mt19937 rng(getDaySeed());
+
+    std::string fieldName = "content";
+    std::vector<std::string> datas;
+
+    // Create index with a common term appearing frequently
+    std::string commonWord = "target";
+    for (int32_t i = 0; i < DEFAULT_DOC_COUNT; ++i) {
+        std::string text = generateRandomText(rng, 1, 5);
+        if (i % 2 == 0) {
+            text += " " + commonWord;
+            // Add multiple occurrences for higher freq
+            if (i % 4 == 0) {
+                text += " " + commonWord + " " + commonWord;
+            }
+        }
+        datas.push_back(text);
+    }
+
+    RAMDirectory dir;
+    writeTestIndex(fieldName, &dir, IndexVersion::kV2, datas);
+
+    auto* reader = IndexReader::open(&dir);
+    std::exception_ptr eptr;
+
+    try {
+        std::wstring ws = StringUtil::string_to_wstring(commonWord);
+        std::wstring fieldNameW = StringUtil::string_to_wstring(fieldName);
+        Term* term = _CLNEW Term(fieldNameW.c_str(), ws.c_str());
+
+        // Collect all docs using next() as baseline
+        TermDocs* baselineDocs = reader->termDocs();
+        baselineDocs->seek(term);
+        std::vector<int32_t> baselineDocIds;
+        std::vector<int32_t> baselineFreqs;
+        while (baselineDocs->next()) {
+            baselineDocIds.push_back(baselineDocs->doc());
+            baselineFreqs.push_back(baselineDocs->freq());
+        }
+        baselineDocs->close();
+        _CLDELETE(baselineDocs);
+
+        // Test: skipToBlock + readBlock + 
getMaxBlockFreq/getMaxBlockNorm/getLastDocInBlock
+        std::vector<int32_t> skipTargets = {0, 500, 2000, 5000, 8000};
+
+        for (int32_t target : skipTargets) {
+            TermDocs* termDocs = reader->termDocs();
+            termDocs->seek(term);
+
+            std::vector<int32_t> collectedDocs;
+            std::vector<int32_t> collectedFreqs;
+
+            // Skip to target block
+            bool skipped = termDocs->skipToBlock(target);
+
+            // Verify getLastDocInBlock returns valid value after skip
+            int32_t lastDocInBlock = termDocs->getLastDocInBlock();
+            if (skipped && lastDocInBlock >= 0) {
+                assertTrue(lastDocInBlock >= 0);
+            }
+
+            // Read blocks and verify maxBlockFreq/maxBlockNorm
+            DocRange docRange;
+            while (termDocs->readBlock(&docRange)) {
+                int32_t maxFreq = termDocs->getMaxBlockFreq();
+                int32_t maxNorm = termDocs->getMaxBlockNorm();
+                lastDocInBlock = termDocs->getLastDocInBlock();
+
+                // Verify maxBlockFreq is >= max freq in current block
+                int32_t actualMaxFreq = 0;
+                for (uint32_t i = 0; i < docRange.doc_many_size_; ++i) {
+                    int32_t doc = (*docRange.doc_many)[i];
+                    int32_t freq = (docRange.freq_many && i < 
docRange.freq_many_size_)
+                                           ? (*docRange.freq_many)[i]
+                                           : 1;
+                    if (doc >= target) {
+                        collectedDocs.push_back(doc);
+                        collectedFreqs.push_back(freq);
+                    }
+                    if (freq > actualMaxFreq) {
+                        actualMaxFreq = freq;
+                    }
+                }
+
+                // maxBlockFreq should be >= actual max freq in block (if 
valid)
+                if (maxFreq > 0) {
+                    assertTrue(maxFreq >= actualMaxFreq);
+                }
+
+                // lastDocInBlock should be >= last doc in current block
+                if (lastDocInBlock >= 0 && docRange.doc_many_size_ > 0) {
+                    int32_t lastDocInRange = 
(*docRange.doc_many)[docRange.doc_many_size_ - 1];
+                    assertTrue(lastDocInBlock >= lastDocInRange);
+                }
+            }
+
+            // Verify collected docs match baseline (docs >= target)
+            std::vector<int32_t> expectedDocs;
+            for (size_t i = 0; i < baselineDocIds.size(); ++i) {
+                if (baselineDocIds[i] >= target) {
+                    expectedDocs.push_back(baselineDocIds[i]);
+                }
+            }
+
+            // All expected docs should be in collected docs
+            for (int32_t expectedDoc : expectedDocs) {
+                bool found = std::find(collectedDocs.begin(), 
collectedDocs.end(), expectedDoc) !=
+                             collectedDocs.end();
+                if (!found) {
+                    std::cerr << "Missing doc " << expectedDoc << " after 
skipToBlock(" << target
+                              << ")" << std::endl;
+                }
+                assertTrue(found);
+            }
+
+            termDocs->close();
+            _CLDELETE(termDocs);
+        }
+
+        _CLDECDELETE(term);
+
+    } catch (...) {
+        eptr = std::current_exception();
+    }
+
+    FINALLY(eptr, {
+        reader->close();
+        _CLLDELETE(reader);
+    })
+
+    std::cout << "\nTestBlockBasedInterfaces success" << std::endl;
+}
+
 //=============================================================================
 // Suite registration
 //=============================================================================
@@ -861,6 +999,7 @@ CuSuite* testReadRange() {
     SUITE_ADD_TEST(suite, TestReadRangeLargeDataset);
     SUITE_ADD_TEST(suite, TestReadRangeVersions);
     SUITE_ADD_TEST(suite, TestReadRangeEdgeCases);
+    SUITE_ADD_TEST(suite, TestBlockBasedInterfaces);
 
     return suite;
 }
diff --git a/src/test/posting/TestBlockMaxScoreV3.cpp 
b/src/test/posting/TestBlockMaxScoreV3.cpp
index efa67b66a98..6204dc72879 100644
--- a/src/test/posting/TestBlockMaxScoreV3.cpp
+++ b/src/test/posting/TestBlockMaxScoreV3.cpp
@@ -116,8 +116,8 @@ void testDefaultSkipListWriterMaxBlockParamsV1(CuTest* tc) {
             int32_t actualMaxBlockFreq = reader->getMaxBlockFreq();
             int32_t actualMaxBlockNorm = reader->getMaxBlockNorm();
 
-            CuAssertTrue(tc, actualMaxBlockFreq == 0);
-            CuAssertTrue(tc, actualMaxBlockNorm == 0);
+            CuAssertTrue(tc, actualMaxBlockFreq == -1);
+            CuAssertTrue(tc, actualMaxBlockNorm == -1);
         }
 
         _CLDELETE(reader);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris-thirdparty) branch clucene updated: [opt](inverted index) implement block max WAND algorithm with BM25 similarity for query optimization (#374)

Reply via email to