This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_0308_3
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git

commit 4950660466283dafab7f15a1f8019037c7aa0d6f
Author: BiteTheDDDDt <pxl...@qq.com>
AuthorDate: Fri Mar 8 17:23:15 2024 +0800

    fix some implicit conversion
---
 .github/workflows/build.yml                        |   2 +-
 .github/workflows/clucene-ut.yml                   | 118 +++
 .../CLucene/analysis/LanguageBasedAnalyzer.cpp     |  14 +-
 .../CLucene/analysis/jieba/ChineseTokenizer.cpp    |   8 +-
 .../CLucene/analysis/jieba/ChineseTokenizer.h      |  25 +-
 src/core/CLucene/analysis/AnalysisHeader.h         |   8 +-
 src/core/CLucene/index/IndexWriter.cpp             |  95 ++-
 src/core/CLucene/index/IndexWriter.h               |  16 +
 src/core/CLucene/index/MultiSegmentReader.cpp      |   4 +
 src/core/CLucene/index/SegmentInfos.cpp            |   3 +
 src/core/CLucene/index/SegmentReader.cpp           |   7 +-
 src/core/CLucene/index/SegmentTermDocs.cpp         |   4 +-
 src/core/CLucene/index/SegmentTermEnum.cpp         | 801 ++++++++++-----------
 src/core/CLucene/index/TermInfosReader.cpp         | 737 ++++++++++---------
 src/core/CLucene/index/_SegmentHeader.h            |   4 +-
 src/core/CLucene/index/_SegmentTermEnum.h          |   3 +-
 src/core/CLucene/search/MultiPhraseQuery.cpp       |   4 +-
 src/core/CLucene/search/query/TermIterator.h       |  29 +-
 .../CLucene/search/query/TermPositionIterator.h    |  23 +
 src/core/CLucene/store/IndexOutput.cpp             |  13 +-
 src/core/CLucene/util/CLStreams.h                  |   2 +-
 src/core/CLucene/util/PriorityQueue.h              |   2 +-
 src/core/CLucene/util/bkd/bkd_docid_iterator.h     |   8 +-
 src/core/CLucene/util/stringUtil.h                 |  17 +-
 src/core/CMakeLists.txt                            |   2 +-
 src/test/CMakeLists.txt                            |   2 +
 src/test/index/TestIndexCompaction.cpp             | 344 +++++++++
 src/test/query/TestMultiPhraseQuery.cpp            | 163 +++++
 src/test/test.h                                    |   2 +
 src/test/tests.cpp                                 |   2 +
 30 files changed, 1623 insertions(+), 839 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 64386c999a..36ea60e962 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -82,7 +82,7 @@ jobs:
           cd /tmp
 
           curl -L 
https://sourceforge.net/projects/libpng/files/zlib/1.2.11/zlib-1.2.11.tar.gz | 
tar -zxf -
-          curl -L 
https://boostorg.jfrog.io/artifactory/main/release/1.81.0/source/boost_1_81_0.tar.gz
 -o - | tar -zxf -
+          curl -L 
https://archives.boost.io/release/1.81.0/source/boost_1_81_0.tar.gz -o - | tar 
-zxf -
 
           if [[ "${{ matrix.config.name }}" == 'macOS' ]]; then
             pushd "$(brew --repo)"
diff --git a/.github/workflows/clucene-ut.yml b/.github/workflows/clucene-ut.yml
new file mode 100644
index 0000000000..302934cbce
--- /dev/null
+++ b/.github/workflows/clucene-ut.yml
@@ -0,0 +1,118 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Clucene UT
+
+on:
+  pull_request:
+    branches:
+    - clucene
+    - clucene-2.0
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number }}
+  cancel-in-progress: true
+
+jobs:
+  build_linux:
+    name: Build (Linux)
+    runs-on: ubuntu-22.04
+    steps:
+    - name: "Checkout ${{ github.event.pull_request.number }} ${{ 
github.event.pull_request.head.sha }}"
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}
+
+    - name: "Prepare"
+      run: |
+        sudo apt update
+        sudo DEBIAN_FRONTEND=noninteractive apt install --yes \
+          'build-essential' \
+          'automake' \
+          'autoconf' \
+          'libtool-bin' \
+          'pkg-config' \
+          'cmake' \
+          'ninja-build' \
+          'ccache' \
+          'python-is-python3' \
+          'bison' \
+          'byacc' \
+          'flex' \
+          'binutils-dev' \
+          'libiberty-dev' \
+          'curl' \
+          'git' \
+          'zip' \
+          'unzip' \
+          'autopoint' \
+          'openjdk-11-jdk' \
+          'openjdk-11-jdk-headless' \
+          'maven'
+    - name: "Run"
+      run: |
+        set -x
+        mkdir build && cd build
+        cmake ../
+        make cl_test
+        cd bin/
+        ./cl_test
+      # - name: "Setup tmate session to debug"
+      #   if: ${{ failure() }}
+      #   uses: mxschmitt/action-tmate@v3
+      #   timeout-minutes: 30
+
+  build_macos:
+    name: Build (MacOS)
+    runs-on: macos-12
+    steps:
+    - name: "Checkout ${{ github.event.pull_request.number }} ${{ 
github.event.pull_request.head.sha }}"
+      uses: actions/checkout@v4
+      with:
+        ref: ${{ github.event.pull_request.head.sha }}
+
+    - name: "Prepare"
+      run: |
+        brew install \
+          'automake' \
+          'autoconf' \
+          'libtool' \
+          'pkg-config' \
+          'texinfo' \
+          'coreutils' \
+          'gnu-getopt' \
+          'python@3' \
+          'cmake' \
+          'ninja' \
+          'ccache' \
+          'bison' \
+          'byacc' \
+          'gettext' \
+          'wget' \
+          'pcre' \
+          'openjdk@11' \
+          'maven' \
+          'node' \
+          'llvm@16'
+    - name: "Run"
+      run: |
+        set -x
+        mkdir build && cd build
+        cmake ../
+        make cl_test
+        cd bin/
+        ./cl_test
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp 
b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
index 2a32ff04fa..6adfcf1e34 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
@@ -4,6 +4,7 @@
 * Distributable under the terms of either the Apache License (Version 2.0) or
 * the GNU Lesser General Public License, as specified in the COPYING file.
 
------------------------------------------------------------------------------*/
+#include <fstream>
 #include "CLucene/_ApiHeader.h"
 
 #include "CLucene/analysis/Analyzers.h"
@@ -64,7 +65,18 @@ void LanguageBasedAnalyzer::setMode(AnalyzerMode m) {
 
 void LanguageBasedAnalyzer::initDict(const std::string &dictPath) {
     if (_tcscmp(lang, _T("chinese")) == 0) {
-        CL_NS2(analysis, jieba)::ChineseTokenizer::init(dictPath);
+        ChineseDict chineseDict;
+        chineseDict.dictPath_ = dictPath;
+
+        for (const auto& file : chineseDict.files_) {
+            std::string path = dictPath + "/" + file;
+            std::ifstream in(path);
+            if (!in.good()) {
+                _CLTHROWA(CL_ERR_IO, std::string("chinese tokenizer dict file 
not found: " + path).c_str());
+            }
+        }
+
+        CL_NS2(analysis, jieba)::ChineseTokenizer::init(&chineseDict);
     }
 }
 
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp 
b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
index 9a7f5eddfd..ef46315ff5 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
@@ -17,11 +17,11 @@ ChineseTokenizer::ChineseTokenizer(lucene::util::Reader 
*reader, AnalyzerMode m,
     Tokenizer::lowercase = lowercase;
 }
 
-void ChineseTokenizer::init(const std::string &dictPath) {
-    JiebaSingleton::getInstance(dictPath);
+void ChineseTokenizer::init(const ChineseDict* chineseDict) {
+    JiebaSingleton::getInstance(chineseDict);
 }
 
-CL_NS(analysis)::Token *ChineseTokenizer::next(lucene::analysis::Token *token) 
{
+CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) 
{
     if (bufferIndex >= dataLen) {
         return nullptr;
     }
@@ -29,7 +29,7 @@ CL_NS(analysis)::Token 
*ChineseTokenizer::next(lucene::analysis::Token *token) {
     std::string_view& token_text = tokens_text[bufferIndex++];
     size_t size = std::min(token_text.size(), 
static_cast<size_t>(LUCENE_MAX_WORD_LEN));
     if (Tokenizer::lowercase) {
-        if (!token_text.empty() && token_text[0] < 0x80) {
+        if (!token_text.empty() && static_cast<uint8_t>(token_text[0]) < 0x80) 
{
             std::transform(token_text.begin(), token_text.end(),
                            const_cast<char*>(token_text.data()),
                            [](char c) { return to_lower(c); });
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h 
b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
index 9fe33f5805..09760b7b1c 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
@@ -14,14 +14,25 @@
 CL_NS_DEF2(analysis,jieba)
 CL_NS_USE(analysis)
 
+struct ChineseDict {
+    std::string dictPath_;
+    std::vector<std::string> files_ = {
+        "jieba.dict.utf8",
+        "hmm_model.utf8",
+        "user.dict.utf8",
+        "idf.utf8",
+        "stop_words.utf8"
+    };
+};
+
 class JiebaSingleton {
 public:
-    static cppjieba::Jieba& getInstance(const std::string& dictPath = "") {
-        static cppjieba::Jieba instance(dictPath + "/" + "jieba.dict.utf8",
-                                        dictPath + "/" + "hmm_model.utf8",
-                                        dictPath + "/" + "user.dict.utf8",
-                                        dictPath + "/" + "idf.utf8",
-                                        dictPath + "/" + "stop_words.utf8");
+    static cppjieba::Jieba& getInstance(const ChineseDict* dict = nullptr) {
+        static cppjieba::Jieba instance(dict->dictPath_ + "/" + 
dict->files_[0],
+                                        dict->dictPath_ + "/" + 
dict->files_[1],
+                                        dict->dictPath_ + "/" + 
dict->files_[2],
+                                        dict->dictPath_ + "/" + 
dict->files_[3],
+                                        dict->dictPath_ + "/" + 
dict->files_[4]);
         return instance;
     }
 
@@ -46,7 +57,7 @@ public:
     // Constructor
     explicit ChineseTokenizer(lucene::util::Reader *reader, AnalyzerMode mode);
     explicit ChineseTokenizer(lucene::util::Reader *reader, AnalyzerMode mode, 
bool lowercase);
-    static void init(const std::string& dictPath="");
+    static void init(const ChineseDict* chineseDict);
 
     // Destructor
     ~ChineseTokenizer() override = default;
diff --git a/src/core/CLucene/analysis/AnalysisHeader.h 
b/src/core/CLucene/analysis/AnalysisHeader.h
index 578d8e0061..a98e26e4ab 100644
--- a/src/core/CLucene/analysis/AnalysisHeader.h
+++ b/src/core/CLucene/analysis/AnalysisHeader.h
@@ -219,15 +219,15 @@ public:
 template <>
 inline size_t Token::termLength<char>(){
     if ( _termTextLen == -1 ) //it was invalidated by growBuffer
-        _termTextLen = strlen((char*)_buffer);
-    return _termTextLen;
+        _termTextLen = (int32_t)strlen((char*)_buffer);
+    return (size_t)_termTextLen;
 };
 
 template <>
 inline size_t Token::termLength<TCHAR>(){
     if ( _termTextLen == -1 ) //it was invalidated by growBuffer
-        _termTextLen = wcslen((TCHAR*)_buffer);
-    return _termTextLen;
+        _termTextLen = (int32_t)wcslen((TCHAR*)_buffer);
+    return (size_t)_termTextLen;
 };
 
 class CLUCENE_EXPORT TokenStream {
diff --git a/src/core/CLucene/index/IndexWriter.cpp 
b/src/core/CLucene/index/IndexWriter.cpp
index 71cf4f2cac..e30abf5107 100644
--- a/src/core/CLucene/index/IndexWriter.cpp
+++ b/src/core/CLucene/index/IndexWriter.cpp
@@ -40,6 +40,13 @@
 #include <memory>
 #include <assert.h>
 #include <iostream>
+#include <roaring/roaring.hh>
+
+#define FINALLY_CLOSE_OUTPUT(x)       \
+    try {                             \
+        if (x != nullptr) x->close(); \
+    } catch (...) {                   \
+    }
 
 CL_NS_USE(store)
 CL_NS_USE(util)
@@ -50,6 +57,7 @@ CL_NS_DEF(index)
 
 int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000;
 const char *IndexWriter::WRITE_LOCK_NAME = "write.lock";
+const char *IndexWriter::NULL_BITMAP_FILE_NAME = "null_bitmap";
 std::ostream *IndexWriter::defaultInfoStream = NULL;
 
 const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096;
@@ -1255,18 +1263,43 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
     int numIndices = src_dirs.size();
 
     //Set of IndexReaders
-    if (infoStream != NULL) {
+    if (infoStream != nullptr) {
         message(string("src index dir size: ") + Misc::toString(numIndices));
     }
+
+    // first level vector index is src_index_id
+    // second level vector index is src_doc_id
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues(numIndices);
+    IndexInput* null_bitmap_in = nullptr;
     for (int32_t i = 0; i < numIndices; i++) {
         // One index dir may have more than one segment, so we change the code 
to open all segments by using IndexReader::open
         // To keep the number of readers consistent with the number of src 
dirs.
         // Using IndexWriter::segmentInfos will be incorrect when there are 
more than one segment in one index dir
         IndexReader* reader = lucene::index::IndexReader::open(src_dirs[i], 
MERGE_READ_BUFFER_SIZE, false);
         readers.push_back(reader);
-        if (infoStream != NULL) {
+        if (infoStream != nullptr) {
             message(src_dirs[i]->toString());
         }
+
+        // read null_bitmap and store values in srcBitmapValues
+        try {
+            if (src_dirs[i]->fileExists(NULL_BITMAP_FILE_NAME)) {
+                // get null_bitmap index input
+                null_bitmap_in = src_dirs[i]->openInput(NULL_BITMAP_FILE_NAME);
+                size_t null_bitmap_size = null_bitmap_in->length();
+                std::string buf;
+                buf.resize(null_bitmap_size);
+                
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
 null_bitmap_size);
+                auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+                null_bitmap.runOptimize();
+                for (unsigned int v : null_bitmap) {
+                    srcNullBitmapValues[i].emplace_back(v);
+                }
+                FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+            }
+        } catch (CLuceneError &e) {
+            FINALLY_CLOSE_OUTPUT(null_bitmap_in);
+        }
     }
     assert(readers.size() == numIndices);
 
@@ -1302,6 +1335,7 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
     docStoreSegment.clear();
 
     std::vector<lucene::index::IndexWriter *> destIndexWriterList;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
     try {
         /// merge fields
         mergeFields(hasProx);
@@ -1345,10 +1379,17 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
             skipInterval = termInfosWriter->skipInterval;
             maxSkipLevels = termInfosWriter->maxSkipLevels;
             skipListWriterList.push_back(_CLNEW 
DefaultSkipListWriter(skipInterval, maxSkipLevels, (int) dest_index_docs[j], 
freqOutputList[j], proxOutputList[j]));
+
+            // create null_bitmap index output
+            auto* null_bitmap_out = 
dest_dir->createOutput(NULL_BITMAP_FILE_NAME);
+            nullBitmapIndexOutputList.push_back(null_bitmap_out);
         }
 
         /// merge terms
         mergeTerms(hasProx);
+
+        /// merge null_bitmap
+        mergeNullBitmap(srcNullBitmapValues, nullBitmapIndexOutputList);
     } catch (CLuceneError &e) {
         throw e;
     }
@@ -1387,6 +1428,13 @@ void 
IndexWriter::indexCompaction(std::vector<lucene::store::Directory *> &src_d
                     _CLDELETE(r);
                 }
             } readers.clear(););
+            for (auto* null_bitmap_out
+                 : nullBitmapIndexOutputList) {
+                if (null_bitmap_out != nullptr) {
+                    null_bitmap_out->close();
+                    _CLDELETE(null_bitmap_out);
+                }
+            } nullBitmapIndexOutputList.clear();
 
     // update segment infos of dest index_writer in memory
     // close dest index writer
@@ -1818,6 +1866,49 @@ void IndexWriter::mergeTerms(bool hasProx) {
     }
 }
 
+void IndexWriter::mergeNullBitmap(std::vector<std::vector<uint32_t>> 
srcNullBitmapValues, std::vector<lucene::store::IndexOutput *> 
nullBitmapIndexOutputList) {
+    // first level vector index is dest_index_id
+    // second level vector index is dest_doc_id
+    std::vector<std::vector<uint32_t>> destNullBitmapValues(numDestIndexes);
+
+    // iterate srcNullBitmapValues to construct destNullBitmapValues
+    for (size_t i = 0; i < srcNullBitmapValues.size(); ++i) {
+        std::vector<uint32_t> &indexSrcBitmapValues = srcNullBitmapValues[i];
+        if (indexSrcBitmapValues.empty()) {
+            // empty indicates there is no null_bitmap file in this index
+            continue;
+        }
+        for (const auto& srcDocId : indexSrcBitmapValues) {
+            auto destIdx = _trans_vec[i][srcDocId].first;
+            auto destDocId = _trans_vec[i][srcDocId].second;
+            // <UINT32_MAX, UINT32_MAX> indicates current row not exist in 
Doris dest segment.
+            // So we ignore this doc here.
+            if (destIdx == UINT32_MAX || destDocId == UINT32_MAX) {
+                continue;
+            }
+            destNullBitmapValues[destIdx].emplace_back(destDocId);
+        }
+    }
+
+    // construct null_bitmap and write null_bitmap to dest index
+    for (size_t i = 0; i < destNullBitmapValues.size(); ++i) {
+        roaring::Roaring null_bitmap;
+        for (const auto& v : destNullBitmapValues[i]) {
+            null_bitmap.add(v);
+        }
+        // write null_bitmap file
+        auto* nullBitmapIndexOutput = nullBitmapIndexOutputList[i];
+        null_bitmap.runOptimize();
+        size_t size = null_bitmap.getSizeInBytes(false);
+        if (size > 0) {
+            std::string buf;
+            buf.resize(size);
+            null_bitmap.write(reinterpret_cast<char*>(buf.data()), false);
+            
nullBitmapIndexOutput->writeBytes(reinterpret_cast<uint8_t*>(buf.data()), size);
+        }
+    }
+}
+
 void 
IndexWriter::addIndexesNoOptimize(CL_NS(util)::ArrayBase<CL_NS(store)::Directory
 *> &dirs) {
     ensureOpen();
 
diff --git a/src/core/CLucene/index/IndexWriter.h 
b/src/core/CLucene/index/IndexWriter.h
index 719ce0e5dc..7cfb67d2ca 100644
--- a/src/core/CLucene/index/IndexWriter.h
+++ b/src/core/CLucene/index/IndexWriter.h
@@ -325,10 +325,21 @@ public:
     void writeFields(lucene::store::Directory* d, std::string segment);
     // merge terms and write files
     void mergeTerms(bool hasProx);
+    // merge null_bitmap
+    void mergeNullBitmap(std::vector<std::vector<uint32_t>> srcBitmapValues, 
std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList);
 
     // Compare current index with the other
     void compareIndexes(lucene::store::Directory* other);
 
+    // only for tests
+    void setNumDestIndexes(int32_t num_dest_indexes) {
+        numDestIndexes = num_dest_indexes;
+    }
+    // only for tests
+    void setTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>> 
trans_vec) {
+      _trans_vec = std::move(trans_vec);
+    }
+
        // Release the write lock, if needed.
        SegmentInfos* segmentInfos;
 
@@ -414,6 +425,11 @@ public:
    */
   static const char* WRITE_LOCK_NAME; //"write.lock";
 
+  /**
+   * Name of the null bitmap in the index.
+   */
+  static const char* NULL_BITMAP_FILE_NAME; //"null_bitmap";
+
   /**
    * @deprecated
    * @see LogMergePolicy#DEFAULT_MERGE_FACTOR
diff --git a/src/core/CLucene/index/MultiSegmentReader.cpp 
b/src/core/CLucene/index/MultiSegmentReader.cpp
index ad37807e1a..b4be5f0129 100644
--- a/src/core/CLucene/index/MultiSegmentReader.cpp
+++ b/src/core/CLucene/index/MultiSegmentReader.cpp
@@ -561,6 +561,10 @@ int32_t MultiTermDocs::docFreq() {
 
 int32_t MultiTermDocs::doc() const {
   CND_PRECONDITION(current!=NULL,"current==NULL, check that next() was 
called");
+  // if not found term, current will return INT_MAX, we could not add base, 
otherwise it will overflow.
+  if (current->doc() == LUCENE_INT32_MAX_SHOULDBE) {
+      return LUCENE_INT32_MAX_SHOULDBE;
+  }
   return base + current->doc();
 }
 int32_t MultiTermDocs::freq() const {
diff --git a/src/core/CLucene/index/SegmentInfos.cpp 
b/src/core/CLucene/index/SegmentInfos.cpp
index 035321295e..60a3695474 100644
--- a/src/core/CLucene/index/SegmentInfos.cpp
+++ b/src/core/CLucene/index/SegmentInfos.cpp
@@ -826,6 +826,9 @@ string SegmentInfo::segString(Directory* dir) {
           // Try not to leave a truncated segments_N file in
           // the index:
           directory->deleteFile(segmentFileName.c_str());
+          if (output != nullptr) {
+              _CLDELETE(output);
+          }
         }
       )
     )
diff --git a/src/core/CLucene/index/SegmentReader.cpp 
b/src/core/CLucene/index/SegmentReader.cpp
index f7741a9f80..721263664f 100644
--- a/src/core/CLucene/index/SegmentReader.cpp
+++ b/src/core/CLucene/index/SegmentReader.cpp
@@ -257,7 +257,12 @@ SegmentReader *SegmentReader::get(Directory *dir, 
SegmentInfo *si,
     instance->init(dir, sis, closeDir);
     // TODO: make this configurable...
     bool fieldsReaderExist = false;
-    instance->initialize(si, readBufferSize == -1 ? 
BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores, 
fieldsReaderExist);
+    try {
+        instance->initialize(si, readBufferSize == -1 ? 
BufferedIndexInput::BUFFER_SIZE : readBufferSize, doOpenStores, 
fieldsReaderExist);
+    } catch (CLuceneError& e) {
+        _CLDELETE(instance)
+        throw e;
+    }
     return instance;
 }
 
diff --git a/src/core/CLucene/index/SegmentTermDocs.cpp 
b/src/core/CLucene/index/SegmentTermDocs.cpp
index 9108f1dfd5..e346dc0ca2 100644
--- a/src/core/CLucene/index/SegmentTermDocs.cpp
+++ b/src/core/CLucene/index/SegmentTermDocs.cpp
@@ -19,7 +19,7 @@
 CL_NS_DEF(index)
 
 SegmentTermDocs::SegmentTermDocs(const SegmentReader *_parent) : 
parent(_parent), freqStream(_parent->freqStream->clone()),
-                                                                 count(0), 
df(0), deletedDocs(_parent->deletedDocs), _doc(0), _freq(0), 
skipInterval(_parent->tis->getSkipInterval()),
+                                                                 count(0), 
df(0), deletedDocs(_parent->deletedDocs), _doc(-1), _freq(0), 
skipInterval(_parent->tis->getSkipInterval()),
                                                                  
maxSkipLevels(_parent->tis->getMaxSkipLevels()), skipListReader(NULL), 
freqBasePointer(0), proxBasePointer(0),
                                                                  
skipPointer(0), haveSkipped(false), pointer(0), pointerMax(0), 
indexVersion_(_parent->_fieldInfos->getIndexVersion()),
                                                                  
hasProx(_parent->_fieldInfos->hasProx()), buffer_(freqStream, hasProx, 
indexVersion_) {
@@ -73,7 +73,7 @@ void SegmentTermDocs::seek(const TermInfo *ti, Term *term) {
         df = 0;
     } else {// punt case
         df = ti->docFreq;
-        _doc = 0;
+        _doc = -1;
         freqBasePointer = ti->freqPointer;
         proxBasePointer = ti->proxPointer;
         skipPointer = freqBasePointer + ti->skipOffset;
diff --git a/src/core/CLucene/index/SegmentTermEnum.cpp 
b/src/core/CLucene/index/SegmentTermEnum.cpp
index 574d939643..8179c7b780 100644
--- a/src/core/CLucene/index/SegmentTermEnum.cpp
+++ b/src/core/CLucene/index/SegmentTermEnum.cpp
@@ -5,424 +5,419 @@
 * the GNU Lesser General Public License, as specified in the COPYING file.
 
------------------------------------------------------------------------------*/
 #include "CLucene/_ApiHeader.h"
-#include "_SegmentHeader.h"
-#include "_SegmentTermEnum.h"
-
+#include "Term.h"
 #include "Terms.h"
 #include "_FieldInfos.h"
-#include "Term.h"
+#include "_SegmentHeader.h"
+#include "_SegmentTermEnum.h"
 #include "_TermInfo.h"
 #include "_TermInfosWriter.h"
 
 CL_NS_USE(store)
 CL_NS_DEF(index)
 
-       SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const 
bool isi, int32_t in_format):
-               fieldInfos(fis){
-       //Func - Constructor
-       //Pre  - i holds a reference to an instance of IndexInput
-       //       fis holds a reference to an instance of FieldInfos
-       //       isi
-       //Post - An instance of SegmentTermEnum has been created
-               input            = i;
-               position     = -1;
-               //Instantiate a Term with empty field, empty text and which is 
interned (see term.h what interned means)
-           _term         = _CLNEW Term;
-               isIndex      = isi;
-               termInfo     = _CLNEW TermInfo();
-               indexPointer = 0;
-               buffer       = NULL;
-               bufferLength = 0;
-               prev         = NULL;
-               formatM1SkipInterval = 0;
-               maxSkipLevels = 1;
-               
-               //Set isClone to false as the instance is not clone of another 
instance
-               isClone      = false;
-
-               int32_t firstInt = in_format == -4 ? in_format : 
input->readInt();
+SegmentTermEnum::SegmentTermEnum(IndexInput* i, FieldInfos* fis, const bool 
isi)
+        : fieldInfos(fis) {
+    //Func - Constructor
+    //Pre  - i holds a reference to an instance of IndexInput
+    //       fis holds a reference to an instance of FieldInfos
+    //       isi
+    //Post - An instance of SegmentTermEnum has been created
+    input = i;
+    position = -1;
+    //Instantiate a Term with empty field, empty text and which is interned 
(see term.h what interned means)
+    _term = _CLNEW Term;
+    isIndex = isi;
+    termInfo = _CLNEW TermInfo();
+    indexPointer = 0;
+    buffer = NULL;
+    bufferLength = 0;
+    prev = NULL;
+    formatM1SkipInterval = 0;
+    maxSkipLevels = 1;
+
+    //Set isClone to false as the instance is not clone of another instance
+    isClone = false;
+}
+
+void SegmentTermEnum::init(int32_t in_format) {
+    int32_t firstInt = in_format == -4 ? in_format : input->readInt();
 
     if (firstInt >= 0) {
-         // original-format file, without explicit format version number
-         format = 0;
-         size = firstInt;
+        // original-format file, without explicit format version number
+        format = 0;
+        size = firstInt;
 
-         // back-compatible settings
-         indexInterval = 128;
-         skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo 
optimization
+        // back-compatible settings
+        indexInterval = 128;
+        skipInterval = LUCENE_INT32_MAX_SHOULDBE; // switch off skipTo 
optimization
 
-      } else {
-         // we have a format version number
-         format = firstInt;
+    } else {
+        // we have a format version number
+        format = firstInt;
 
-         // check that it is a format we can understand
-         if (format < TermInfosWriter::FORMAT){
+        // check that it is a format we can understand
+        if (format < TermInfosWriter::FORMAT) {
             TCHAR err[30];
-            _sntprintf(err,30,_T("Unknown format version: %d"), format);
-            _CLTHROWT(CL_ERR_CorruptIndex,err);
-         }
-
-                                if (format == -4) {
-                                               if (isIndex) {
-                                                       size = 
input->readLong();
-                                                       if (size < 0) {
-                                                               auto pos = 
input->getFilePointer();
-                                                               
input->seek(input->length() - 16);
-                                                               size = 
input->readLong();
-                                                               tisSize = 
input->readLong();
-                                                               
input->seek(pos);
-                                                       }
-                                                       
-                                                       indexInterval = 
input->readInt();
-                                                       skipInterval = 
input->readInt();
-                                                       maxSkipLevels = 
input->readInt();
-                                               }
-                                } else {
-                                               size = input->readLong();       
             // read the size
-                                               if (size < 0) {                 
             // read the size at file footer, if size < 0
-                                                               auto pos = 
input->getFilePointer();
-                                                               
input->seek(input->length() - 8);
-                                                               size = 
input->readLong();
-                                                               
input->seek(pos);
-                                               }
-                                               
-                                               if(format == -1){
-                                                               if (!isIndex) {
-                                                                       
indexInterval = input->readInt();
-                                                                       
formatM1SkipInterval = input->readInt();
-                                                               }
-                                                               // switch off 
skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in 
-                                                               // skipTo 
implementation of these versions
-                                                               skipInterval = 
LUCENE_INT32_MAX_SHOULDBE;
-                                               }else{
-                                                               indexInterval = 
input->readInt();
-                                                               skipInterval = 
input->readInt();
-                                                               if ( format == 
-3 ) {
-                                                                       // this 
new format introduces multi-level skipping
-                                                                       
maxSkipLevels = input->readInt();
-                                                               }
-                                               }
-                                }
-      }
-       }
-
-       SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone):
-               fieldInfos(clone.fieldInfos)
-       {
-       //Func - Constructor
-       //       The instance is created by cloning all properties of clone
-       //Pre  - clone holds a valid reference to SegmentTermEnum
-       //Post - An instance of SegmentTermEnum with the same properties as 
clone
-               
-               input            = clone.input->clone();
-               //Copy the postion from the clone
-               position     = clone.position;
-
-        if ( clone._term != NULL ){
-                       _term         = _CLNEW Term;
-                       _term->set(clone._term,clone._term->text());
-               }else
-                       _term = NULL;
-               isIndex      = clone.isIndex;
-               termInfo     = _CLNEW TermInfo(clone.termInfo);
-               indexPointer = clone.indexPointer;
-               buffer       = 
clone.buffer==NULL?NULL:(TCHAR*)malloc(sizeof(TCHAR) * (clone.bufferLength+1));
-               bufferLength = clone.bufferLength;
-               prev         = clone.prev==NULL?NULL:_CLNEW 
Term(clone.prev->field(),clone.prev->text(),false);
-               size         = clone.size;
-               tisSize     = clone.tisSize;
-
-      format       = clone.format;
-      indexInterval= clone.indexInterval;
-      skipInterval = clone.skipInterval;
-      formatM1SkipInterval = clone.formatM1SkipInterval;
-      maxSkipLevels = clone.maxSkipLevels;
-      
-               //Set isClone to true as this instance is a clone of another 
instance
-               isClone      = true;
-
-               //Copy the contents of buffer of clone to the buffer of this 
instance
-               if ( clone.buffer != NULL )
-                       memcpy(buffer,clone.buffer,bufferLength * 
sizeof(TCHAR));
-       }
-
-       SegmentTermEnum::~SegmentTermEnum(){
-       //Func - Destructor
-       //Pre  - true
-       //Post - The instance has been destroyed. If this instance was a clone
-       //       then the inputstream is closed and deleted too.
-
-        //todo: revisit this... close() should clean up most of everything.
-
-               //Finalize prev
-               _CLDECDELETE(prev );
-               //Finalize term
-               _CLDECDELETE( _term );
-               
-
-               //Delete the buffer if necessary
-               if ( buffer != NULL ) free(buffer);
-               //Delete termInfo if necessary
-               _CLDELETE(termInfo);
-
-               //Check if this instance is a clone
-               if ( isClone ){
-                       //Close the inputstream
-                       input->close();
-                       //delete the inputstream
-                       _CLDELETE(input);
-                       }
-       }
-
-       void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
-               if (format == -4) {
-                       size = tii->tisSize;
-                       indexInterval = tii->indexInterval;
-                       skipInterval = tii->skipInterval;
-                       maxSkipLevels = tii->maxSkipLevels;
-                       size_t header = sizeof(format) +
-                                                                               
        sizeof(size) +
-                                                                               
        sizeof(indexInterval) + 
-                                                                               
        sizeof(skipInterval) + 
-                                                                               
        sizeof(maxSkipLevels);
-                       input->seek(header);
-               }
-       }
-
-       const char* SegmentTermEnum::getObjectName() const{ return 
getClassName(); }
-       const char* SegmentTermEnum::getClassName(){ return "SegmentTermEnum"; }
-
-       bool SegmentTermEnum::next(){
-       //Func - Moves the current of the set to the next in the set
-       //Pre  - true
-       //Post - If the end has been reached NULL is returned otherwise the 
term has
-       //       become the next Term in the enumeration
-
-               //Increase position by and and check if the end has been reached
-               if (position++ >= size-1) {
-                       //delete term
-                       _CLDECDELETE(_term);
-                       return false;
-               }
-
-               //delete the previous enumerated term
-               Term* tmp=NULL;
-               if ( prev != NULL ){
-                       if ( _LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1 ){
-                               _CLDECDELETE(prev); //todo: tune other places 
try and delete its term 
-                       }else
-                               tmp = prev; //we are going to re-use this term
-               }
-               //prev becomes the current enumerated term
-               prev = _term;
-               //term becomes the next term read from inputStream input
-               _term = readTerm(tmp);
-
-               //Read docFreq, the number of documents which contain the term.
-               termInfo->docFreq = input->readVInt();
-               //Read freqPointer, a pointer into the TermFreqs file (.frq)
-               termInfo->freqPointer += input->readVLong();
-               
-               //Read proxPointer, a pointer into the TermPosition file (.prx).
-               termInfo->proxPointer += input->readVLong();
-
-      if(format == -1){
-         //  just read skipOffset in order to increment  file pointer; 
-         // value is never used since skipTo is switched off
-         if (!isIndex) {
+            _sntprintf(err, 30, _T("Unknown format version: %d"), format);
+            _CLTHROWT(CL_ERR_CorruptIndex, err);
+        }
+
+        if (format == -4) {
+            if (isIndex) {
+                size = input->readLong();
+                if (size < 0) {
+                    auto pos = input->getFilePointer();
+                    input->seek(input->length() - 16);
+                    size = input->readLong();
+                    tisSize = input->readLong();
+                    input->seek(pos);
+                }
+
+                indexInterval = input->readInt();
+                skipInterval = input->readInt();
+                maxSkipLevels = input->readInt();
+            }
+        } else {
+            size = input->readLong(); // read the size
+            if (size < 0) {           // read the size at file footer, if size 
< 0
+                auto pos = input->getFilePointer();
+                input->seek(input->length() - 8);
+                size = input->readLong();
+                input->seek(pos);
+            }
+
+            if (format == -1) {
+                if (!isIndex) {
+                    indexInterval = input->readInt();
+                    formatM1SkipInterval = input->readInt();
+                }
+                // switch off skipTo optimization for file format prior to 
1.4rc2 in order to avoid a bug in
+                // skipTo implementation of these versions
+                skipInterval = LUCENE_INT32_MAX_SHOULDBE;
+            } else {
+                indexInterval = input->readInt();
+                skipInterval = input->readInt();
+                if (format == -3) {
+                    // this new format introduces multi-level skipping
+                    maxSkipLevels = input->readInt();
+                }
+            }
+        }
+    }
+}
+
+SegmentTermEnum::SegmentTermEnum(const SegmentTermEnum& clone) : 
fieldInfos(clone.fieldInfos) {
+    //Func - Constructor
+    //       The instance is created by cloning all properties of clone
+    //Pre  - clone holds a valid reference to SegmentTermEnum
+    //Post - An instance of SegmentTermEnum with the same properties as clone
+
+    input = clone.input->clone();
+    //Copy the postion from the clone
+    position = clone.position;
+
+    if (clone._term != NULL) {
+        _term = _CLNEW Term;
+        _term->set(clone._term, clone._term->text());
+    } else
+        _term = NULL;
+    isIndex = clone.isIndex;
+    termInfo = _CLNEW TermInfo(clone.termInfo);
+    indexPointer = clone.indexPointer;
+    buffer = clone.buffer == NULL ? NULL : (TCHAR*)malloc(sizeof(TCHAR) * 
(clone.bufferLength + 1));
+    bufferLength = clone.bufferLength;
+    prev = clone.prev == NULL ? NULL : _CLNEW Term(clone.prev->field(), 
clone.prev->text(), false);
+    size = clone.size;
+    tisSize = clone.tisSize;
+
+    format = clone.format;
+    indexInterval = clone.indexInterval;
+    skipInterval = clone.skipInterval;
+    formatM1SkipInterval = clone.formatM1SkipInterval;
+    maxSkipLevels = clone.maxSkipLevels;
+
+    //Set isClone to true as this instance is a clone of another instance
+    isClone = true;
+
+    //Copy the contents of buffer of clone to the buffer of this instance
+    if (clone.buffer != NULL) memcpy(buffer, clone.buffer, bufferLength * 
sizeof(TCHAR));
+}
+
+SegmentTermEnum::~SegmentTermEnum() {
+    //Func - Destructor
+    //Pre  - true
+    //Post - The instance has been destroyed. If this instance was a clone
+    //       then the inputstream is closed and deleted too.
+
+    //todo: revisit this... close() should clean up most of everything.
+
+    //Finalize prev
+    _CLDECDELETE(prev);
+    //Finalize term
+    _CLDECDELETE(_term);
+
+    //Delete the buffer if necessary
+    if (buffer != NULL) free(buffer);
+    //Delete termInfo if necessary
+    _CLDELETE(termInfo);
+
+    //Check if this instance is a clone
+    if (isClone) {
+        //Close the inputstream
+        input->close();
+        //delete the inputstream
+        _CLDELETE(input);
+    }
+}
+
+void SegmentTermEnum::initByTii(SegmentTermEnum* tii) {
+    if (format == -4) {
+        size = tii->tisSize;
+        indexInterval = tii->indexInterval;
+        skipInterval = tii->skipInterval;
+        maxSkipLevels = tii->maxSkipLevels;
+        size_t header = sizeof(format) + sizeof(size) + sizeof(indexInterval) +
+                        sizeof(skipInterval) + sizeof(maxSkipLevels);
+        input->seek(header);
+    }
+}
+
+const char* SegmentTermEnum::getObjectName() const {
+    return getClassName();
+}
+const char* SegmentTermEnum::getClassName() {
+    return "SegmentTermEnum";
+}
+
+bool SegmentTermEnum::next() {
+    //Func - Moves the current of the set to the next in the set
+    //Pre  - true
+    //Post - If the end has been reached NULL is returned otherwise the term 
has
+    //       become the next Term in the enumeration
+
+    //Increase position by and and check if the end has been reached
+    if (position++ >= size - 1) {
+        //delete term
+        _CLDECDELETE(_term);
+        return false;
+    }
+
+    //delete the previous enumerated term
+    Term* tmp = NULL;
+    if (prev != NULL) {
+        if (_LUCENE_ATOMIC_INT_GET(prev->__cl_refcount) > 1) {
+            _CLDECDELETE(prev); //todo: tune other places try and delete its 
term
+        } else
+            tmp = prev; //we are going to re-use this term
+    }
+    //prev becomes the current enumerated term
+    prev = _term;
+    //term becomes the next term read from inputStream input
+    _term = readTerm(tmp);
+
+    //Read docFreq, the number of documents which contain the term.
+    termInfo->docFreq = input->readVInt();
+    //Read freqPointer, a pointer into the TermFreqs file (.frq)
+    termInfo->freqPointer += input->readVLong();
+
+    //Read proxPointer, a pointer into the TermPosition file (.prx).
+    termInfo->proxPointer += input->readVLong();
+
+    if (format == -1) {
+        //  just read skipOffset in order to increment  file pointer;
+        // value is never used since skipTo is switched off
+        if (!isIndex) {
             if (termInfo->docFreq > formatM1SkipInterval) {
-               termInfo->skipOffset = input->readVInt(); 
+                termInfo->skipOffset = input->readVInt();
             }
-         }
-      }else{
-         if (termInfo->docFreq >= skipInterval) 
-            termInfo->skipOffset = input->readVInt();
-      }
-
-               //Check if the enumeration is an index
-               if (isIndex)
-                       //read index pointer
-                       indexPointer += input->readVLong();
-
-               return true;
-       }
-
-       Term* SegmentTermEnum::term(bool pointer) {
-               if ( pointer )
-                       return _CL_POINTER(_term);
-               else
-                       return _term;
-       }
-
-       void SegmentTermEnum::scanTo(const Term *term){
-       //Func - Scan for Term without allocating new Terms
-       //Pre  - term != NULL
-       //Post - The iterator term has been moved to the position where Term is 
expected to be
-       //       in the enumeration
-               while ( term->compareTo(this->_term) > 0 && next()) 
-               {
-               }
-       }
-
-       void SegmentTermEnum::close() {
-       //Func - Closes the enumeration to further activity, freeing resources.
-       //Pre  - true
-       //Post - The inputStream input has been closed
-
-                       input->close();
-       }
-
-       int32_t SegmentTermEnum::docFreq() const {
-       //Func - Returns the document frequency of the current term in the set
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post  - The document frequency of the current enumerated term has 
been returned
-
-               return termInfo->docFreq;
-       }
-
-       void SegmentTermEnum::seek(const int64_t pointer, const int32_t p, 
Term* t, TermInfo* ti) {
-       //Func - Repositions term and termInfo within the enumeration
-       //Pre  - pointer >= 0
-       //       p >= 0 and contains the new position within the enumeration
-       //       t is a valid reference to a Term and is the new current term 
in the enumeration
-       //       ti is a valid reference to a TermInfo and is corresponding 
TermInfo form the new
-       //       current Term
-       //Post - term and terminfo have been repositioned within the enumeration
-
-               //Reset the IndexInput input to pointer
-               input->seek(pointer);
-               //Assign the new position
-               position = p;
-
-               //finalize the current term
-               if ( _term == NULL || 
_LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1 ){
-                       _CLDECDELETE(_term);
-                       //Get a pointer from t and increase the reference 
counter of t
-                       _term = _CLNEW Term; //cannot use reference, because 
TermInfosReader uses non ref-counted array
-               }
-               _term->set(t,t->text());
-
-               //finalize prev
-               _CLDECDELETE(prev);
-
-               //Change the current termInfo so it matches the new current term
-               termInfo->set(ti);
-
-               //Have the buffer grown if needed
-               if ( bufferLength <= _term->textLength() )
-                       growBuffer(_term->textLength(), true );           // 
copy term text into buffer
-               else
-                       _tcsncpy(buffer,_term->text(),bufferLength); //just 
copy the buffer
-       }
-
-       TermInfo* SegmentTermEnum::getTermInfo()const {
-       //Func - Returns a clone of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - A clone of the current termInfo has been returned
-
-               return _CLNEW TermInfo(*termInfo); //clone
-       }
-
-       void SegmentTermEnum::getTermInfo(TermInfo* ti)const {
-       //Func - Retrieves a clone of termInfo through the reference ti
-       //Pre  - ti contains a valid reference to TermInfo
-       //       termInfo != NULL
-       //       next() must have been called once
-       //Post - ti contains a clone of termInfo
-
-               ti->set(termInfo);
-       }
-
-       int64_t SegmentTermEnum::freqPointer()const {
-       //Func - Returns the freqpointer of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - The freqpointer of the current termInfo has been returned
-
-               return termInfo->freqPointer;
-       }
-
-       int64_t SegmentTermEnum::proxPointer()const {
-       //Func - Returns the proxPointer of the current termInfo
-       //Pre  - termInfo != NULL
-       //       next() must have been called once
-       //Post - the proxPointer of the current termInfo has been returned
-
-               return termInfo->proxPointer;
-       }
-
-       SegmentTermEnum* SegmentTermEnum::clone() const {
-       //Func - Returns a clone of this instance
-       //Pre  - true
-       //Post - An clone of this instance has been returned
-
-               return _CLNEW SegmentTermEnum(*this);
-       }
-
-       Term* SegmentTermEnum::readTerm(Term* reuse) {
-       //Func - Reads the next term in the enumeration
-       //Pre  - true
-       //Post - The next Term in the enumeration has been read and returned
-
-               //Read the start position from the inputStream input
-               int32_t start = input->readVInt();
-               //Read the length of term in the inputStream input
-               int32_t length = input->readVInt();
-
-               //Calculated the total lenght of bytes that buffer must be to 
contain the current
-               //chars in buffer and the new ones yet to be read
-               uint32_t totalLength = start + length;
-
-               if (static_cast<uint32_t>(bufferLength) < totalLength+1)
-                       growBuffer(totalLength, false); //dont copy the buffer 
over.
-
-               //Read a length number of characters into the buffer from 
position start in the inputStream input
-               input->readChars(buffer, start, length);
-               //Null terminate the string
-               buffer[totalLength] = 0;
-
-               //Return a new Term     
-               int32_t field = input->readVInt();
-               const TCHAR* fieldname = fieldInfos->fieldName(field);
-               if ( reuse == NULL )
-                       reuse = _CLNEW Term;
-
-               reuse->set(fieldname, buffer, false);
-               return reuse;
-       }
-
-       void SegmentTermEnum::growBuffer(const uint32_t length, bool 
force_copy) {
-       //Func - Instantiate a buffer of length length+1
-       //Pre  - length > 0
-       //Post - pre(buffer) has been deleted with its contents. A new buffer
-       //       has been allocated of length length+1 and the text of term has 
been copied
-       //       to buffer
-               //todo: we could guess that we will need to re-grow this
-               //buffer a few times...so start off with a reasonable grow
-               //value...
-               if ( bufferLength > length )
-                       return;
-
-        //Store the new bufferLength
-               if ( length - bufferLength < 8 )
-                       bufferLength = length+8;
-               else
-                       bufferLength = length+1;
-
-               bool copy = buffer==NULL;
-
-               //Instantiate the new buffer + 1 is needed for terminator '\0'
-               if ( buffer == NULL )
-                       buffer = (TCHAR*)malloc(sizeof(TCHAR) * 
(bufferLength+1));
-               else
-                       buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) * 
(bufferLength+1));
-
-               if ( copy || force_copy){
-                       //Copy the text of term into buffer
-                       _tcsncpy(buffer,_term->text(),bufferLength);
-               }
-       }
+        }
+    } else {
+        if (termInfo->docFreq >= skipInterval) termInfo->skipOffset = 
input->readVInt();
+    }
+
+    //Check if the enumeration is an index
+    if (isIndex)
+        //read index pointer
+        indexPointer += input->readVLong();
+
+    return true;
+}
+
+Term* SegmentTermEnum::term(bool pointer) {
+    if (pointer)
+        return _CL_POINTER(_term);
+    else
+        return _term;
+}
+
+void SegmentTermEnum::scanTo(const Term* term) {
+    //Func - Scan for Term without allocating new Terms
+    //Pre  - term != NULL
+    //Post - The iterator term has been moved to the position where Term is 
expected to be
+    //       in the enumeration
+    while (term->compareTo(this->_term) > 0 && next()) {
+    }
+}
+
+void SegmentTermEnum::close() {
+    //Func - Closes the enumeration to further activity, freeing resources.
+    //Pre  - true
+    //Post - The inputStream input has been closed
+
+    input->close();
+}
+
+int32_t SegmentTermEnum::docFreq() const {
+    //Func - Returns the document frequency of the current term in the set
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post  - The document frequency of the current enumerated term has been 
returned
+
+    return termInfo->docFreq;
+}
+
+void SegmentTermEnum::seek(const int64_t pointer, const int32_t p, Term* t, 
TermInfo* ti) {
+    //Func - Repositions term and termInfo within the enumeration
+    //Pre  - pointer >= 0
+    //       p >= 0 and contains the new position within the enumeration
+    //       t is a valid reference to a Term and is the new current term in 
the enumeration
+    //       ti is a valid reference to a TermInfo and is corresponding 
TermInfo form the new
+    //       current Term
+    //Post - term and terminfo have been repositioned within the enumeration
+
+    //Reset the IndexInput input to pointer
+    input->seek(pointer);
+    //Assign the new position
+    position = p;
+
+    //finalize the current term
+    if (_term == NULL || _LUCENE_ATOMIC_INT_GET(_term->__cl_refcount) > 1) {
+        _CLDECDELETE(_term);
+        //Get a pointer from t and increase the reference counter of t
+        _term = _CLNEW
+                Term; //cannot use reference, because TermInfosReader uses non 
ref-counted array
+    }
+    _term->set(t, t->text());
+
+    //finalize prev
+    _CLDECDELETE(prev);
+
+    //Change the current termInfo so it matches the new current term
+    termInfo->set(ti);
+
+    //Have the buffer grown if needed
+    if (bufferLength <= _term->textLength())
+        growBuffer(_term->textLength(), true); // copy term text into buffer
+    else
+        _tcsncpy(buffer, _term->text(), bufferLength); //just copy the buffer
+}
+
+TermInfo* SegmentTermEnum::getTermInfo() const {
+    //Func - Returns a clone of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - A clone of the current termInfo has been returned
+
+    return _CLNEW TermInfo(*termInfo); //clone
+}
+
+void SegmentTermEnum::getTermInfo(TermInfo* ti) const {
+    //Func - Retrieves a clone of termInfo through the reference ti
+    //Pre  - ti contains a valid reference to TermInfo
+    //       termInfo != NULL
+    //       next() must have been called once
+    //Post - ti contains a clone of termInfo
+
+    ti->set(termInfo);
+}
+
+int64_t SegmentTermEnum::freqPointer() const {
+    //Func - Returns the freqpointer of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - The freqpointer of the current termInfo has been returned
+
+    return termInfo->freqPointer;
+}
+
+int64_t SegmentTermEnum::proxPointer() const {
+    //Func - Returns the proxPointer of the current termInfo
+    //Pre  - termInfo != NULL
+    //       next() must have been called once
+    //Post - the proxPointer of the current termInfo has been returned
+
+    return termInfo->proxPointer;
+}
+
+SegmentTermEnum* SegmentTermEnum::clone() const {
+    //Func - Returns a clone of this instance
+    //Pre  - true
+    //Post - An clone of this instance has been returned
+
+    return _CLNEW SegmentTermEnum(*this);
+}
+
+Term* SegmentTermEnum::readTerm(Term* reuse) {
+    //Func - Reads the next term in the enumeration
+    //Pre  - true
+    //Post - The next Term in the enumeration has been read and returned
+
+    //Read the start position from the inputStream input
+    int32_t start = input->readVInt();
+    //Read the length of term in the inputStream input
+    int32_t length = input->readVInt();
+
+    //Calculated the total lenght of bytes that buffer must be to contain the 
current
+    //chars in buffer and the new ones yet to be read
+    uint32_t totalLength = start + length;
+
+    if (static_cast<uint32_t>(bufferLength) < totalLength + 1)
+        growBuffer(totalLength, false); //dont copy the buffer over.
+
+    //Read a length number of characters into the buffer from position start 
in the inputStream input
+    input->readChars(buffer, start, length);
+    //Null terminate the string
+    buffer[totalLength] = 0;
+
+    //Return a new Term
+    int32_t field = input->readVInt();
+    const TCHAR* fieldname = fieldInfos->fieldName(field);
+    if (reuse == NULL) reuse = _CLNEW Term;
+
+    reuse->set(fieldname, buffer, false);
+    return reuse;
+}
+
+void SegmentTermEnum::growBuffer(const uint32_t length, bool force_copy) {
+    //Func - Instantiate a buffer of length length+1
+    //Pre  - length > 0
+    //Post - pre(buffer) has been deleted with its contents. A new buffer
+    //       has been allocated of length length+1 and the text of term has 
been copied
+    //       to buffer
+    //todo: we could guess that we will need to re-grow this
+    //buffer a few times...so start off with a reasonable grow
+    //value...
+    if (bufferLength > length) return;
+
+    //Store the new bufferLength
+    if (length - bufferLength < 8)
+        bufferLength = length + 8;
+    else
+        bufferLength = length + 1;
+
+    bool copy = buffer == NULL;
+
+    //Instantiate the new buffer + 1 is needed for terminator '\0'
+    if (buffer == NULL)
+        buffer = (TCHAR*)malloc(sizeof(TCHAR) * (bufferLength + 1));
+    else
+        buffer = (TCHAR*)realloc(buffer, sizeof(TCHAR) * (bufferLength + 1));
+
+    if (copy || force_copy) {
+        //Copy the text of term into buffer
+        _tcsncpy(buffer, _term->text(), bufferLength);
+    }
+}
 
 CL_NS_END
diff --git a/src/core/CLucene/index/TermInfosReader.cpp 
b/src/core/CLucene/index/TermInfosReader.cpp
index 7996d4d4f8..6cf8b42fe4 100644
--- a/src/core/CLucene/index/TermInfosReader.cpp
+++ b/src/core/CLucene/index/TermInfosReader.cpp
@@ -24,207 +24,206 @@ CL_NS_USE(store)
 CL_NS_USE(util)
 CL_NS_DEF(index)
 
-
-  TermInfosReader::TermInfosReader(Directory* dir, const char* seg, 
FieldInfos* fis, const int32_t readBufferSize):
-      directory (dir),fieldInfos (fis), indexTerms(NULL), indexInfos(NULL), 
indexPointers(NULL), indexDivisor(1)
-  {
-  //Func - Constructor.
-  //       Reads the TermInfos file (.tis) and eventually the Term Info Index 
file (.tii)
-  //Pre  - dir is a reference to a valid Directory
-  //       Fis contains a valid reference to an FieldInfos instance
-  //       seg != NULL and contains the name of the segment
-  //Post - An instance has been created and the index named seg has been read. 
(Remember
-  //       a segment is nothing more then an independently readable index)
-
-      CND_PRECONDITION(seg != NULL, "seg is NULL");
-
-         //Initialize the name of the segment
-      segment    =  seg;
-
-      //Create a filname fo a Term Info File
-         string tisFile = Misc::segmentname(segment,".tis");
-         string tiiFile = Misc::segmentname(segment,".tii");
-         bool success = false;
+TermInfosReader::TermInfosReader(Directory* dir, const char* seg, FieldInfos* 
fis,
+                                 const int32_t readBufferSize)
+        : directory(dir),
+          fieldInfos(fis),
+          indexTerms(NULL),
+          indexInfos(NULL),
+          indexPointers(NULL),
+          indexDivisor(1) {
+    //Func - Constructor.
+    //       Reads the TermInfos file (.tis) and eventually the Term Info 
Index file (.tii)
+    //Pre  - dir is a reference to a valid Directory
+    //       Fis contains a valid reference to an FieldInfos instance
+    //       seg != NULL and contains the name of the segment
+    //Post - An instance has been created and the index named seg has been 
read. (Remember
+    //       a segment is nothing more then an independently readable index)
+
+    CND_PRECONDITION(seg != NULL, "seg is NULL");
+
+    //Initialize the name of the segment
+    segment = seg;
+
+    //Create a filname fo a Term Info File
+    string tisFile = Misc::segmentname(segment, ".tis");
+    string tiiFile = Misc::segmentname(segment, ".tii");
+    bool success = false;
     origEnum = indexEnum = NULL;
     _size = indexTermsLength = totalIndexInterval = 0;
-          indexIsRead = false;
-
-         try {
-      //Create an SegmentTermEnum for storing all the terms read of the segment
-
-      // tii
-      auto tiiStream = directory->openInput( tiiFile.c_str(), readBufferSize );
-      indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true, -1);
-      CND_CONDITION(indexEnum != NULL, "No memory could be allocated for index 
enumerator");
-
-      // tis
-      auto tisStream = directory->openInput( tisFile.c_str(), readBufferSize );
-      origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false, 
indexEnum->getFormat());
-      origEnum->initByTii(indexEnum);
-      CND_CONDITION(origEnum != NULL, "No memory could be allocated for index 
enumerator");
-      _size = origEnum->size;
-      totalIndexInterval = origEnum->indexInterval;
-
-      //call ensureIndexIsRead to load data to memory right now
-      ensureIndexIsRead();
-
-      success = true;
-         } _CLFINALLY({
-                 // With lock-less commits, it's entirely possible (and
-                 // fine) to hit a FileNotFound exception above. In
-                 // this case, we want to explicitly close any subset
-                 // of things that were opened so that we don't have to
-                 // wait for a GC to do so.
-                 if (!success) {
-                         close();
-                 }
-         });
-
-  }
-
-  TermInfosReader::~TermInfosReader(){
-  //Func - Destructor
-  //Pre  - true
-  //Post - The instance has been destroyed
-
-      //Close the TermInfosReader to be absolutly sure that enumerator has 
been closed
-         //and the arrays indexTerms, indexPointers and indexInfos and  their 
elements
-         //have been destroyed
-      close();
-  }
-  int32_t TermInfosReader::getSkipInterval() const {
+    indexIsRead = false;
+
+    try {
+        //Create an SegmentTermEnum for storing all the terms read of the 
segment
+
+        // tii
+        auto tiiStream = directory->openInput(tiiFile.c_str(), readBufferSize);
+        indexEnum = _CLNEW SegmentTermEnum(tiiStream, fieldInfos, true);
+        indexEnum->init(-1);
+        CND_CONDITION(indexEnum != NULL, "No memory could be allocated for 
index enumerator");
+
+        // tis
+        auto tisStream = directory->openInput(tisFile.c_str(), readBufferSize);
+        origEnum = _CLNEW SegmentTermEnum(tisStream, fieldInfos, false);
+        origEnum->init(indexEnum->getFormat());
+        origEnum->initByTii(indexEnum);
+        CND_CONDITION(origEnum != NULL, "No memory could be allocated for 
index enumerator");
+        _size = origEnum->size;
+        totalIndexInterval = origEnum->indexInterval;
+
+        //call ensureIndexIsRead to load data to memory right now
+        ensureIndexIsRead();
+
+        success = true;
+    }
+    _CLFINALLY({
+        // With lock-less commits, it's entirely possible (and
+        // fine) to hit a FileNotFound exception above. In
+        // this case, we want to explicitly close any subset
+        // of things that were opened so that we don't have to
+        // wait for a GC to do so.
+        if (!success) {
+            close();
+        }
+    });
+}
+
+TermInfosReader::~TermInfosReader() {
+    //Func - Destructor
+    //Pre  - true
+    //Post - The instance has been destroyed
+
+    //Close the TermInfosReader to be absolutly sure that enumerator has been 
closed
+    //and the arrays indexTerms, indexPointers and indexInfos and  their 
elements
+    //have been destroyed
+    close();
+}
+int32_t TermInfosReader::getSkipInterval() const {
     return origEnum->skipInterval;
-  }
+}
 
-  int32_t TermInfosReader::getMaxSkipLevels() const {
+int32_t TermInfosReader::getMaxSkipLevels() const {
     return origEnum->maxSkipLevels;
-  }
-
-  void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
-         if (indexDivisor < 1)
-                 _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must be > 0");
+}
 
-         if (indexTerms != NULL)
-                 _CLTHROWA(CL_ERR_IllegalArgument, "index terms are already 
loaded");
+void TermInfosReader::setIndexDivisor(const int32_t _indexDivisor) {
+    if (indexDivisor < 1) _CLTHROWA(CL_ERR_IllegalArgument, "indexDivisor must 
be > 0");
 
-         this->indexDivisor = _indexDivisor;
-         totalIndexInterval = origEnum->indexInterval * _indexDivisor;
-  }
+    if (indexTerms != NULL) _CLTHROWA(CL_ERR_IllegalArgument, "index terms are 
already loaded");
 
-  int32_t TermInfosReader::getIndexDivisor() const { return indexDivisor; }
-  void TermInfosReader::close() {
+    this->indexDivisor = _indexDivisor;
+    totalIndexInterval = origEnum->indexInterval * _indexDivisor;
+}
 
-         //Check if indexTerms and indexInfos exist
-     if (indexTerms && indexInfos){
-          //Iterate through arrays indexTerms and indexPointer to
-             //destroy their elements
+int32_t TermInfosReader::getIndexDivisor() const {
+    return indexDivisor;
+}
+void TermInfosReader::close() {
+    //Check if indexTerms and indexInfos exist
+    if (indexTerms && indexInfos) {
+        //Iterate through arrays indexTerms and indexPointer to
+        //destroy their elements
 #ifdef _DEBUG
-         for ( int32_t i=0; i<indexTermsLength;++i ){
+        for (int32_t i = 0; i < indexTermsLength; ++i) {
             indexTerms[i].__cl_refcount--;
-         }
+        }
 #endif
-     }
-         //Delete the arrays
-     if (indexTerms){
-         delete [] indexTerms;
-        indexTerms = NULL;
-     }
-     if (indexInfos){
-         _CLDELETE_ARRAY(indexInfos);
-        indexInfos = NULL;
-     }
-
-      //Delete the arrays
-     if (indexPointers) {
+    }
+    //Delete the arrays
+    if (indexTerms) {
+        delete[] indexTerms;
+        indexTerms = NULL;
+    }
+    if (indexInfos) {
+        _CLDELETE_ARRAY(indexInfos);
+        indexInfos = NULL;
+    }
+
+    //Delete the arrays
+    if (indexPointers) {
         _CLDELETE_ARRAY(indexPointers);
         indexPointers = NULL;
-     }
+    }
 
-      if (origEnum != NULL){
+    if (origEnum != NULL) {
         origEnum->close();
 
-           //Get a pointer to IndexInput used by the enumeration but
-           //instantiated in the constructor by directory.open( tisFile )
-        IndexInput *is = origEnum->input;
+        //Get a pointer to IndexInput used by the enumeration but
+        //instantiated in the constructor by directory.open( tisFile )
+        IndexInput* is = origEnum->input;
 
         //Delete the enumuration enumerator
         _CLDELETE(origEnum);
 
         //Delete the IndexInput
         _CLDELETE(is);
-      }
+    }
 
-      if (indexEnum != NULL){
+    if (indexEnum != NULL) {
         indexEnum->close();
 
-           //Get a pointer to IndexInput used by the enumeration but
-           //instantiated in the constructor by directory.open( tiiFile )
-        IndexInput *is = indexEnum->input;
+        //Get a pointer to IndexInput used by the enumeration but
+        //instantiated in the constructor by directory.open( tiiFile )
+        IndexInput* is = indexEnum->input;
 
         //Delete the enumuration enumerator
         _CLDELETE(indexEnum);
-       indexEnum = NULL;
+        indexEnum = NULL;
 
         //Delete the IndexInput
         _CLDELETE(is);
-      }
-         enumerators.setNull();
-  }
-
-  int64_t TermInfosReader::size() const{
-  //Func - Return the size of the enumeration of TermInfos
-  //Pre  - true
-  //Post - size has been returened
+    }
+    enumerators.setNull();
+}
 
-      return _size;
-  }
+int64_t TermInfosReader::size() const {
+    //Func - Return the size of the enumeration of TermInfos
+    //Pre  - true
+    //Post - size has been returened
 
+    return _size;
+}
 
-  Term* TermInfosReader::get(const int32_t position) {
-  //Func - Returns the nth term in the set
-  //Pre  - position > = 0
-  //Post - The n-th term in the set has been returned
+Term* TermInfosReader::get(const int32_t position) {
+    //Func - Returns the nth term in the set
+    //Pre  - position > = 0
+    //Post - The n-th term in the set has been returned
 
-         //Check if the size is 0 because then there are no terms
-      if (_size == 0)
-          return NULL;
+    //Check if the size is 0 because then there are no terms
+    if (_size == 0) return NULL;
 
-         SegmentTermEnum* enumerator = getEnum();
+    SegmentTermEnum* enumerator = getEnum();
 
-         if (
-             enumerator != NULL //an enumeration exists
-             && enumerator->term(false) != NULL // term is at or past current
-             && position >= enumerator->position
-                 && position < (enumerator->position + totalIndexInterval)
-            )
-         {
-                 return scanEnum(position);                      // can avoid 
seek
-         }
+    if (enumerator != NULL                 //an enumeration exists
+        && enumerator->term(false) != NULL // term is at or past current
+        && position >= enumerator->position &&
+        position < (enumerator->position + totalIndexInterval)) {
+        return scanEnum(position); // can avoid seek
+    }
 
     //random-access: must seek
     seekEnum(position / totalIndexInterval);
 
-       //Get the Term at position
+    //Get the Term at position
     return scanEnum(position);
-  }
+}
 
-  SegmentTermEnum* TermInfosReader::getEnum(){
+SegmentTermEnum* TermInfosReader::getEnum() {
     SegmentTermEnum* termEnum = enumerators.get();
-    if (termEnum == NULL){
-      termEnum = terms();
-      enumerators.set(termEnum);
+    if (termEnum == NULL) {
+        termEnum = terms();
+        enumerators.set(termEnum);
     }
     return termEnum;
-  }
+}
 
-  TermInfo* TermInfosReader::get(const Term* term){
-  //Func - Returns a TermInfo for a term
-  //Pre  - term holds a valid reference to term
-  //Post - if term can be found its TermInfo has been returned otherwise NULL
+TermInfo* TermInfosReader::get(const Term* term) {
+    //Func - Returns a TermInfo for a term
+    //Pre  - term holds a valid reference to term
+    //Post - if term can be found its TermInfo has been returned otherwise NULL
 
     //If the size of the enumeration is 0 then no Terms have been read
-       if (_size == 0)
-               return NULL;
+    if (_size == 0) return NULL;
 
     ensureIndexIsRead();
 
@@ -233,250 +232,238 @@ CL_NS_DEF(index)
 
     // optimize sequential access: first try scanning cached enumerator w/o 
seeking
     if (
-             //the current term of the enumeration enumerator is not at the 
end AND
-       enumerator->term(false) != NULL  &&
-       (
-            //there exists a previous current called prev and term is 
positioned after this prev OR
-            ( enumerator->prev != NULL && term->compareTo(enumerator->prev) > 
0) ||
-            //term is positioned at the same position as the current of 
enumerator or at a higher position
-            term->compareTo(enumerator->term(false)) >= 0 )
-       )
-     {
-
-               //Calculate the offset for the position
-               int32_t _enumOffset = 
(int32_t)(enumerator->position/totalIndexInterval)+1;
-
-               // but before end of block
-               if (
-                       //the length of indexTerms (the number of terms in 
enumerator) equals
-                       //_enum_offset OR
-                       indexTermsLength == _enumOffset  ||
-                       //term is positioned in front of term found at 
_enumOffset in indexTerms
-                       term->compareTo(&indexTerms[_enumOffset]) < 0){
-
-                       //no need to seek, retrieve the TermInfo for term
-                       return scanEnum(term);
+            //the current term of the enumeration enumerator is not at the end 
AND
+            enumerator->term(false) != NULL &&
+            (
+                    //there exists a previous current called prev and term is 
positioned after this prev OR
+                    (enumerator->prev != NULL && 
term->compareTo(enumerator->prev) > 0) ||
+                    //term is positioned at the same position as the current 
of enumerator or at a higher position
+                    term->compareTo(enumerator->term(false)) >= 0)) {
+        //Calculate the offset for the position
+        int32_t _enumOffset = (int32_t)(enumerator->position / 
totalIndexInterval) + 1;
+
+        // but before end of block
+        if (
+                //the length of indexTerms (the number of terms in enumerator) 
equals
+                //_enum_offset OR
+                indexTermsLength == _enumOffset ||
+                //term is positioned in front of term found at _enumOffset in 
indexTerms
+                term->compareTo(&indexTerms[_enumOffset]) < 0) {
+            //no need to seek, retrieve the TermInfo for term
+            return scanEnum(term);
         }
     }
 
     //Reposition current term in the enumeration
     seekEnum(getIndexOffset(term));
-       //Return the TermInfo for term
+    //Return the TermInfo for term
     return scanEnum(term);
-  }
-
+}
 
-  int64_t TermInfosReader::getPosition(const Term* term) {
-  //Func - Returns the position of a Term in the set
-  //Pre  - term holds a valid reference to a Term
-  //       enumerator != NULL
-  //Post - If term was found then its position is returned otherwise -1
+int64_t TermInfosReader::getPosition(const Term* term) {
+    //Func - Returns the position of a Term in the set
+    //Pre  - term holds a valid reference to a Term
+    //       enumerator != NULL
+    //Post - If term was found then its position is returned otherwise -1
 
-         //if the enumeration is empty then return -1
-         if (_size == 0)
-                 return -1;
+    //if the enumeration is empty then return -1
+    if (_size == 0) return -1;
 
-         ensureIndexIsRead();
-
-      //Retrieve the indexOffset for term
-      int32_t indexOffset = getIndexOffset(term);
-      seekEnum(indexOffset);
+    ensureIndexIsRead();
 
-         SegmentTermEnum* enumerator = getEnum();
+    //Retrieve the indexOffset for term
+    int32_t indexOffset = getIndexOffset(term);
+    seekEnum(indexOffset);
 
-      while(term->compareTo(enumerator->term(false)) > 0 && 
enumerator->next()) {}
+    SegmentTermEnum* enumerator = getEnum();
 
-         if ( term->equals(enumerator->term(false)) ){
-          return enumerator->position;
-         }else
-          return -1;
-  }
+    while (term->compareTo(enumerator->term(false)) > 0 && enumerator->next()) 
{
+    }
 
-  SegmentTermEnum* TermInfosReader::terms(const Term* term) {
-  //Func - Returns an enumeration of terms starting at or after the named term.
-  //       If term is null then enumerator is set to the beginning
-  //Pre  - term holds a valid reference to a Term
-  //       enumerator != NULL
-  //Post - An enumeration of terms starting at or after the named term has 
been returned
+    if (term->equals(enumerator->term(false))) {
+        return enumerator->position;
+    } else
+        return -1;
+}
+
+SegmentTermEnum* TermInfosReader::terms(const Term* term) {
+    //Func - Returns an enumeration of terms starting at or after the named 
term.
+    //       If term is null then enumerator is set to the beginning
+    //Pre  - term holds a valid reference to a Term
+    //       enumerator != NULL
+    //Post - An enumeration of terms starting at or after the named term has 
been returned
+
+    SegmentTermEnum* enumerator = NULL;
+    if (term != NULL) {
+        //Seek enumerator to term; delete the new TermInfo that's returned.
+        TermInfo* ti = get(term);
+        _CLLDELETE(ti);
+        enumerator = getEnum();
+    } else
+        enumerator = origEnum;
+
+    //Clone the entire enumeration
+    SegmentTermEnum* cln = enumerator->clone();
+
+    //Check if cln points to a valid instance
+    CND_CONDITION(cln != NULL, "cln is NULL");
+
+    return cln;
+}
+
+void TermInfosReader::ensureIndexIsRead() {
+    //Func - Reads the term info index file or .tti file.
+    //       This file contains every IndexInterval-th entry from the .tis 
file,
+    //       along with its location in the "tis" file. This is designed to be 
read entirely
+    //       into memory and used to provide random access to the "tis" file.
+    //Pre  - indexTerms    = NULL
+    //       indexInfos    = NULL
+    //       indexPointers = NULL
+    //Post - The term info index file has been read into memory
 
-         SegmentTermEnum* enumerator = NULL;
-         if ( term != NULL ){
-               //Seek enumerator to term; delete the new TermInfo that's 
returned.
-               TermInfo* ti = get(term);
-               _CLLDELETE(ti);
-               enumerator = getEnum();
-         }else
-           enumerator = origEnum;
+    SCOPED_LOCK_MUTEX(THIS_LOCK)
 
-      //Clone the entire enumeration
-      SegmentTermEnum* cln = enumerator->clone();
+    if (indexIsRead) return;
+
+    //https://jira.qianxin-inc.cn/browse/XHBUG-2921
+    //https://jira.qianxin-inc.cn/browse/XHBUG-3053
+    if (indexEnum == NULL) _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
+
+    try {
+        indexTermsLength = (size_t)indexEnum->size;
+
+        //Instantiate an block of Term's,so that each one doesn't have to be 
new'd
+        indexTerms = new Term[indexTermsLength];
+        CND_CONDITION(
+                indexTerms != NULL,
+                "No memory could be allocated for indexTerms"); //Check if is 
indexTerms is a valid array
+
+        //Instantiate an big block of TermInfo's, so that each one doesn't 
have to be new'd
+        indexInfos = _CL_NEWARRAY(TermInfo, indexTermsLength);
+        CND_CONDITION(
+                indexInfos != NULL,
+                "No memory could be allocated for indexInfos"); //Check if is 
indexInfos is a valid array
+
+        //Instantiate an array indexPointers that contains pointers to the 
term info index file
+        indexPointers = _CL_NEWARRAY(int64_t, indexTermsLength);
+        CND_CONDITION(
+                indexPointers != NULL,
+                "No memory could be allocated for indexPointers"); //Check if 
is indexPointers is a valid array
+
+        //Iterate through the terms of indexEnum
+        for (int32_t i = 0; indexEnum->next(); ++i) {
+            indexTerms[i].set(indexEnum->term(false), 
indexEnum->term(false)->text());
+            indexEnum->getTermInfo(&indexInfos[i]);
+            indexPointers[i] = indexEnum->indexPointer;
+
+            for (int32_t j = 1; j < indexDivisor; j++)
+                if (!indexEnum->next()) break;
+        }
+        indexIsRead = true;
+    }
+    _CLFINALLY(indexEnum->close();
+               //Close and delete the IndexInput is. The close is done by the 
destructor.
+               _CLDELETE(indexEnum->input); _CLDELETE(indexEnum); indexEnum = 
NULL;);
+}
+
+int32_t TermInfosReader::getIndexOffset(const Term* term) {
+    //Func - Returns the offset of the greatest index entry which is less than 
or equal to term.
+    //Pre  - term holds a reference to a valid term
+    //       indexTerms != NULL
+    //Post - The new offset has been returned
+
+    //Check if is indexTerms is a valid array
+    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+
+    int32_t lo = 0;
+    int32_t hi = indexTermsLength - 1;
+    int32_t mid;
+    int32_t delta;
+
+    while (hi >= lo) {
+        //Start in the middle betwee hi and lo
+        mid = (lo + hi) >> 1;
+
+        //Check if is indexTerms[mid] is a valid instance of Term
+        CND_PRECONDITION(&indexTerms[mid] != NULL, "indexTerms[mid] is NULL");
+        CND_PRECONDITION(mid < indexTermsLength, "mid >= indexTermsLength");
+
+        //Determine if term is before mid or after mid
+        delta = term->compareTo(&indexTerms[mid]);
+        if (delta < 0) {
+            //Calculate the new hi
+            hi = mid - 1;
+        } else if (delta > 0) {
+            //Calculate the new lo
+            lo = mid + 1;
+        } else {
+            //term has been found so return its position
+            return mid;
+        }
+    }
+    // the new starting offset
+    return hi;
+}
+
+void TermInfosReader::seekEnum(const int32_t indexOffset) {
+    //Func - Reposition the current Term and TermInfo to indexOffset
+    //Pre  - indexOffset >= 0
+    //       indexTerms    != NULL
+    //       indexInfos    != NULL
+    //       indexPointers != NULL
+    //Post - The current Term and Terminfo have been repositioned to 
indexOffset
+
+    CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative 
number");
+    CND_PRECONDITION(indexTerms != NULL, "indexTerms is NULL");
+    CND_PRECONDITION(indexInfos != NULL, "indexInfos is NULL");
+    CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
 
-      //Check if cln points to a valid instance
-      CND_CONDITION(cln != NULL,"cln is NULL");
+    SegmentTermEnum* enumerator = getEnum();
+    enumerator->seek(indexPointers[indexOffset], (indexOffset * 
totalIndexInterval) - 1,
+                     &indexTerms[indexOffset], &indexInfos[indexOffset]);
+}
+
+TermInfo* TermInfosReader::scanEnum(const Term* term) {
+    //Func - Scans the Enumeration of terms for term and returns the 
corresponding TermInfo instance if found.
+    //       The search is started from the current term.
+    //Pre  - term contains a valid reference to a Term
+    //       enumerator != NULL
+    //Post - if term has been found the corresponding TermInfo has been 
returned otherwise NULL
+    //       has been returned
 
-      return cln;
-  }
+    SegmentTermEnum* enumerator = getEnum();
+    enumerator->scanTo(term);
+
+    //Check if the at the position the Term term can be found
+    if (enumerator->term(false) != NULL && 
term->equals(enumerator->term(false))) {
+        //Return the TermInfo instance about term
+        return enumerator->getTermInfo();
+    } else {
+        //term was not found so no TermInfo can be returned
+        return NULL;
+    }
+}
 
+Term* TermInfosReader::scanEnum(const int32_t position) {
+    //Func - Scans the enumeration to the requested position and returns the
+    //       Term located at that position
+    //Pre  - position > = 0
+    //       enumerator != NULL
+    //Post - The Term at the requested position has been returned
 
-  void TermInfosReader::ensureIndexIsRead() {
-  //Func - Reads the term info index file or .tti file.
-  //       This file contains every IndexInterval-th entry from the .tis file,
-  //       along with its location in the "tis" file. This is designed to be 
read entirely
-  //       into memory and used to provide random access to the "tis" file.
-  //Pre  - indexTerms    = NULL
-  //       indexInfos    = NULL
-  //       indexPointers = NULL
-  //Post - The term info index file has been read into memory
+    SegmentTermEnum* enumerator = getEnum();
 
-    SCOPED_LOCK_MUTEX(THIS_LOCK)
+    //As long the position of the enumeration enumerator is smaller than the 
requested one
+    while (enumerator->position < position) {
+        //Move the current of enumerator to the next
+        if (!enumerator->next()) {
+            //If there is no next it means that the requested position was to 
big
+            return NULL;
+        }
+    }
 
-         if (indexIsRead)
-                 return;
-
-      //https://jira.qianxin-inc.cn/browse/XHBUG-2921
-      //https://jira.qianxin-inc.cn/browse/XHBUG-3053
-      if (indexEnum == NULL)
-         _CLTHROWA(CL_ERR_NullPointer, "indexEnum is NULL");
-
-      try {
-          indexTermsLength = (size_t)indexEnum->size;
-
-                     //Instantiate an block of Term's,so that each one doesn't 
have to be new'd
-          indexTerms    = new Term[indexTermsLength];
-          CND_CONDITION(indexTerms != NULL,"No memory could be allocated for 
indexTerms");//Check if is indexTerms is a valid array
-
-                 //Instantiate an big block of TermInfo's, so that each one 
doesn't have to be new'd
-          indexInfos    = _CL_NEWARRAY(TermInfo,indexTermsLength);
-          CND_CONDITION(indexInfos != NULL,"No memory could be allocated for 
indexInfos"); //Check if is indexInfos is a valid array
-
-          //Instantiate an array indexPointers that contains pointers to the 
term info index file
-          indexPointers = _CL_NEWARRAY(int64_t,indexTermsLength);
-          CND_CONDITION(indexPointers != NULL,"No memory could be allocated 
for indexPointers");//Check if is indexPointers is a valid array
-
-                 //Iterate through the terms of indexEnum
-          for (int32_t i = 0; indexEnum->next(); ++i){
-              
indexTerms[i].set(indexEnum->term(false),indexEnum->term(false)->text());
-              indexEnum->getTermInfo(&indexInfos[i]);
-              indexPointers[i] = indexEnum->indexPointer;
-
-                               for (int32_t j = 1; j < indexDivisor; j++)
-                                       if (!indexEnum->next())
-                                               break;
-          }
-         indexIsRead = true;
-    }_CLFINALLY(
-          indexEnum->close();
-                 //Close and delete the IndexInput is. The close is done by 
the destructor.
-          _CLDELETE( indexEnum->input );
-          _CLDELETE( indexEnum );
-         indexEnum = NULL;
-    );
-  }
-
-
-  int32_t TermInfosReader::getIndexOffset(const Term* term){
-  //Func - Returns the offset of the greatest index entry which is less than 
or equal to term.
-  //Pre  - term holds a reference to a valid term
-  //       indexTerms != NULL
-  //Post - The new offset has been returned
-
-      //Check if is indexTerms is a valid array
-      CND_PRECONDITION(indexTerms != NULL,"indexTerms is NULL");
-
-      int32_t lo = 0;
-      int32_t hi = indexTermsLength - 1;
-         int32_t mid;
-         int32_t delta;
-
-      while (hi >= lo) {
-          //Start in the middle betwee hi and lo
-          mid = (lo + hi) >> 1;
-
-          //Check if is indexTerms[mid] is a valid instance of Term
-          CND_PRECONDITION(&indexTerms[mid] != NULL,"indexTerms[mid] is NULL");
-          CND_PRECONDITION(mid < indexTermsLength,"mid >= indexTermsLength");
-
-                 //Determine if term is before mid or after mid
-          delta = term->compareTo(&indexTerms[mid]);
-          if (delta < 0){
-              //Calculate the new hi
-              hi = mid - 1;
-          }else if (delta > 0){
-                  //Calculate the new lo
-                  lo = mid + 1;
-                         }else{
-                  //term has been found so return its position
-                  return mid;
-          }
-     }
-     // the new starting offset
-     return hi;
-  }
-
-  void TermInfosReader::seekEnum(const int32_t indexOffset) {
-  //Func - Reposition the current Term and TermInfo to indexOffset
-  //Pre  - indexOffset >= 0
-  //       indexTerms    != NULL
-  //       indexInfos    != NULL
-  //       indexPointers != NULL
-  //Post - The current Term and Terminfo have been repositioned to indexOffset
-
-      CND_PRECONDITION(indexOffset >= 0, "indexOffset contains a negative 
number");
-      CND_PRECONDITION(indexTerms != NULL,    "indexTerms is NULL");
-      CND_PRECONDITION(indexInfos != NULL,    "indexInfos is NULL");
-      CND_PRECONDITION(indexPointers != NULL, "indexPointers is NULL");
-
-         SegmentTermEnum* enumerator =  getEnum();
-         enumerator->seek(
-          indexPointers[indexOffset],
-                 (indexOffset * totalIndexInterval) - 1,
-          &indexTerms[indexOffset],
-                 &indexInfos[indexOffset]
-             );
-  }
-
-
-  TermInfo* TermInfosReader::scanEnum(const Term* term) {
-  //Func - Scans the Enumeration of terms for term and returns the 
corresponding TermInfo instance if found.
-  //       The search is started from the current term.
-  //Pre  - term contains a valid reference to a Term
-  //       enumerator != NULL
-  //Post - if term has been found the corresponding TermInfo has been returned 
otherwise NULL
-  //       has been returned
-
-      SegmentTermEnum* enumerator = getEnum();
-         enumerator->scanTo(term);
-
-      //Check if the at the position the Term term can be found
-         if (enumerator->term(false) != NULL && 
term->equals(enumerator->term(false)) ){
-                 //Return the TermInfo instance about term
-          return enumerator->getTermInfo();
-     }else{
-          //term was not found so no TermInfo can be returned
-          return NULL;
-     }
-  }
-
-  Term* TermInfosReader::scanEnum(const int32_t position) {
-  //Func - Scans the enumeration to the requested position and returns the
-  //       Term located at that position
-  //Pre  - position > = 0
-  //       enumerator != NULL
-  //Post - The Term at the requested position has been returned
-
-      SegmentTermEnum* enumerator = getEnum();
-
-         //As long the position of the enumeration enumerator is smaller than 
the requested one
-      while(enumerator->position < position){
-                 //Move the current of enumerator to the next
-                 if (!enumerator->next()){
-                         //If there is no next it means that the requested 
position was to big
-              return NULL;
-          }
-         }
-
-         //Return the Term a the requested position
-         return enumerator->term();
-  }
+    //Return the Term a the requested position
+    return enumerator->term();
+}
 
 CL_NS_END
diff --git a/src/core/CLucene/index/_SegmentHeader.h 
b/src/core/CLucene/index/_SegmentHeader.h
index bf988a2f27..c1f01e7cec 100644
--- a/src/core/CLucene/index/_SegmentHeader.h
+++ b/src/core/CLucene/index/_SegmentHeader.h
@@ -93,8 +93,8 @@ protected:
   int32_t count;
   int32_t df;
   CL_NS(util)::BitSet* deletedDocs;
-  int32_t _doc;
-  int32_t _freq;
+  int32_t _doc = -1;
+  int32_t _freq = 0;
   int32_t docs[PFOR_BLOCK_SIZE];         // buffered doc numbers
   int32_t freqs[PFOR_BLOCK_SIZE];        // buffered term freqs
   int32_t pointer;
diff --git a/src/core/CLucene/index/_SegmentTermEnum.h 
b/src/core/CLucene/index/_SegmentTermEnum.h
index b5fa419d4f..3dd2c8c5b8 100644
--- a/src/core/CLucene/index/_SegmentTermEnum.h
+++ b/src/core/CLucene/index/_SegmentTermEnum.h
@@ -55,7 +55,8 @@ protected:
 
 public:
        ///Constructor
-       SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const 
bool isi, int32_t in_format = -1);
+       SegmentTermEnum(CL_NS(store)::IndexInput* i, FieldInfos* fis, const 
bool isi);
+        void init(int32_t in_format = -1);
 
        ///Destructor
        ~SegmentTermEnum();
diff --git a/src/core/CLucene/search/MultiPhraseQuery.cpp 
b/src/core/CLucene/search/MultiPhraseQuery.cpp
index 5427370261..107c8b11f4 100644
--- a/src/core/CLucene/search/MultiPhraseQuery.cpp
+++ b/src/core/CLucene/search/MultiPhraseQuery.cpp
@@ -211,8 +211,8 @@ Query* MultiPhraseQuery::rewrite(IndexReader* /*reader*/) {
          ArrayBase<Term*>* terms = termArrays->at(0);
          BooleanQuery* boq = _CLNEW BooleanQuery(true);
     for ( size_t i=0;i<terms->length;i++ ){
-                 boq->add(_CLNEW TermQuery((*terms)[i]), 
BooleanClause::SHOULD);
-         }
+                 boq->add(_CLNEW TermQuery((*terms)[i]), true, 
BooleanClause::SHOULD);
+               }
          boq->setBoost(getBoost());
          return boq;
   } else {
diff --git a/src/core/CLucene/search/query/TermIterator.h 
b/src/core/CLucene/search/query/TermIterator.h
index e0cf23a4fb..3eb22a254d 100644
--- a/src/core/CLucene/search/query/TermIterator.h
+++ b/src/core/CLucene/search/query/TermIterator.h
@@ -1,51 +1,54 @@
 #pragma once
 
-#include "CLucene/search/query/DcoIdSetIterator.h"
 #include "CLucene/index/Terms.h"
 
 #include <limits.h>
+#include <cstdint>
 
 CL_NS_USE(index)
 
-class TermIterator : public DocIdSetIterator {
+class TermIterator {
 public:
   TermIterator() = default;
-  TermIterator(TermDocs* termDocs) : termDocs_(termDocs) {
+  TermIterator(TermDocs* termDocs) 
+    : termDocs_(termDocs) {
   }
 
-  virtual ~TermIterator() = default;
-
-  bool isEmpty() {
+  inline bool isEmpty() const {
     return termDocs_ == nullptr;
   }
 
-  int32_t docID() override {
-    uint32_t docId = termDocs_->doc();
+  inline int32_t docID() const {
+    int32_t docId = termDocs_->doc();
     return docId >= INT_MAX ? INT_MAX : docId;
   }
 
-  int32_t nextDoc() override {
+  inline int32_t freq() const {
+    return termDocs_->freq();
+  }
+
+  inline int32_t nextDoc() const {
     if (termDocs_->next()) {
       return termDocs_->doc();
     }
     return INT_MAX;
   }
 
-  int32_t advance(int32_t target) override {
+  inline int32_t advance(int32_t target) const {
     if (termDocs_->skipTo(target)) {
       return termDocs_->doc();
     }
     return INT_MAX;
   }
 
-  int32_t docFreq() const override {
+  inline int32_t docFreq() const {
     return termDocs_->docFreq();
   }
 
-  bool readRange(DocRange* docRange) const override {
+  inline bool readRange(DocRange* docRange) const {
     return termDocs_->readRange(docRange);
   }
   
-private:
+protected:
   TermDocs* termDocs_ = nullptr;
 };
\ No newline at end of file
diff --git a/src/core/CLucene/search/query/TermPositionIterator.h 
b/src/core/CLucene/search/query/TermPositionIterator.h
new file mode 100644
index 0000000000..d64af4098f
--- /dev/null
+++ b/src/core/CLucene/search/query/TermPositionIterator.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "CLucene/search/query/TermIterator.h"
+#include "CLucene/index/Terms.h"
+
+#include <limits.h>
+
+CL_NS_USE(index)
+
+class TermPositionIterator : public TermIterator {
+public:
+  TermPositionIterator() = default;
+  TermPositionIterator(TermPositions* termPositions) 
+    : TermIterator(termPositions), termPositions_(termPositions) {
+  }
+
+  inline int32_t nextPosition() const {
+    return termPositions_->nextPosition();
+  }
+
+private:
+  TermPositions* termPositions_ = nullptr;
+};
\ No newline at end of file
diff --git a/src/core/CLucene/store/IndexOutput.cpp 
b/src/core/CLucene/store/IndexOutput.cpp
index 05e7695f92..77c37400d8 100644
--- a/src/core/CLucene/store/IndexOutput.cpp
+++ b/src/core/CLucene/store/IndexOutput.cpp
@@ -35,12 +35,13 @@ CL_NS_DEF(store)
                close();
   }
 
-  void BufferedIndexOutput::close(){
-    flush();
-    _CLDELETE_ARRAY( buffer );
-
-    bufferStart = 0;
-    bufferPosition = 0;
+  void BufferedIndexOutput::close() {
+      // flush() may throw; if we do not free the buffer under all circumstances,
+      // the destructor above would call close() again, causing a pure virtual function call on flushBuffer()
+      try {
+          flush();
+      }
+      _CLFINALLY(_CLDELETE_ARRAY(buffer); bufferStart = 0; bufferPosition = 0;)
   }
 
   void BufferedIndexOutput::writeByte(const uint8_t b) {
diff --git a/src/core/CLucene/util/CLStreams.h 
b/src/core/CLucene/util/CLStreams.h
index 121f272539..3f60f2d97a 100644
--- a/src/core/CLucene/util/CLStreams.h
+++ b/src/core/CLucene/util/CLStreams.h
@@ -196,7 +196,7 @@ public:
         this->init(_value, _length, copyData);
     }
     void init(const void *_value, int32_t _length, bool copyData = true) 
override {
-        const size_t length = _length;
+        const size_t length = (size_t)_length;
         this->pos = 0;
         if (copyData) {
             T *tmp = (T *) this->value;
diff --git a/src/core/CLucene/util/PriorityQueue.h 
b/src/core/CLucene/util/PriorityQueue.h
index 16b2bbac66..59cb0a8d31 100644
--- a/src/core/CLucene/util/PriorityQueue.h
+++ b/src/core/CLucene/util/PriorityQueue.h
@@ -39,7 +39,7 @@ class CLUCENE_INLINE_EXPORT PriorityQueue {
                        int32_t j = ((uint32_t)i) >> 1;
                        while (j > 0 && lessThan(node,heap[j])) {
                                heap[i] = heap[j];                        // 
shift parents down
-                               i = j;
+                               i = (size_t)j;
                                j = ((uint32_t)j) >> 1;
                        }
                        heap[i] = node;                           // install 
saved node
diff --git a/src/core/CLucene/util/bkd/bkd_docid_iterator.h 
b/src/core/CLucene/util/bkd/bkd_docid_iterator.h
index 491d3c4c5a..412228ad97 100644
--- a/src/core/CLucene/util/bkd/bkd_docid_iterator.h
+++ b/src/core/CLucene/util/bkd/bkd_docid_iterator.h
@@ -12,7 +12,7 @@ class bkd_docid_set{
 public:
     static const int NO_MORE_DOCS = std::numeric_limits<int32_t>::max();
 
-    explicit bkd_docid_set(int32_t size) {
+    explicit bkd_docid_set(size_t size) {
         docids.resize(size);
     }
     int32_t length() const {
@@ -22,7 +22,7 @@ public:
         if (_idx == _length) {
             _docid = NO_MORE_DOCS;
         } else {
-            _docid = docids[_offset + _idx];
+            _docid = docids[size_t(_offset + _idx)];
             _idx++;
         }
         return _docid;
@@ -48,7 +48,7 @@ public:
     explicit bkd_docid_bitmap_set(int32_t size) {}
     ~bkd_docid_bitmap_set() = default;
     void add(std::vector<char>&& r, int pos) {
-        docids[pos] = r;
+        docids[size_t(pos)] = r;
         _offset++;
     }
     void add(std::vector<char>&& r) {
@@ -66,7 +66,7 @@ public:
         if (_idx == _length) {
             _docid = std::vector<char>(0);
         } else {
-            _docid = docids[_offset + _idx];
+            _docid = docids[size_t(_offset + _idx)];
             _idx++;
         }
         return _docid;
diff --git a/src/core/CLucene/util/stringUtil.h 
b/src/core/CLucene/util/stringUtil.h
index 4a022e3e24..e7d41e1d83 100644
--- a/src/core/CLucene/util/stringUtil.h
+++ b/src/core/CLucene/util/stringUtil.h
@@ -41,7 +41,7 @@ public:
 
 #if defined(__SSE2__) || defined(__aarch64__)
         const auto bytes_sse = sizeof(__m128i);
-        const auto src_end_sse = src_end - (src_end - src) % bytes_sse;
+        const auto src_end_sse = src_end - size_t(src_end - src) % bytes_sse;
 
         const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound 
- 1);
         const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound 
+ 1);
@@ -243,7 +243,7 @@ public:
             }
         }
 
-        return n1 - n2;
+        return int(n1 - n2);
     }
 
     static inline int32_t utf8_byte_count(uint8_t c) {
@@ -275,10 +275,11 @@ public:
         int32_t bytes_in_char = 0;
         int32_t surplus_bytes = 0;
         uint32_t codepoint = 0;
-        for (uint8_t c : str) {
+        for (auto cc : str) {
+            char c = (char)cc;
             if (bytes_in_char == 0) {
                 if ((c & 0x80) == 0) {
-                    codepoint = c;
+                    codepoint = (uint32_t)c;
                     continue;
                 } else if ((c & 0xE0) == 0xC0) {
                     codepoint = c & 0x1F;
@@ -313,10 +314,10 @@ public:
         size_t i = 0;
         while (i < utf8_str.size()) {
             wchar_t wc = utf8_str[i];
-            int32_t n = utf8_byte_count(utf8_str[i]);
+            int32_t n = utf8_byte_count((uint8_t)utf8_str[i]);
             if ((n >= 1 && n <= 4) &&
-                (i + n <= utf8_str.size()) &&
-                validate_utf8(std::string_view(utf8_str.data() + i, n)) == 0) {
+                (i + (size_t)n <= utf8_str.size()) &&
+                validate_utf8(std::string_view(utf8_str.data() + i, 
(size_t)n)) == 0) {
                 if (n == 2) {
                     wc = ((utf8_str[i] & 0x1F) << 6) | (utf8_str[i + 1] & 
0x3F);
                 } else if (n == 3) {
@@ -324,7 +325,7 @@ public:
                 } else if (n == 4) {
                     wc = ((utf8_str[i] & 0x07) << 18) | ((utf8_str[i + 1] & 
0x3F) << 12) | ((utf8_str[i + 2] & 0x3F) << 6) | (utf8_str[i + 3] & 0x3F);
                 }
-                i += n;
+                i += (size_t)n;
             } else {
                 i += 1;
             }
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index e1c13305aa..b9a09bb306 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -199,8 +199,8 @@ SET(clucene_core_Files
     ./CLucene/search/spans/SpanWeight.cpp
     ./CLucene/search/spans/SpanWeight.h
     ./CLucene/search/spans/TermSpans.cpp
-    ./CLucene/search/query/DcoIdSetIterator.h
     ./CLucene/search/query/TermIterator.h
+    ./CLucene/search/query/TermPositionIterator.h
     )
 
 #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means 
rebuilding them for the core)
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 20c722f80c..88c7c229dd 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -85,6 +85,7 @@ SET(test_files ./tests.cpp
         ./search/spans/TestSpanExplanations.h
         ./search/spans/TestSpanExplanationsOfNonMatches.cpp
         ./search/spans/TestSpanExplanationsOfNonMatches.h
+        ./index/TestIndexCompaction.cpp
         ./index/TestIndexModifier.cpp
         ./index/TestIndexWriter.cpp
         ./index/TestIndexModifier.cpp
@@ -102,6 +103,7 @@ SET(test_files ./tests.cpp
         ./util/TestStringBuffer.cpp
         ./util/English.cpp
         ./util/TestStrConvert.cpp
+        ./query/TestMultiPhraseQuery.cpp
         ${test_HEADERS})
 IF (USE_SHARED_OBJECT_FILES)
     GET_SHARED_FILES(clucene_shared_Files)
diff --git a/src/test/index/TestIndexCompaction.cpp 
b/src/test/index/TestIndexCompaction.cpp
new file mode 100644
index 0000000000..1d49c59788
--- /dev/null
+++ b/src/test/index/TestIndexCompaction.cpp
@@ -0,0 +1,344 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License
+
+#include <cstdint>
+#include <iostream>
+#include <utility>
+#include <vector>
+#include "CLucene/debug/mem.h"
+#include "test.h"
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexWriter.h"
+#include "CLucene/store/IndexInput.h"
+#include "CLucene/store/IndexOutput.h"
+#include "roaring/roaring.hh"
+
+void _setupSourceNullBitmapValues(std::vector<std::vector<uint32_t>> 
&srcNullBitmapValues) {
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{1, 2, 3});
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{2, 3, 4});
+    srcNullBitmapValues.push_back(std::vector<uint32_t>{3, 4, 5});
+}
+
+void _setupTransVec(std::vector<std::vector<std::pair<uint32_t, uint32_t>>>& 
trans_vec) {
+
+    trans_vec.resize(3);
+    for (int i = 0; i < 3; i++) {
+        trans_vec[i].resize(6);
+    }
+    
+    trans_vec[0][0] = std::pair<uint32_t, uint32_t>{0, 1};
+    trans_vec[0][1] = std::pair<uint32_t, uint32_t>{0, 2};
+    trans_vec[0][2] = std::pair<uint32_t, uint32_t>{0, 5};
+    trans_vec[0][3] = std::pair<uint32_t, uint32_t>{0, 7};
+    trans_vec[0][4] = std::pair<uint32_t, uint32_t>{0, 3};
+    trans_vec[0][5] = std::pair<uint32_t, uint32_t>{0, 8};
+    trans_vec[1][0] = std::pair<uint32_t, uint32_t>{0, 4};
+    trans_vec[1][1] = std::pair<uint32_t, uint32_t>{0, 6};
+    trans_vec[1][2] = std::pair<uint32_t, uint32_t>{UINT32_MAX, UINT32_MAX};
+    trans_vec[1][3] = std::pair<uint32_t, uint32_t>{1, 1};
+    trans_vec[1][4] = std::pair<uint32_t, uint32_t>{1, 2};
+    trans_vec[1][5] = std::pair<uint32_t, uint32_t>{1, 9};
+    trans_vec[2][0] = std::pair<uint32_t, uint32_t>{1, 3};
+    trans_vec[2][1] = std::pair<uint32_t, uint32_t>{1, 4};
+    trans_vec[2][2] = std::pair<uint32_t, uint32_t>{1, 5};
+    trans_vec[2][3] = std::pair<uint32_t, uint32_t>{1, 6};
+    trans_vec[2][4] = std::pair<uint32_t, uint32_t>{1, 7};
+    trans_vec[2][5] = std::pair<uint32_t, uint32_t>{1, 8};
+}
+
+uint64_t _getNullBitmapCardinality(RAMDirectory& dir) {
+    IndexInput* null_bitmap_in = nullptr;
+    CLuceneError error;
+    dir.openInput(IndexWriter::NULL_BITMAP_FILE_NAME, null_bitmap_in, error);
+    if (error.number() != 0) {
+        return 0;
+    }
+    size_t null_bitmap_size = null_bitmap_in->length();
+    std::string buf;
+    buf.resize(null_bitmap_size);
+    
null_bitmap_in->readBytes(reinterpret_cast<uint8_t*>(const_cast<char*>(buf.data())),
 null_bitmap_size);
+    auto null_bitmap = roaring::Roaring::read(buf.data(), false);
+    null_bitmap.runOptimize();
+
+    // close resources
+    null_bitmap_in->close();
+    _CLLDELETE(null_bitmap_in);
+
+    return null_bitmap.cardinality();
+}
+
+// src segments -> dest segments
+//           3  -> 2
+// docs      18 -> 17
+// 1,2,3,4,5,6
+// 1,2,3,4,5,6  -> 1,2,3,4,5,6,7,8 
+// 1,2,3,4,5,6     1,2,3,4,5,6,7,8,9
+//
+// null values
+// 1,2,3
+// 2,3,4        -> 2,5,7
+// 3,4,5           1,2,6,7,8
+void TestMergeNullBitmapWriteNullBitmap(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+    // setup _trans_vec
+    // translation vec
+    // <<dest_idx_num, dest_docId>>
+    // the first level vector: index indicates src segment.
+    // the second level vector: index indicates row id of source segment,
+    // value indicates row id of destination segment.
+    // <UINT32_MAX, UINT32_MAX> indicates current row not exist.
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 9 = 8 + 1
+    CLUCENE_ASSERT(source_cardinality == (dest_cardinality + 1));
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapEmptySrc(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    // empty source bitmap values
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 0 = 0
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapEmptyIndexSrcBitmapValues(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    // empty source bitmap values for every index
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+    srcNullBitmapValues.push_back(std::vector<uint32_t>());
+
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    _setupTransVec(trans_vec);
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+    
+    // 0 = 0
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+void TestMergeNullBitmapIgnoreDoc(CuTest *tc) {
+    lucene::analysis::SimpleAnalyzer<char> analyzer;
+    RAMDirectory dir;
+    auto* index_writer = _CLNEW lucene::index::IndexWriter(&dir, &analyzer, 
true);
+    std::vector<std::vector<uint32_t>> srcNullBitmapValues;
+    _setupSourceNullBitmapValues(srcNullBitmapValues);
+
+    std::vector<lucene::store::IndexOutput *> nullBitmapIndexOutputList;
+
+    // all docs in src index are ignored
+    std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec;
+    trans_vec.resize(srcNullBitmapValues.size());
+    for (int i = 0; i < trans_vec.size(); i++) {
+        trans_vec[i].resize(6);
+    }
+    for (int i = 0; i < srcNullBitmapValues.size(); i++) {
+        for (int j = 0; j < 6; j++) {
+            trans_vec[i][j] = std::pair<uint32_t, uint32_t>{UINT32_MAX, 
UINT32_MAX};
+        }
+    }
+
+    RAMDirectory dest_dir1;
+    RAMDirectory dest_dir2;
+    auto* dest_output_index1 = 
dest_dir1.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    auto* dest_output_index2 = 
dest_dir2.createOutput(IndexWriter::NULL_BITMAP_FILE_NAME);
+    nullBitmapIndexOutputList.push_back(dest_output_index1);
+    nullBitmapIndexOutputList.push_back(dest_output_index2);
+
+    try {
+        index_writer->setNumDestIndexes(2);
+        index_writer->setTransVec(trans_vec);
+        index_writer->mergeNullBitmap(srcNullBitmapValues, 
nullBitmapIndexOutputList);
+    } catch (const std::exception& ex) {
+        std::cout << "Caught exception: " << ex.what() << std::endl;
+    } catch (...) {
+        std::cout << "merge null bitmap failed" << std::endl;
+        return;
+    }
+    dest_output_index1->close();
+    dest_output_index2->close();
+    _CLLDELETE(dest_output_index1);
+    _CLLDELETE(dest_output_index2);
+    nullBitmapIndexOutputList.clear();
+    index_writer->close();
+    _CLDELETE(index_writer);
+
+    // check cardinality
+    uint64_t source_cardinality = 0;
+    for (const auto& vec : srcNullBitmapValues) {
+        source_cardinality += vec.size();
+    }
+    auto dest_cardinality1 = _getNullBitmapCardinality(dest_dir1);
+    auto dest_cardinality2 = _getNullBitmapCardinality(dest_dir2);
+    auto dest_cardinality = dest_cardinality1 + dest_cardinality2;
+
+    // 9 = 0 + 9
+    CLUCENE_ASSERT(source_cardinality == dest_cardinality + 
source_cardinality);
+    
+    // release resources
+    dest_dir1.close();
+    dest_dir2.close();
+    dir.close();
+}
+
+
+
+CuSuite* testIndexCompaction() {
+    CuSuite* suite = CuSuiteNew(_T("CLucene Index Compaction Test"));
+
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapWriteNullBitmap);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptySrc);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapEmptyIndexSrcBitmapValues);
+    SUITE_ADD_TEST(suite, TestMergeNullBitmapIgnoreDoc);
+
+    return suite;
+}
\ No newline at end of file
diff --git a/src/test/query/TestMultiPhraseQuery.cpp 
b/src/test/query/TestMultiPhraseQuery.cpp
new file mode 100644
index 0000000000..ccc4fe7f89
--- /dev/null
+++ b/src/test/query/TestMultiPhraseQuery.cpp
@@ -0,0 +1,163 @@
+#include <CLucene.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexReader.h"
+#include "CLucene/index/Term.h"
+#include "CLucene/search/MultiPhraseQuery.h"
+#include "CLucene/store/Directory.h"
+#include "CLucene/store/FSDirectory.h"
+#include "CLucene/store/RAMDirectory.h"
+#include "test.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_USE(search)
+CL_NS_USE(index)
+
+void testSimple1Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | 
Field::STORE_NO);
+
+    auto char_string_reader = 
std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), 
char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const 
float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testSimple2Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | 
Field::STORE_NO);
+
+    auto char_string_reader = 
std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), 
char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const 
float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testMultiAdd(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | 
Field::STORE_NO);
+
+    auto char_string_reader = 
std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), 
char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const 
float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+CuSuite* testMultiPhraseQuery(void) {
+    CuSuite* suite = CuSuiteNew(_T("CLucene MultiPhraseQuery Test"));
+
+    SUITE_ADD_TEST(suite, testSimple1Add);
+    SUITE_ADD_TEST(suite, testSimple2Add);
+    SUITE_ADD_TEST(suite, testMultiAdd);
+
+    return suite;
+}
\ No newline at end of file
diff --git a/src/test/test.h b/src/test/test.h
index 08c168cb67..cb92953882 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -82,6 +82,8 @@ CuSuite *testTermVectorsReader(void);
 CuSuite *teststandard95(void);
 CuSuite *testStrConvert(void);
 CuSuite *testSearchRange(void);
+CuSuite *testMultiPhraseQuery(void);
+CuSuite *testIndexCompaction(void);
 
 #ifdef TEST_CONTRIB_LIBS
 //CuSuite *testGermanAnalyzer(void);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index d703e15973..3a193a3c38 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -16,6 +16,8 @@ unittest tests[] = {
         {"MSBRadixSorter",testMSBRadixSorter},
         {"strconvert", testStrConvert},
         {"searchRange", testSearchRange},
+        {"MultiPhraseQuery", testMultiPhraseQuery},
+        {"IndexCompaction", testIndexCompaction},
 #ifdef TEST_CONTRIB_LIBS
         {"chinese", testchinese},
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to