[doris-thirdparty] branch clucene updated: [Fix](clucene) fix clucene makefile and file name (#27)

morningman Tue, 10 Jan 2023 07:12:43 -0800

This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git



The following commit(s) were added to refs/heads/clucene by this push:
     new 641247f  [Fix](clucene) fix clucene makefile and file name (#27)
641247f is described below

commit 641247fe4989c9de67693139f08d2a92af913a0d
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Tue Jan 10 23:12:32 2023 +0800

    [Fix](clucene) fix clucene makefile and file name (#27)
---
 .clang-format                                      | 16 ++++
 .../CLucene/analysis/LanguageBasedAnalyzer.cpp     |  5 ++
 .../CLucene/analysis/LanguageBasedAnalyzer.h       |  9 ++-
 .../CLucene/analysis/jieba/ChineseTokenizer.cpp    | 28 ++++---
 .../CLucene/analysis/jieba/ChineseTokenizer.h      |  8 +-
 .../CLucene/analysis/jieba/Logging.hpp             |  3 +-
 src/core/CLucene/analysis/Analyzers.cpp            |  4 +-
 src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp |  4 +-
 src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h   |  4 +-
 .../bkd/{docIds_writer.cpp => docids_writer.cpp}   |  0
 .../util/bkd/{docIds_writer.h => docids_writer.h}  |  0
 src/core/CLucene/util/bkd/point_reader.cpp         |  8 +-
 src/core/CMakeLists.txt                            | 88 ++++++++--------------
 src/test/analysis/TestAnalysis.cpp                 |  2 +-
 src/test/analysis/TestAnalyzers.cpp                |  2 +-
 src/test/contribs-lib/analysis/testChinese.cpp     | 10 ++-
 src/test/document/TestDocument.cpp                 |  4 +-
 src/test/tests.cpp                                 | 13 ++--
 src/test/util/TestBKD.cpp                          | 32 ++++----
 src/test/util/TestBKD.h                            | 10 +--
 20 files changed, 128 insertions(+), 122 deletions(-)

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..3b8b570
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,16 @@
+---
+Language: Cpp
+BasedOnStyle: Google
+AccessModifierOffset: -4
+AllowShortFunctionsOnASingleLine: Inline
+ColumnLimit: 100
+ConstructorInitializerIndentWidth: 8 # double of IndentWidth
+ContinuationIndentWidth: 8 # double of IndentWidth
+DerivePointerAlignment: false # always use PointerAlignment
+IndentCaseLabels: false
+IndentWidth: 4
+PointerAlignment: Left
+ReflowComments: false
+SortUsingDeclarations: false
+SpacesBeforeTrailingComments: 1
+SpaceBeforeCpp11BracedList: true
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
index 13294de..1af3a9c 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp
@@ -37,6 +37,11 @@ void LanguageBasedAnalyzer::setLanguage(const TCHAR *language) {
 void LanguageBasedAnalyzer::setStem(bool stem) {
     this->stem = stem;
 }
+void LanguageBasedAnalyzer::initDict(const std::string& dictPath) {
+    if (_tcscmp(lang, _T("chinese")) == 0) {
+        CL_NS2(analysis, jieba)::ChineseTokenizer::init(dictPath);
+    }
+}
 TokenStream *LanguageBasedAnalyzer::tokenStream(const TCHAR *fieldName, Reader *reader) {
     TokenStream *ret = NULL;
     if (_tcscmp(lang, _T("cjk")) == 0) {
diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
index 596c86b..147dc1e 100644
--- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
+++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h
@@ -15,12 +15,13 @@ class CLUCENE_CONTRIBS_EXPORT LanguageBasedAnalyzer: public CL_NS(analysis)::Ana
        TCHAR lang[100];
        bool stem;
 public:
-       LanguageBasedAnalyzer(const TCHAR* language=NULL, bool stem=true);
-       ~LanguageBasedAnalyzer();
+       explicit LanguageBasedAnalyzer(const TCHAR* language=nullptr, bool stem=true);
+       ~LanguageBasedAnalyzer() override;
        void setLanguage(const TCHAR* language);
        void setStem(bool stem);
-       TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
-  };
+    void initDict(const std::string& dictPath);
+       TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) override;
+};
 
 CL_NS_END
 #endif
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
index 02c50aa..410c514 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp
@@ -2,27 +2,30 @@
 #include "ChineseTokenizer.h"
 #include "CLucene/util/CLStreams.h"
 #include <filesystem>
+#include <memory>
 namespace fs = std::filesystem;
 
 CL_NS_DEF2(analysis,jieba)
 CL_NS_USE(analysis)
 CL_NS_USE(util)
 
-std::string get_dict_path() {
-    if(const char* env_p = std::getenv("DICT_PATH")) {
-        return env_p;
-    }
-    return "";
+std::unique_ptr<cppjieba::Jieba> ChineseTokenizer::cppjieba = nullptr;
+ChineseTokenizer::ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) {
+    buffer[0] = 0;
 }
 
-static unique_ptr<cppjieba::Jieba> cppjieba = std::make_unique<cppjieba::Jieba>(
-        get_dict_path() + "dict/jieba.dict.utf8",
-        get_dict_path() + "dict/hmm_model.utf8",
-        get_dict_path() + "dict/user.dict.utf8",
-        get_dict_path() + "dict/idf.utf8",
-        get_dict_path() + "dict/stop_words.utf8");
+void ChineseTokenizer::init(const std::string &dictPath) {
+    if(cppjieba == nullptr) {
+        cppjieba = std::make_unique<cppjieba::Jieba>(
+                dictPath + "/" + "dict/jieba.dict.utf8",
+                dictPath + "/" + "dict/hmm_model.utf8",
+                dictPath + "/" + "dict/user.dict.utf8",
+                dictPath + "/" + "dict/idf.utf8",
+                dictPath + "/" + "dict/stop_words.utf8");
+    }
+}
 
-CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) {
+CL_NS(analysis)::Token *ChineseTokenizer::next(lucene::analysis::Token *token) {
     // try to read all words
     if (dataLen == 0) {
         auto bufferLen = input->read((const void **) &ioBuffer, 1, 0);
@@ -32,6 +35,7 @@ CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) {
         }
         char tmp_buffer[4 * bufferLen];
         lucene_wcsntoutf8(tmp_buffer, ioBuffer, bufferLen, 4 * bufferLen);
+        init();
         cppjieba->Cut(tmp_buffer, tokens_text, true);
         dataLen = tokens_text.size();
     }
diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
index cecdd17..61ab100 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
+++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h
@@ -32,16 +32,14 @@ private:
      * members of Tokenizer)
      */
     const TCHAR* ioBuffer{};
-    //std::unique_ptr<cppjieba::Jieba> cppjieba;
     std::vector<std::string> tokens_text;
     std::vector<std::unique_ptr<Token>> tokens;
 
 public:
+    static std::unique_ptr<cppjieba::Jieba> cppjieba;
     // Constructor
-    explicit ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) {
-
-        buffer[0]=0;
-    }
+    explicit ChineseTokenizer(lucene::util::Reader *reader);
+    static void init(const std::string& dictPath="");
 
     // Destructor
     ~ChineseTokenizer() override {}
diff --git a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
index 77540ce..b0ec473 100644
--- a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
+++ b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp
@@ -72,7 +72,8 @@ namespace limonp {
 #endif
             std::cerr << stream_.str() << std::endl;
             if (level_ == LL_FATAL) {
-                abort();
+                _CLTHROWA (CL_ERR_UNKNOWN, "failed in chinese tokenizer");
+                //abort();
             }
         }
 
diff --git a/src/core/CLucene/analysis/Analyzers.cpp b/src/core/CLucene/analysis/Analyzers.cpp
index 2783a10..6ca4183 100644
--- a/src/core/CLucene/analysis/Analyzers.cpp
+++ b/src/core/CLucene/analysis/Analyzers.cpp
@@ -507,13 +507,13 @@ Token* KeywordTokenizer::next(Token* token){
       if (rd == -1)
                    break;
       if ( upto == token->bufferLength() ){
-        termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8);
+        termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + rd);
       }
            _tcsncpy(termBuffer + upto, readBuffer, rd);
       upto += rd;
     }
     if ( token->bufferLength() < upto + 1 ){
-      termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8);
+      termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + upto);
     }
     termBuffer[upto]=0;
     token->setTermLength<TCHAR>(upto);
diff --git a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
index 5e94659..dc77f6b 100644
--- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
+++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp
@@ -9,7 +9,7 @@ bkd_msb_radix_sorter::bkd_msb_radix_sorter(
         int dim, int32_t bytes) : MSBRadixSorter(bytes), dim(dim), writer(writer), heap_writer(heap_writer) {
 }
 
-int bkd_msb_radix_sorter::byte_at(int i, int k) {
+int bkd_msb_radix_sorter::byteAt(int i, int k) {
     assert(k >= 0);
     if (k < writer->bytes_per_dim_) {
         // dim bytes
@@ -76,4 +76,4 @@ void bkd_msb_radix_sorter::swap(int i, int j) {
     }
 }
 
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
index 81ace9e..84db6d1 100644
--- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
+++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h
@@ -23,7 +23,7 @@ public:
             int dim, int32_t bytes);
 
 protected:
-    int byte_at(int i, int k) override;
+    int byteAt(int i, int k) override;
     void swap(int i, int j) override;
 };
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CLucene/util/bkd/docIds_writer.cpp b/src/core/CLucene/util/bkd/docids_writer.cpp
similarity index 100%
rename from src/core/CLucene/util/bkd/docIds_writer.cpp
rename to src/core/CLucene/util/bkd/docids_writer.cpp
diff --git a/src/core/CLucene/util/bkd/docIds_writer.h b/src/core/CLucene/util/bkd/docids_writer.h
similarity index 100%
rename from src/core/CLucene/util/bkd/docIds_writer.h
rename to src/core/CLucene/util/bkd/docids_writer.h
diff --git a/src/core/CLucene/util/bkd/point_reader.cpp b/src/core/CLucene/util/bkd/point_reader.cpp
index 8fc52ea..08fd24c 100644
--- a/src/core/CLucene/util/bkd/point_reader.cpp
+++ b/src/core/CLucene/util/bkd/point_reader.cpp
@@ -27,19 +27,19 @@ int64_t point_reader::split(int64_t count,
         assert(result);
         const std::vector<uint8_t> &packedValue = packed_value();
         int64_t ordinal = ord();
-        int32_t docid = docid();
+        int32_t doc_id = docid();
         if (rightTree->Get(ordinal)) {
-            right->append(packedValue, ordinal, docid);
+            right->append(packedValue, ordinal, doc_id);
             rightCount++;
             if (doClearBits) {
                 rightTree->Clear(ordinal);
             }
         } else {
-            left->append(packedValue, ordinal, docid);
+            left->append(packedValue, ordinal, doc_id);
         }
     }
 
     return rightCount;
 }
 
-CL_NS_END2
\ No newline at end of file
+CL_NS_END2
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index c526e70..066d453 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -33,32 +33,32 @@ SET(clucene_core_Files
        ./CLucene/util/MD5Digester.cpp
        ./CLucene/util/StringIntern.cpp
        ./CLucene/util/BitSet.cpp
-               CLucene/util/bkd/bkd_writer.cpp
-        CLucene/util/bkd/bkd_reader.cpp
-               CLucene/util/bkd/index_tree.cpp
-               CLucene/util/bkd/packed_index_tree.cpp
-               CLucene/util/bkd/legacy_index_tree.cpp
-               CLucene/util/bkd/heap_point_writer.cpp
-               CLucene/util/bkd/heap_point_reader.cpp
-               CLucene/util/bkd/point_reader.cpp
-               CLucene/util/bkd/docIds_writer.cpp
-        CLucene/util/bkd/bkd_msb_radix_sorter.cpp
-               CLucene/util/croaring/roaring.c
-               CLucene/util/croaring/roaring.h
-               CLucene/util/croaring/roaring.hh
-               ./CLucene/util/BitUtil.cpp
-               ./CLucene/util/BytesRef.cpp
-               ./CLucene/util/BytesRefBuilder.cpp
-               ./CLucene/util/CodecUtil.cpp
-               ./CLucene/util/LongBitSet.cpp
-               ./CLucene/util/IntroSorter.cpp
-               ./CLucene/util/Sorter.cpp
-               ./CLucene/util/MSBRadixSorter.cpp
-               ./CLucene/util/FixedBitSet.cpp
-               ./CLucene/util/FutureArrays.cpp
-               ./CLucene/util/NumericUtils.cpp
-               ./CLucene/util/stringUtil.cpp
-               ./CLucene/queryParser/FastCharStream.cpp
+       ./CLucene/util/bkd/bkd_writer.cpp
+        ./CLucene/util/bkd/bkd_reader.cpp
+       ./CLucene/util/bkd/index_tree.cpp
+       ./CLucene/util/bkd/packed_index_tree.cpp
+       ./CLucene/util/bkd/legacy_index_tree.cpp
+       ./CLucene/util/bkd/heap_point_writer.cpp
+       ./CLucene/util/bkd/heap_point_reader.cpp
+       ./CLucene/util/bkd/point_reader.cpp
+       ./CLucene/util/bkd/docids_writer.cpp
+        ./CLucene/util/bkd/bkd_msb_radix_sorter.cpp
+        ./CLucene/util/croaring/roaring.c
+       ./CLucene/util/croaring/roaring.h
+       ./CLucene/util/croaring/roaring.hh
+       ./CLucene/util/BitUtil.cpp
+       ./CLucene/util/BytesRef.cpp
+       ./CLucene/util/BytesRefBuilder.cpp
+       ./CLucene/util/CodecUtil.cpp
+       ./CLucene/util/LongBitSet.cpp
+       ./CLucene/util/IntroSorter.cpp
+       ./CLucene/util/Sorter.cpp
+       ./CLucene/util/MSBRadixSorter.cpp
+       ./CLucene/util/FixedBitSet.cpp
+       ./CLucene/util/FutureArrays.cpp
+       ./CLucene/util/NumericUtils.cpp
+       ./CLucene/util/stringUtil.cpp
+       ./CLucene/queryParser/FastCharStream.cpp
        ./CLucene/queryParser/MultiFieldQueryParser.cpp
        ./CLucene/queryParser/QueryParser.cpp
        ./CLucene/queryParser/QueryParserTokenManager.cpp
@@ -69,25 +69,14 @@ SET(clucene_core_Files
        ./CLucene/queryParser/legacy/QueryParserBase.cpp
        ./CLucene/queryParser/legacy/QueryToken.cpp
        ./CLucene/queryParser/legacy/TokenList.cpp
-
-               #./CLucene/queryParser/QueryBuilder.cpp
-       #./CLucene/queryParser/classic/QueryParserBase.cpp
-       #./CLucene/queryParser/classic/QueryParser.cpp
-       #./CLucene/queryParser/classic/Token.cpp
-       #./CLucene/queryParser/classic/TokenMgrError.cpp
-       #./CLucene/queryParser/classic/CharStream.cpp
-       #./CLucene/queryParser/classic/ParseException.cpp
-       #./CLucene/queryParser/classic/QueryParserTokenManager.cpp
        ./CLucene/analysis/standard/StandardAnalyzer.cpp
        ./CLucene/analysis/standard/StandardFilter.cpp
        ./CLucene/analysis/standard/StandardTokenizer.cpp
-       #./CLucene/analysis/mmseg/MMsegAnalyzer.cpp
-       #./CLucene/analysis/mmseg/MmsegTokenizer.cpp
        ./CLucene/analysis/Analyzers.cpp
        ./CLucene/analysis/AnalysisHeader.cpp
        ./CLucene/store/MMapInput.cpp
        ./CLucene/store/IndexInput.cpp
-               ./CLucene/store/ByteArrayDataInput.cpp
+       ./CLucene/store/ByteArrayDataInput.cpp
        ./CLucene/store/Lock.cpp
        ./CLucene/store/LockFactory.cpp
        ./CLucene/store/IndexOutput.cpp
@@ -113,10 +102,10 @@ SET(clucene_core_Files
        ./CLucene/index/Terms.cpp
        ./CLucene/index/MergePolicy.cpp
        ./CLucene/index/DocumentsWriter.cpp
-               ./CLucene/index/SDocumentWriter.cpp
-               ./CLucene/index/SDocumentWriter.h
-               ./CLucene/index/DocumentsWriterThreadState.cpp
-               ./CLucene/index/SegmentTermVector.cpp
+       ./CLucene/index/SDocumentWriter.cpp
+       ./CLucene/index/SDocumentWriter.h
+       ./CLucene/index/DocumentsWriterThreadState.cpp
+       ./CLucene/index/SegmentTermVector.cpp
        ./CLucene/index/TermVectorReader.cpp
        ./CLucene/index/FieldInfos.cpp
        ./CLucene/index/CompoundFile.cpp
@@ -213,21 +202,6 @@ SET(clucene_core_Files
        ./CLucene/search/spans/TermSpans.cpp
 )
 
-#if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
-#      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=haswell -DAVX2_ON -fstrict-aliasing")
-       #ADD_DEFINITIONS(-DAVX2_ON)
-       #ADD_DEFINITIONS(-DSSE2_ON)
-#endif()
-
-#SET(pfor_Files_SSE
-#              ${clucene-ext_SOURCE_DIR}/for/transpose.c
-#              ${clucene-ext_SOURCE_DIR}/for/bitunpack.c
-#              ${clucene-ext_SOURCE_DIR}/for/bitpack.c
-#              ${clucene-ext_SOURCE_DIR}/for/bitutil.c
-#              ${clucene-ext_SOURCE_DIR}/for/vp4d.c
-#              ${clucene-ext_SOURCE_DIR}/for/vp4c.c
-#              )
-
 #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core)
 IF ( USE_SHARED_OBJECT_FILES )
     GET_SHARED_FILES(clucene_shared_Files)
diff --git a/src/test/analysis/TestAnalysis.cpp b/src/test/analysis/TestAnalysis.cpp
index f741939..73e4337 100644
--- a/src/test/analysis/TestAnalysis.cpp
+++ b/src/test/analysis/TestAnalysis.cpp
@@ -87,7 +87,7 @@ void testChar(CuTest *tc) {
     while (stream->next(&t) != NULL) {
         count++;
     }
-    printf("count = %d\n", count);
+    //printf("count = %d\n", count);
     _CLDELETE(stream);
 }
 
diff --git a/src/test/analysis/TestAnalyzers.cpp b/src/test/analysis/TestAnalyzers.cpp
index f9a2d09..449d5e2 100644
--- a/src/test/analysis/TestAnalyzers.cpp
+++ b/src/test/analysis/TestAnalyzers.cpp
@@ -486,7 +486,7 @@
       _CLLDELETE(reader);
   }
 
-CuSuite *testanalyzers(void)
+CuSuite *testanalyzers()
 {
        CuSuite *suite = CuSuiteNew(_T("CLucene Analyzers Test"));
 
diff --git a/src/test/contribs-lib/analysis/testChinese.cpp b/src/test/contribs-lib/analysis/testChinese.cpp
index c74ba1f..d75a000 100644
--- a/src/test/contribs-lib/analysis/testChinese.cpp
+++ b/src/test/contribs-lib/analysis/testChinese.cpp
@@ -142,6 +142,13 @@ void testCJK(CuTest *tc) {
     _testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2);
 }
 
+std::string get_dict_path() {
+    if(const char* env_p = std::getenv("DICT_PATH")) {
+        return env_p;
+    }
+    return "";
+}
+
 void testSimpleJiebaTokenizer(CuTest* tc) {
     LanguageBasedAnalyzer a;
     CL_NS(util)::StringReader reader(_T("我爱你中国"));
@@ -152,6 +159,7 @@ void testSimpleJiebaTokenizer(CuTest* tc) {
     //test with chinese
     a.setLanguage(_T("chinese"));
     a.setStem(false);
+    a.initDict(get_dict_path());
     ts = a.tokenStream(_T("contents"), &reader);
 
     CLUCENE_ASSERT(ts->next(&t) != NULL);
@@ -574,7 +582,7 @@ void testLanguageBasedAnalyzer(CuTest* tc) {
 }
 
 CuSuite *testchinese(void) {
-    CuSuite *suite = CuSuiteNew(_T("CLucene Analysis Test"));
+    CuSuite *suite = CuSuiteNew(_T("CLucene chinese tokenizer Test"));
 
     SUITE_ADD_TEST(suite, testFile);
     SUITE_ADD_TEST(suite, testCJK);
diff --git a/src/test/document/TestDocument.cpp b/src/test/document/TestDocument.cpp
index f6f392c..b90f67d 100644
--- a/src/test/document/TestDocument.cpp
+++ b/src/test/document/TestDocument.cpp
@@ -471,8 +471,8 @@ CuSuite *testdocument(void) {
     SUITE_ADD_TEST(suite, TestFields);
     SUITE_ADD_TEST(suite, TestMultiSetValueField);
     SUITE_ADD_TEST(suite, TestMultiAddValueField);
-    SUITE_ADD_TEST(suite, TestSetFieldBench);
-    SUITE_ADD_TEST(suite, TestNewFieldBench);
+    //SUITE_ADD_TEST(suite, TestSetFieldBench);
+    //SUITE_ADD_TEST(suite, TestNewFieldBench);
     SUITE_ADD_TEST(suite, TestReaderValueField);
     SUITE_ADD_TEST(suite, TestAddDocument);
     //SUITE_ADD_TEST(suite, TestDateTools);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index 32aedad..f1aee90 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -11,10 +11,10 @@ unittest tests[] = {
 //        {"indexreader", testindexreader},
 //        {"indexsearcher", testIndexSearcher},
 //        {"reuters", testreuters},
-//        {"analysis", testanalysis},
-//        {"analyzers", testanalyzers},
-//        {"document", testdocument},
-//        {"field", testField},
+        {"analysis", testanalysis},
+        {"analyzers", testanalyzers},
+        {"document", testdocument},
+        {"field", testField},
 //        {"numbertools", testNumberTools},
 //        {"debug", testdebug},
 //        {"ramdirectory", testRAMDirectory},
@@ -39,14 +39,13 @@ unittest tests[] = {
 //        {"store", teststore},
 //        {"utf8", testutf8},
 //        {"bitset", testBitSet},
-//        {"bkd", testBKD},
-//        {"MSBRadixSorter",testMSBRadixSorter},
+        {"bkd", testBKD},
+        {"MSBRadixSorter",testMSBRadixSorter},
 //        {"extractterms", testExtractTerms},
 //        {"spanqueries", testSpanQueries},
 //        {"stringbuffer", testStringBuffer},
 //        {"termvectorsreader", testTermVectorsReader},
 #ifdef TEST_CONTRIB_LIBS
         {"chinese", testchinese},
-        //{"germananalyzer", testGermanAnalyzer},
 #endif
         {"LastTest", NULL}};
diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp
index 4c43216..2dcc22c 100644
--- a/src/test/util/TestBKD.cpp
+++ b/src/test/util/TestBKD.cpp
@@ -53,14 +53,14 @@ void TestVisitor1::visit(Roaring *docID, std::vector<uint8_t> &packedValue) {
     visit(*docID);
 }
 
-void TestVisitor1::visit(bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) {
+void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) {
     if (!matches(packedValue.data())) {
         return;
     }
-    int32_t docID = iter->docID_set->nextDoc();
-    while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) {
+    int32_t docID = iter->docid_set->nextDoc();
+    while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) {
         hits->set(docID);
-        docID = iter->docID_set->nextDoc();
+        docID = iter->docid_set->nextDoc();
     }
 }
 
@@ -376,9 +376,9 @@ void testBug1Read(CuTest *tc) {
             //printf("something wrong in read\n");
             printf("clucene error: %s\n", r.what());
         }
-        printf("hits count=%d\n", result->count());
+        //printf("hits count=%d\n", result->count());
         CuAssertEquals(tc, result->count(), 6);
-        printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+        //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
     }
 }
 
@@ -444,9 +444,9 @@ void testLowCardinalInts1DRead2(CuTest *tc) {
             //printf("something wrong in read\n");
             printf("clucene error: %s\n", r.what());
         }
-        printf("hits count=%d\n", hits->count());
+        //printf("hits count=%d\n", hits->count());
         CuAssertEquals(tc, hits->count(), 12928);
-        printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+        //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
     }
 }
 
@@ -475,9 +475,9 @@ void testLowCardinalInts1DRead(CuTest *tc) {
             //printf("something wrong in read\n");
             printf("clucene error: %s\n", r.what());
         }
-        printf("hits count=%d\n", hits->count());
+        //printf("hits count=%d\n", hits->count());
         CuAssertEquals(tc, hits->count(), 256);
-        printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+        //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
     }
 }
 
@@ -551,7 +551,7 @@ void testBasicsInts1DRead(CuTest *tc) {
 
             //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
         }
-        printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+        //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
         auto hits1 = std::make_shared<BitSet>(N);
         auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1);
         str = Misc::currentTimeMillis();
@@ -566,7 +566,7 @@ void testBasicsInts1DRead(CuTest *tc) {
             CuAssertEquals(tc, expected, actual);
             //assertEquals(L"docID=" + to_wstring(docID), expected, actual);
         }
-        printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+        //printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
     }
     dir->close();
     _CLDECDELETE(dir);
@@ -604,15 +604,15 @@ void testHttplogsRead(CuTest *tc) {
             //CuAssertEquals(tc, 0, type);
             r->read_index(index_in_);
             r->intersect(v.get());
-            printf("\ntry query result:%ld\n", r->estimate_point_count(v.get()));
-            printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
+            //printf("\ntry query result:%ld\n", r->estimate_point_count(v.get()));
+            //printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str));
         } catch (CLuceneError &r) {
             //printf("something wrong in read\n");
             printf("clucene error: %s\n", r.what());
         }
-        printf("result size = %d\n", result->count());
+        //printf("result size = %d\n", result->count());
         CuAssertEquals(tc, result->count(), 8445);
-        printf("stats=%s\n", r->stats.to_string().c_str());
+        //printf("stats=%s\n", r->stats.to_string().c_str());
     }
     dir->close();
     _CLDECDELETE(dir);
diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h
index 8a79552..3264313 100644
--- a/src/test/util/TestBKD.h
+++ b/src/test/util/TestBKD.h
@@ -33,7 +33,7 @@ public:
     }
     void visit(Roaring *docID, std::vector<uint8_t> &packedValue) override;
     void visit(int docID, std::vector<uint8_t> &packedValue) override;
-    void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override;
+    void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override;
 
     bool matches(uint8_t *packedValue);
 
@@ -91,14 +91,14 @@ public:
         }
         visit(*docID);
     };
-    void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override {
+    void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override {
         if (!matches(packedValue.data())) {
             return;
         }
-        int32_t docID = iter->docID_set->nextDoc();
-        while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) {
+        int32_t docID = iter->docid_set->nextDoc();
+        while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) {
             hits->set(docID);
-            docID = iter->docID_set->nextDoc();
+            docID = iter->docid_set->nextDoc();
         }
     };
     bool matches(uint8_t *packedValue);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

[doris-thirdparty] branch clucene updated: [Fix](clucene) fix clucene makefile and file name (#27)

Reply via email to