This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch clucene in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push: new 641247f [Fix](clucene) fix clucene makefile and file name (#27) 641247f is described below commit 641247fe4989c9de67693139f08d2a92af913a0d Author: airborne12 <airborn...@gmail.com> AuthorDate: Tue Jan 10 23:12:32 2023 +0800 [Fix](clucene) fix clucene makefile and file name (#27) --- .clang-format | 16 ++++ .../CLucene/analysis/LanguageBasedAnalyzer.cpp | 5 ++ .../CLucene/analysis/LanguageBasedAnalyzer.h | 9 ++- .../CLucene/analysis/jieba/ChineseTokenizer.cpp | 28 ++++--- .../CLucene/analysis/jieba/ChineseTokenizer.h | 8 +- .../CLucene/analysis/jieba/Logging.hpp | 3 +- src/core/CLucene/analysis/Analyzers.cpp | 4 +- src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp | 4 +- src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h | 4 +- .../bkd/{docIds_writer.cpp => docids_writer.cpp} | 0 .../util/bkd/{docIds_writer.h => docids_writer.h} | 0 src/core/CLucene/util/bkd/point_reader.cpp | 8 +- src/core/CMakeLists.txt | 88 ++++++++-------------- src/test/analysis/TestAnalysis.cpp | 2 +- src/test/analysis/TestAnalyzers.cpp | 2 +- src/test/contribs-lib/analysis/testChinese.cpp | 10 ++- src/test/document/TestDocument.cpp | 4 +- src/test/tests.cpp | 13 ++-- src/test/util/TestBKD.cpp | 32 ++++---- src/test/util/TestBKD.h | 10 +-- 20 files changed, 128 insertions(+), 122 deletions(-) diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..3b8b570 --- /dev/null +++ b/.clang-format @@ -0,0 +1,16 @@ +--- +Language: Cpp +BasedOnStyle: Google +AccessModifierOffset: -4 +AllowShortFunctionsOnASingleLine: Inline +ColumnLimit: 100 +ConstructorInitializerIndentWidth: 8 # double of IndentWidth +ContinuationIndentWidth: 8 # double of IndentWidth +DerivePointerAlignment: false # always use PointerAlignment +IndentCaseLabels: false +IndentWidth: 4 +PointerAlignment: Left +ReflowComments: false +SortUsingDeclarations: false +SpacesBeforeTrailingComments: 1 +SpaceBeforeCpp11BracedList: true diff --git 
a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp index 13294de..1af3a9c 100644 --- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp +++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.cpp @@ -37,6 +37,11 @@ void LanguageBasedAnalyzer::setLanguage(const TCHAR *language) { void LanguageBasedAnalyzer::setStem(bool stem) { this->stem = stem; } +void LanguageBasedAnalyzer::initDict(const std::string& dictPath) { + if (_tcscmp(lang, _T("chinese")) == 0) { + CL_NS2(analysis, jieba)::ChineseTokenizer::init(dictPath); + } +} TokenStream *LanguageBasedAnalyzer::tokenStream(const TCHAR *fieldName, Reader *reader) { TokenStream *ret = NULL; if (_tcscmp(lang, _T("cjk")) == 0) { diff --git a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h index 596c86b..147dc1e 100644 --- a/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h +++ b/src/contribs-lib/CLucene/analysis/LanguageBasedAnalyzer.h @@ -15,12 +15,13 @@ class CLUCENE_CONTRIBS_EXPORT LanguageBasedAnalyzer: public CL_NS(analysis)::Ana TCHAR lang[100]; bool stem; public: - LanguageBasedAnalyzer(const TCHAR* language=NULL, bool stem=true); - ~LanguageBasedAnalyzer(); + explicit LanguageBasedAnalyzer(const TCHAR* language=nullptr, bool stem=true); + ~LanguageBasedAnalyzer() override; void setLanguage(const TCHAR* language); void setStem(bool stem); - TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); - }; + void initDict(const std::string& dictPath); + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader) override; +}; CL_NS_END #endif diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp index 02c50aa..410c514 100644 --- a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp +++ 
b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.cpp @@ -2,27 +2,30 @@ #include "ChineseTokenizer.h" #include "CLucene/util/CLStreams.h" #include <filesystem> +#include <memory> namespace fs = std::filesystem; CL_NS_DEF2(analysis,jieba) CL_NS_USE(analysis) CL_NS_USE(util) -std::string get_dict_path() { - if(const char* env_p = std::getenv("DICT_PATH")) { - return env_p; - } - return ""; +std::unique_ptr<cppjieba::Jieba> ChineseTokenizer::cppjieba = nullptr; +ChineseTokenizer::ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) { + buffer[0] = 0; } -static unique_ptr<cppjieba::Jieba> cppjieba = std::make_unique<cppjieba::Jieba>( - get_dict_path() + "dict/jieba.dict.utf8", - get_dict_path() + "dict/hmm_model.utf8", - get_dict_path() + "dict/user.dict.utf8", - get_dict_path() + "dict/idf.utf8", - get_dict_path() + "dict/stop_words.utf8"); +void ChineseTokenizer::init(const std::string &dictPath) { + if(cppjieba == nullptr) { + cppjieba = std::make_unique<cppjieba::Jieba>( + dictPath + "/" + "dict/jieba.dict.utf8", + dictPath + "/" + "dict/hmm_model.utf8", + dictPath + "/" + "dict/user.dict.utf8", + dictPath + "/" + "dict/idf.utf8", + dictPath + "/" + "dict/stop_words.utf8"); + } +} -CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) { +CL_NS(analysis)::Token *ChineseTokenizer::next(lucene::analysis::Token *token) { // try to read all words if (dataLen == 0) { auto bufferLen = input->read((const void **) &ioBuffer, 1, 0); @@ -32,6 +35,7 @@ CL_NS(analysis)::Token* ChineseTokenizer::next(lucene::analysis::Token* token) { } char tmp_buffer[4 * bufferLen]; lucene_wcsntoutf8(tmp_buffer, ioBuffer, bufferLen, 4 * bufferLen); + init(); cppjieba->Cut(tmp_buffer, tokens_text, true); dataLen = tokens_text.size(); } diff --git a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h index cecdd17..61ab100 100644 --- 
a/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h +++ b/src/contribs-lib/CLucene/analysis/jieba/ChineseTokenizer.h @@ -32,16 +32,14 @@ private: * members of Tokenizer) */ const TCHAR* ioBuffer{}; - //std::unique_ptr<cppjieba::Jieba> cppjieba; std::vector<std::string> tokens_text; std::vector<std::unique_ptr<Token>> tokens; public: + static std::unique_ptr<cppjieba::Jieba> cppjieba; // Constructor - explicit ChineseTokenizer(lucene::util::Reader *reader) : Tokenizer(reader) { - - buffer[0]=0; - } + explicit ChineseTokenizer(lucene::util::Reader *reader); + static void init(const std::string& dictPath=""); // Destructor ~ChineseTokenizer() override {} diff --git a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp index 77540ce..b0ec473 100644 --- a/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp +++ b/src/contribs-lib/CLucene/analysis/jieba/Logging.hpp @@ -72,7 +72,8 @@ namespace limonp { #endif std::cerr << stream_.str() << std::endl; if (level_ == LL_FATAL) { - abort(); + _CLTHROWA (CL_ERR_UNKNOWN, "failed in chinese tokenizer"); + //abort(); } } diff --git a/src/core/CLucene/analysis/Analyzers.cpp b/src/core/CLucene/analysis/Analyzers.cpp index 2783a10..6ca4183 100644 --- a/src/core/CLucene/analysis/Analyzers.cpp +++ b/src/core/CLucene/analysis/Analyzers.cpp @@ -507,13 +507,13 @@ Token* KeywordTokenizer::next(Token* token){ if (rd == -1) break; if ( upto == token->bufferLength() ){ - termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8); + termBuffer = (TCHAR*)token->resizeTermBuffer<TCHAR>(token->bufferLength() + rd); } _tcsncpy(termBuffer + upto, readBuffer, rd); upto += rd; } if ( token->bufferLength() < upto + 1 ){ - termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + 8); + termBuffer=(TCHAR *)token->resizeTermBuffer<TCHAR>(token->bufferLength() + upto); } termBuffer[upto]=0; token->setTermLength<TCHAR>(upto); diff --git 
a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp index 5e94659..dc77f6b 100644 --- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp +++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.cpp @@ -9,7 +9,7 @@ bkd_msb_radix_sorter::bkd_msb_radix_sorter( int dim, int32_t bytes) : MSBRadixSorter(bytes), dim(dim), writer(writer), heap_writer(heap_writer) { } -int bkd_msb_radix_sorter::byte_at(int i, int k) { +int bkd_msb_radix_sorter::byteAt(int i, int k) { assert(k >= 0); if (k < writer->bytes_per_dim_) { // dim bytes @@ -76,4 +76,4 @@ void bkd_msb_radix_sorter::swap(int i, int j) { } } -CL_NS_END2 \ No newline at end of file +CL_NS_END2 diff --git a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h index 81ace9e..84db6d1 100644 --- a/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h +++ b/src/core/CLucene/util/bkd/bkd_msb_radix_sorter.h @@ -23,7 +23,7 @@ public: int dim, int32_t bytes); protected: - int byte_at(int i, int k) override; + int byteAt(int i, int k) override; void swap(int i, int j) override; }; -CL_NS_END2 \ No newline at end of file +CL_NS_END2 diff --git a/src/core/CLucene/util/bkd/docIds_writer.cpp b/src/core/CLucene/util/bkd/docids_writer.cpp similarity index 100% rename from src/core/CLucene/util/bkd/docIds_writer.cpp rename to src/core/CLucene/util/bkd/docids_writer.cpp diff --git a/src/core/CLucene/util/bkd/docIds_writer.h b/src/core/CLucene/util/bkd/docids_writer.h similarity index 100% rename from src/core/CLucene/util/bkd/docIds_writer.h rename to src/core/CLucene/util/bkd/docids_writer.h diff --git a/src/core/CLucene/util/bkd/point_reader.cpp b/src/core/CLucene/util/bkd/point_reader.cpp index 8fc52ea..08fd24c 100644 --- a/src/core/CLucene/util/bkd/point_reader.cpp +++ b/src/core/CLucene/util/bkd/point_reader.cpp @@ -27,19 +27,19 @@ int64_t point_reader::split(int64_t count, assert(result); const std::vector<uint8_t> &packedValue = 
packed_value(); int64_t ordinal = ord(); - int32_t docid = docid(); + int32_t doc_id = docid(); if (rightTree->Get(ordinal)) { - right->append(packedValue, ordinal, docid); + right->append(packedValue, ordinal, doc_id); rightCount++; if (doClearBits) { rightTree->Clear(ordinal); } } else { - left->append(packedValue, ordinal, docid); + left->append(packedValue, ordinal, doc_id); } } return rightCount; } -CL_NS_END2 \ No newline at end of file +CL_NS_END2 diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index c526e70..066d453 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -33,32 +33,32 @@ SET(clucene_core_Files ./CLucene/util/MD5Digester.cpp ./CLucene/util/StringIntern.cpp ./CLucene/util/BitSet.cpp - CLucene/util/bkd/bkd_writer.cpp - CLucene/util/bkd/bkd_reader.cpp - CLucene/util/bkd/index_tree.cpp - CLucene/util/bkd/packed_index_tree.cpp - CLucene/util/bkd/legacy_index_tree.cpp - CLucene/util/bkd/heap_point_writer.cpp - CLucene/util/bkd/heap_point_reader.cpp - CLucene/util/bkd/point_reader.cpp - CLucene/util/bkd/docIds_writer.cpp - CLucene/util/bkd/bkd_msb_radix_sorter.cpp - CLucene/util/croaring/roaring.c - CLucene/util/croaring/roaring.h - CLucene/util/croaring/roaring.hh - ./CLucene/util/BitUtil.cpp - ./CLucene/util/BytesRef.cpp - ./CLucene/util/BytesRefBuilder.cpp - ./CLucene/util/CodecUtil.cpp - ./CLucene/util/LongBitSet.cpp - ./CLucene/util/IntroSorter.cpp - ./CLucene/util/Sorter.cpp - ./CLucene/util/MSBRadixSorter.cpp - ./CLucene/util/FixedBitSet.cpp - ./CLucene/util/FutureArrays.cpp - ./CLucene/util/NumericUtils.cpp - ./CLucene/util/stringUtil.cpp - ./CLucene/queryParser/FastCharStream.cpp + ./CLucene/util/bkd/bkd_writer.cpp + ./CLucene/util/bkd/bkd_reader.cpp + ./CLucene/util/bkd/index_tree.cpp + ./CLucene/util/bkd/packed_index_tree.cpp + ./CLucene/util/bkd/legacy_index_tree.cpp + ./CLucene/util/bkd/heap_point_writer.cpp + ./CLucene/util/bkd/heap_point_reader.cpp + ./CLucene/util/bkd/point_reader.cpp + 
./CLucene/util/bkd/docids_writer.cpp + ./CLucene/util/bkd/bkd_msb_radix_sorter.cpp + ./CLucene/util/croaring/roaring.c + ./CLucene/util/croaring/roaring.h + ./CLucene/util/croaring/roaring.hh + ./CLucene/util/BitUtil.cpp + ./CLucene/util/BytesRef.cpp + ./CLucene/util/BytesRefBuilder.cpp + ./CLucene/util/CodecUtil.cpp + ./CLucene/util/LongBitSet.cpp + ./CLucene/util/IntroSorter.cpp + ./CLucene/util/Sorter.cpp + ./CLucene/util/MSBRadixSorter.cpp + ./CLucene/util/FixedBitSet.cpp + ./CLucene/util/FutureArrays.cpp + ./CLucene/util/NumericUtils.cpp + ./CLucene/util/stringUtil.cpp + ./CLucene/queryParser/FastCharStream.cpp ./CLucene/queryParser/MultiFieldQueryParser.cpp ./CLucene/queryParser/QueryParser.cpp ./CLucene/queryParser/QueryParserTokenManager.cpp @@ -69,25 +69,14 @@ SET(clucene_core_Files ./CLucene/queryParser/legacy/QueryParserBase.cpp ./CLucene/queryParser/legacy/QueryToken.cpp ./CLucene/queryParser/legacy/TokenList.cpp - - #./CLucene/queryParser/QueryBuilder.cpp - #./CLucene/queryParser/classic/QueryParserBase.cpp - #./CLucene/queryParser/classic/QueryParser.cpp - #./CLucene/queryParser/classic/Token.cpp - #./CLucene/queryParser/classic/TokenMgrError.cpp - #./CLucene/queryParser/classic/CharStream.cpp - #./CLucene/queryParser/classic/ParseException.cpp - #./CLucene/queryParser/classic/QueryParserTokenManager.cpp ./CLucene/analysis/standard/StandardAnalyzer.cpp ./CLucene/analysis/standard/StandardFilter.cpp ./CLucene/analysis/standard/StandardTokenizer.cpp - #./CLucene/analysis/mmseg/MMsegAnalyzer.cpp - #./CLucene/analysis/mmseg/MmsegTokenizer.cpp ./CLucene/analysis/Analyzers.cpp ./CLucene/analysis/AnalysisHeader.cpp ./CLucene/store/MMapInput.cpp ./CLucene/store/IndexInput.cpp - ./CLucene/store/ByteArrayDataInput.cpp + ./CLucene/store/ByteArrayDataInput.cpp ./CLucene/store/Lock.cpp ./CLucene/store/LockFactory.cpp ./CLucene/store/IndexOutput.cpp @@ -113,10 +102,10 @@ SET(clucene_core_Files ./CLucene/index/Terms.cpp ./CLucene/index/MergePolicy.cpp 
./CLucene/index/DocumentsWriter.cpp - ./CLucene/index/SDocumentWriter.cpp - ./CLucene/index/SDocumentWriter.h - ./CLucene/index/DocumentsWriterThreadState.cpp - ./CLucene/index/SegmentTermVector.cpp + ./CLucene/index/SDocumentWriter.cpp + ./CLucene/index/SDocumentWriter.h + ./CLucene/index/DocumentsWriterThreadState.cpp + ./CLucene/index/SegmentTermVector.cpp ./CLucene/index/TermVectorReader.cpp ./CLucene/index/FieldInfos.cpp ./CLucene/index/CompoundFile.cpp @@ -213,21 +202,6 @@ SET(clucene_core_Files ./CLucene/search/spans/TermSpans.cpp ) -#if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") -# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=haswell -DAVX2_ON -fstrict-aliasing") - #ADD_DEFINITIONS(-DAVX2_ON) - #ADD_DEFINITIONS(-DSSE2_ON) -#endif() - -#SET(pfor_Files_SSE -# ${clucene-ext_SOURCE_DIR}/for/transpose.c -# ${clucene-ext_SOURCE_DIR}/for/bitunpack.c -# ${clucene-ext_SOURCE_DIR}/for/bitpack.c -# ${clucene-ext_SOURCE_DIR}/for/bitutil.c -# ${clucene-ext_SOURCE_DIR}/for/vp4d.c -# ${clucene-ext_SOURCE_DIR}/for/vp4c.c -# ) - #if USE_SHARED_OBJECT_FILES then we link directly to the object files (means rebuilding them for the core) IF ( USE_SHARED_OBJECT_FILES ) GET_SHARED_FILES(clucene_shared_Files) diff --git a/src/test/analysis/TestAnalysis.cpp b/src/test/analysis/TestAnalysis.cpp index f741939..73e4337 100644 --- a/src/test/analysis/TestAnalysis.cpp +++ b/src/test/analysis/TestAnalysis.cpp @@ -87,7 +87,7 @@ void testChar(CuTest *tc) { while (stream->next(&t) != NULL) { count++; } - printf("count = %d\n", count); + //printf("count = %d\n", count); _CLDELETE(stream); } diff --git a/src/test/analysis/TestAnalyzers.cpp b/src/test/analysis/TestAnalyzers.cpp index f9a2d09..449d5e2 100644 --- a/src/test/analysis/TestAnalyzers.cpp +++ b/src/test/analysis/TestAnalyzers.cpp @@ -486,7 +486,7 @@ _CLLDELETE(reader); } -CuSuite *testanalyzers(void) +CuSuite *testanalyzers() { CuSuite *suite = CuSuiteNew(_T("CLucene Analyzers Test")); diff --git 
a/src/test/contribs-lib/analysis/testChinese.cpp b/src/test/contribs-lib/analysis/testChinese.cpp index c74ba1f..d75a000 100644 --- a/src/test/contribs-lib/analysis/testChinese.cpp +++ b/src/test/contribs-lib/analysis/testChinese.cpp @@ -142,6 +142,13 @@ void testCJK(CuTest *tc) { _testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2); } +std::string get_dict_path() { + if(const char* env_p = std::getenv("DICT_PATH")) { + return env_p; + } + return ""; +} + void testSimpleJiebaTokenizer(CuTest* tc) { LanguageBasedAnalyzer a; CL_NS(util)::StringReader reader(_T("我爱你中国")); @@ -152,6 +159,7 @@ void testSimpleJiebaTokenizer(CuTest* tc) { //test with chinese a.setLanguage(_T("chinese")); a.setStem(false); + a.initDict(get_dict_path()); ts = a.tokenStream(_T("contents"), &reader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -574,7 +582,7 @@ void testLanguageBasedAnalyzer(CuTest* tc) { } CuSuite *testchinese(void) { - CuSuite *suite = CuSuiteNew(_T("CLucene Analysis Test")); + CuSuite *suite = CuSuiteNew(_T("CLucene chinese tokenizer Test")); SUITE_ADD_TEST(suite, testFile); SUITE_ADD_TEST(suite, testCJK); diff --git a/src/test/document/TestDocument.cpp b/src/test/document/TestDocument.cpp index f6f392c..b90f67d 100644 --- a/src/test/document/TestDocument.cpp +++ b/src/test/document/TestDocument.cpp @@ -471,8 +471,8 @@ CuSuite *testdocument(void) { SUITE_ADD_TEST(suite, TestFields); SUITE_ADD_TEST(suite, TestMultiSetValueField); SUITE_ADD_TEST(suite, TestMultiAddValueField); - SUITE_ADD_TEST(suite, TestSetFieldBench); - SUITE_ADD_TEST(suite, TestNewFieldBench); + //SUITE_ADD_TEST(suite, TestSetFieldBench); + //SUITE_ADD_TEST(suite, TestNewFieldBench); SUITE_ADD_TEST(suite, TestReaderValueField); SUITE_ADD_TEST(suite, TestAddDocument); //SUITE_ADD_TEST(suite, TestDateTools); diff --git a/src/test/tests.cpp b/src/test/tests.cpp index 32aedad..f1aee90 100644 --- a/src/test/tests.cpp +++ b/src/test/tests.cpp @@ -11,10 +11,10 @@ unittest tests[] = { // {"indexreader", 
testindexreader}, // {"indexsearcher", testIndexSearcher}, // {"reuters", testreuters}, -// {"analysis", testanalysis}, -// {"analyzers", testanalyzers}, -// {"document", testdocument}, -// {"field", testField}, + {"analysis", testanalysis}, + {"analyzers", testanalyzers}, + {"document", testdocument}, + {"field", testField}, // {"numbertools", testNumberTools}, // {"debug", testdebug}, // {"ramdirectory", testRAMDirectory}, @@ -39,14 +39,13 @@ unittest tests[] = { // {"store", teststore}, // {"utf8", testutf8}, // {"bitset", testBitSet}, -// {"bkd", testBKD}, -// {"MSBRadixSorter",testMSBRadixSorter}, + {"bkd", testBKD}, + {"MSBRadixSorter",testMSBRadixSorter}, // {"extractterms", testExtractTerms}, // {"spanqueries", testSpanQueries}, // {"stringbuffer", testStringBuffer}, // {"termvectorsreader", testTermVectorsReader}, #ifdef TEST_CONTRIB_LIBS {"chinese", testchinese}, - //{"germananalyzer", testGermanAnalyzer}, #endif {"LastTest", NULL}}; diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp index 4c43216..2dcc22c 100644 --- a/src/test/util/TestBKD.cpp +++ b/src/test/util/TestBKD.cpp @@ -53,14 +53,14 @@ void TestVisitor1::visit(Roaring *docID, std::vector<uint8_t> &packedValue) { visit(*docID); } -void TestVisitor1::visit(bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) { +void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) { if (!matches(packedValue.data())) { return; } - int32_t docID = iter->docID_set->nextDoc(); - while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) { + int32_t docID = iter->docid_set->nextDoc(); + while (docID != lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) { hits->set(docID); - docID = iter->docID_set->nextDoc(); + docID = iter->docid_set->nextDoc(); } } @@ -376,9 +376,9 @@ void testBug1Read(CuTest *tc) { //printf("something wrong in read\n"); printf("clucene error: %s\n", r.what()); } - printf("hits count=%d\n", result->count()); + //printf("hits 
count=%d\n", result->count()); CuAssertEquals(tc, result->count(), 6); - printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } } @@ -444,9 +444,9 @@ void testLowCardinalInts1DRead2(CuTest *tc) { //printf("something wrong in read\n"); printf("clucene error: %s\n", r.what()); } - printf("hits count=%d\n", hits->count()); + //printf("hits count=%d\n", hits->count()); CuAssertEquals(tc, hits->count(), 12928); - printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } } @@ -475,9 +475,9 @@ void testLowCardinalInts1DRead(CuTest *tc) { //printf("something wrong in read\n"); printf("clucene error: %s\n", r.what()); } - printf("hits count=%d\n", hits->count()); + //printf("hits count=%d\n", hits->count()); CuAssertEquals(tc, hits->count(), 256); - printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } } @@ -551,7 +551,7 @@ void testBasicsInts1DRead(CuTest *tc) { //assertEquals(L"docID=" + to_wstring(docID), expected, actual); } - printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); auto hits1 = std::make_shared<BitSet>(N); auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1); str = Misc::currentTimeMillis(); @@ -566,7 +566,7 @@ void testBasicsInts1DRead(CuTest *tc) { CuAssertEquals(tc, expected, actual); //assertEquals(L"docID=" + to_wstring(docID), expected, actual); } - printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\nSecond search time taken: %d ms\n\n", 
(int32_t) (Misc::currentTimeMillis() - str)); } dir->close(); _CLDECDELETE(dir); @@ -604,15 +604,15 @@ void testHttplogsRead(CuTest *tc) { //CuAssertEquals(tc, 0, type); r->read_index(index_in_); r->intersect(v.get()); - printf("\ntry query result:%ld\n", r->estimate_point_count(v.get())); - printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + //printf("\ntry query result:%ld\n", r->estimate_point_count(v.get())); + //printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } catch (CLuceneError &r) { //printf("something wrong in read\n"); printf("clucene error: %s\n", r.what()); } - printf("result size = %d\n", result->count()); + //printf("result size = %d\n", result->count()); CuAssertEquals(tc, result->count(), 8445); - printf("stats=%s\n", r->stats.to_string().c_str()); + //printf("stats=%s\n", r->stats.to_string().c_str()); } dir->close(); _CLDECDELETE(dir); diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h index 8a79552..3264313 100644 --- a/src/test/util/TestBKD.h +++ b/src/test/util/TestBKD.h @@ -33,7 +33,7 @@ public: } void visit(Roaring *docID, std::vector<uint8_t> &packedValue) override; void visit(int docID, std::vector<uint8_t> &packedValue) override; - void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override; + void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override; bool matches(uint8_t *packedValue); @@ -91,14 +91,14 @@ public: } visit(*docID); }; - void visit(lucene::util::bkd::bkd_docID_set_iterator *iter, std::vector<uint8_t> &packedValue) override { + void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override { if (!matches(packedValue.data())) { return; } - int32_t docID = iter->docID_set->nextDoc(); - while (docID != lucene::util::bkd::bkd_docID_set::NO_MORE_DOCS) { + int32_t docID = iter->docid_set->nextDoc(); + while (docID 
!= lucene::util::bkd::bkd_docid_set::NO_MORE_DOCS) { hits->set(docID); - docID = iter->docID_set->nextDoc(); + docID = iter->docid_set->nextDoc(); } }; bool matches(uint8_t *packedValue); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org