This is an automated email from the ASF dual-hosted git repository. jianliangqi pushed a commit to branch clucene in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push: new d75e5a15 [Update](unitest) make unitest work for clucene (#160) d75e5a15 is described below commit d75e5a152aeb9154ba64848867b01baeeb535257 Author: airborne12 <airborn...@gmail.com> AuthorDate: Fri Dec 22 18:50:37 2023 +0800 [Update](unitest) make unitest work for clucene (#160) --- src/core/CLucene/util/stringUtil.h | 1 + src/test/CMakeLists.txt | 14 + src/test/analysis/TestAnalysis.cpp | 3 +- src/test/contribs-lib/analysis/testChinese.cpp | 23 +- src/test/document/TestDocument.cpp | 172 +++++---- src/test/search/TestSearchRange.cpp | 2 +- src/test/test.h | 2 +- src/test/tests.cpp | 34 +- src/test/util/TestBKD.cpp | 486 +++++++++++++++++-------- src/test/util/TestBKD.h | 21 +- src/test/util/TestMSBRadixSorter.cpp | 12 +- 11 files changed, 478 insertions(+), 292 deletions(-) diff --git a/src/core/CLucene/util/stringUtil.h b/src/core/CLucene/util/stringUtil.h index 7d97e735..1616737d 100644 --- a/src/core/CLucene/util/stringUtil.h +++ b/src/core/CLucene/util/stringUtil.h @@ -12,6 +12,7 @@ #endif #include <cstring> +#include <assert.h> #include "SSEUtil.h" template <typename T> diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index a487f343..20c722f8 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -272,6 +272,20 @@ IF (BUILD_STATIC_LIBRARIES) TARGET_LINK_LIBRARIES(cl_test clucene-core-static clucene-shared-static ic gtest ${EXTRA_LIBS} ${Roaring_LIBRARY}) ENDIF (UNIX) + SET(DATA_SOURCE_DIR ${clucene_SOURCE_DIR}/src/test/data) + SET(DATA_TARGET_DIR "${EXECUTABLE_OUTPUT_PATH}/data") + + ADD_CUSTOM_COMMAND(TARGET cl_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${DATA_SOURCE_DIR} ${DATA_TARGET_DIR} + COMMENT "Copying ${DATA_SOURCE_DIR} to ${DATA_TARGET_DIR}/data") + + SET(DICT_SOURCE_DIR ${clucene_SOURCE_DIR}/src/contribs-lib/CLucene/analysis/jieba/dict) + SET(DICT_TARGET_DIR "${EXECUTABLE_OUTPUT_PATH}/dict") + + ADD_CUSTOM_COMMAND(TARGET cl_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory ${DICT_SOURCE_DIR} ${DICT_TARGET_DIR} + COMMENT "Copying ${DATA_SOURCE_DIR} to ${DATA_TARGET_DIR}/dict") + ENDIF (BUILD_STATIC_LIBRARIES) ############################ diff --git a/src/test/analysis/TestAnalysis.cpp b/src/test/analysis/TestAnalysis.cpp index da51aa06..a29b65ed 100644 --- a/src/test/analysis/TestAnalysis.cpp +++ b/src/test/analysis/TestAnalysis.cpp @@ -5,6 +5,7 @@ * the GNU Lesser General Public License, as specified in the COPYING file. ------------------------------------------------------------------------------*/ #include "test.h" +#include <memory> void test(CuTest *tc, Reader *reader, bool verbose, int64_t bytes) { StandardAnalyzer analyzer; @@ -63,7 +64,7 @@ void testTokenStreamField(CuTest *tc) { TokenStream *stream = analyzer.reusableTokenStream(L"field1", &reader); int field_config = lucene::document::Field::STORE_NO | lucene::document::Field::INDEX_TOKENIZED; - auto field = _CLNEW Field(L"field1", field_config); + auto field = std::make_unique<Field>(L"field1", field_config); field->setValue(stream); auto s = field->tokenStreamValue(); int32_t count = 0; diff --git a/src/test/contribs-lib/analysis/testChinese.cpp b/src/test/contribs-lib/analysis/testChinese.cpp index 7e47aa2d..2aeb0367 100644 --- a/src/test/contribs-lib/analysis/testChinese.cpp +++ b/src/test/contribs-lib/analysis/testChinese.cpp @@ -142,13 +142,6 @@ void testCJK(CuTest *tc) { _testCJK(tc, "a\xe5\x95\xa4\xe9\x85\x92\xe5\x95\xa4x", exp2); } -std::string get_dict_path() { - if(const char* env_p = std::getenv("DICT_PATH")) { - return env_p; - } - return ""; -} - void testSimpleJiebaSearchModeTokenizer2(CuTest* tc) { LanguageBasedAnalyzer a; const char* field_value_data = "冰咒龙"; @@ -161,7 +154,7 @@ void testSimpleJiebaSearchModeTokenizer2(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::Search); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -184,7 +177,7 @@ void testSimpleJiebaAllModeTokenizer2(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::All); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -209,7 +202,7 @@ void testSimpleJiebaAllModeTokenizer(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::All); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -240,7 +233,7 @@ void testSimpleJiebaDefaultModeTokenizer2(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::Default); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); /*char tmp[255] = {}; @@ -277,7 +270,7 @@ void testSimpleJiebaDefaultModeTokenizer(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::Default); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -302,7 +295,7 @@ void testSimpleJiebaSearchModeTokenizer(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::Search); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -335,7 +328,7 @@ void testSimpleJiebaTokenizer(CuTest* tc) { a.setLanguage(_T("chinese")); a.setStem(false); a.setMode(lucene::analysis::AnalyzerMode::Default); - a.initDict(get_dict_path()); + a.initDict("./dict"); ts = a.tokenStream(_T("contents"), stringReader); CLUCENE_ASSERT(ts->next(&t) != NULL); @@ -652,7 +645,7 @@ void testJiebaMatchHuge(CuTest* tc) { auto analyzer = _CLNEW lucene::analysis::LanguageBasedAnalyzer(); analyzer->setLanguage(L"chinese"); analyzer->setMode(lucene::analysis::AnalyzerMode::Default); - analyzer->initDict(get_dict_path()); + analyzer->initDict("./dict"); IndexWriter w(&dir, analyzer, true); auto field_name = lucene::util::Misc::_charToWide("chinese"); diff --git a/src/test/document/TestDocument.cpp b/src/test/document/TestDocument.cpp index 93d52e8e..7dc44a68 100644 --- a/src/test/document/TestDocument.cpp +++ b/src/test/document/TestDocument.cpp @@ -58,95 +58,123 @@ public: void TestReaderValueField(CuTest *tc) { RAMDirectory dir; - SimpleAnalyzer<TCHAR> analyzer; + SimpleAnalyzer<char> analyzer; IndexWriter w(&dir, &analyzer, true); + w.setUseCompoundFile(false); auto field_name = lucene::util::Misc::_charToWide("f3"); Document doc; auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); - auto value1 = lucene::util::Misc::_charToWide("value1"); - auto stringReader = _CLNEW StringReader(value1, wcslen(value1), false); - field->setValue(stringReader); + auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>(); + char_string_reader->init("value1", 6, true); + auto stream = analyzer.tokenStream(field->name(), char_string_reader.get()); + field->setValue(stream); doc.add(*field); w.addDocument(&doc); w.close(); IndexSearcher searcher(&dir); - Term *t1 = _CLNEW Term(_T("f3"), _T("value1")); - auto *query1 = _CLNEW TermQuery(t1); - Hits *hits1 = searcher.search(query1); + auto t1 = std::make_unique<Term>(_T("f3"), _T("value1")); + auto query1 =std::make_unique<TermQuery>(t1.get()); + Hits *hits1 = searcher.search(query1.get()); CLUCENE_ASSERT(1 == hits1->length()); + _CLDELETE(stream) + _CLDELETE(hits1) + _CLDELETE_ARRAY(field_name) } void TestMultiSetValueField(CuTest *tc) { RAMDirectory dir; - SimpleAnalyzer<TCHAR> analyzer; + SimpleAnalyzer<char> analyzer; IndexWriter w(&dir, &analyzer, true); + w.setUseCompoundFile(false); auto field_name = lucene::util::Misc::_charToWide("f3"); Document doc; auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); + auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>(); + char_string_reader->init("value1", 6, false); + auto stream = analyzer.tokenStream(field->name(), char_string_reader.get()); + field->setValue(stream); + char_string_reader->init("value2", 6, false); + auto stream2 = analyzer.tokenStream(field->name(), char_string_reader.get()); + field->setValue(stream2); - auto value1 = lucene::util::Misc::_charToWide("value1"); - field->setValue(value1, false); - auto value2 = lucene::util::Misc::_charToWide("value2"); - field->setValue(value2, false); doc.add(*field); w.addDocument(&doc); w.close(); IndexSearcher searcher(&dir); - Term *t1 = _CLNEW Term(_T("f3"), _T("value1")); - auto *query1 = _CLNEW TermQuery(t1); - Hits *hits1 = searcher.search(query1); + auto t1 = std::make_unique<Term>(_T("f3"), _T("value1")); + auto query1 = std::make_unique<TermQuery>(t1.get()); + Hits *hits1 = searcher.search(query1.get()); CLUCENE_ASSERT(0 == hits1->length()); - Term *t2 = _CLNEW Term(_T("f3"), _T("value2")); - auto *query2 = _CLNEW TermQuery(t2); - Hits *hits2 = searcher.search(query2); + auto t2 = std::make_unique<Term>(_T("f3"), _T("value2")); + auto query2 = std::make_unique<TermQuery>(t2.get()); + Hits *hits2 = searcher.search(query2.get()); CLUCENE_ASSERT(1 == hits2->length()); doc.clear(); - //_CLDELETE(field) + _CLDELETE(stream) + _CLDELETE(stream2) + _CLDELETE(hits1) + _CLDELETE(hits2) + _CLDELETE_ARRAY(field_name) } void TestMultiAddValueField(CuTest *tc) { RAMDirectory dir; auto field_name = lucene::util::Misc::_charToWide("f3"); - SimpleAnalyzer<TCHAR> analyzer; + SimpleAnalyzer<char> analyzer; IndexWriter w(&dir, &analyzer, true); + w.setUseCompoundFile(false); Document doc; - doc.add(*_CLNEW Field(field_name, _T("value1"), Field::INDEX_TOKENIZED | Field::STORE_NO)); - doc.add(*_CLNEW Field(field_name, _T("value2"), Field::INDEX_TOKENIZED | Field::STORE_NO)); - - w.addDocument(&doc); - w.close(); + auto field1 = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); + auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>(); + char_string_reader->init("value1", 6, false); + auto stream = analyzer.tokenStream(field1->name(), char_string_reader.get()); + field1->setValue(stream); + doc.add(*field1); + + auto field2 = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); + auto char_string_reader2 = std::make_unique<lucene::util::SStringReader<char>>(); + char_string_reader2->init("value2", 6, false); + auto stream2 = analyzer.tokenStream(field2->name(), char_string_reader2.get()); + field2->setValue(stream2); + doc.add(*field2); + + try { + w.addDocument(&doc); + w.close(); + } catch (CLuceneError& ae) { + std::cout <<ae.what() << std::endl; + throw ae; + } - Term *t1 = _CLNEW Term(_T("f3"), _T("value1")); - auto *query1 = _CLNEW TermQuery(t1); + auto t1 = std::make_unique<Term>(_T("f3"), _T("value1")); + auto query1 = std::make_unique<TermQuery>(t1.get()); IndexSearcher searcher(&dir); - Hits *hits1 = searcher.search(query1); + Hits *hits1 = searcher.search(query1.get()); CLUCENE_ASSERT(1 == hits1->length()); - Term *t2 = _CLNEW Term(_T("f3"), _T("value2")); - auto *query2 = _CLNEW TermQuery(t2); - Hits *hits2 = searcher.search(query2); + auto t2 = std::make_unique<Term>(_T("f3"), _T("value2")); + auto query2 = std::make_unique<TermQuery>(t2.get()); + Hits *hits2 = searcher.search(query2.get()); CLUCENE_ASSERT(1 == hits2->length()); doc.removeFields(_T("f3")); CLUCENE_ASSERT(doc.getFields()->size() == 0); - - _CLDELETE(query1); - _CLDELETE(query2); - _CLDELETE(t1); - _CLDELETE(t2); _CLDELETE(hits1); _CLDELETE(hits2); + _CLDELETE(stream); + _CLDELETE(stream2); + _CLDELETE_ARRAY(field_name) } void TestFields(CuTest *tc) { @@ -426,38 +454,42 @@ const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL; const int32_t MAX_BUFFER_DOCS = 100000000; const int32_t MERGE_FACTOR = 100000000; void TestAddDocument(CuTest *tc) { - RAMDirectory dir; - SimpleAnalyzer<char> sanalyzer; - IndexWriter w(&dir, NULL, true); - w.setUseCompoundFile(false); - w.setMaxBufferedDocs(MAX_BUFFER_DOCS); - w.setRAMBufferSizeMB(256); - w.setMaxFieldLength(MAX_FIELD_LEN); - w.setMergeFactor(MERGE_FACTOR); - w.setDocumentWriter(_CLNEW SDocumentsWriter<char>(w.getDirectory(), &w)); - Document doc; - auto field_name = lucene::util::Misc::_charToWide("f3"); - auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); - doc.add(*field); + try { + RAMDirectory dir; + SimpleAnalyzer<char> sanalyzer; + IndexWriter w(&dir, &sanalyzer, true); + w.setUseCompoundFile(false); + w.setMaxBufferedDocs(MAX_BUFFER_DOCS); + w.setRAMBufferSizeMB(256); + w.setMaxFieldLength(MAX_FIELD_LEN); + w.setMergeFactor(MERGE_FACTOR); + w.setDocumentWriter(_CLNEW SDocumentsWriter<char>(w.getDirectory(), &w)); + Document doc; + auto field_name = lucene::util::Misc::_charToWide("f3"); + auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO); + doc.add(*field); - for (int i = 0; i <= 2000000; i++) { - std::string value1 = "value1"; - if (i > 0) - value1 = generateRandomString(2000); - auto stringReader = _CLNEW lucene::util::SStringReader<char>( - value1.c_str(), strlen(value1.c_str()), false); - auto stream = sanalyzer.reusableTokenStream(field_name, stringReader); + for (int i = 0; i <= 2000000; i++) { + std::string value1 = "value1"; + if (i > 0) value1 = generateRandomString(2000); + auto stringReader = _CLNEW lucene::util::SStringReader<char>( + value1.c_str(), strlen(value1.c_str()), false); + auto stream = sanalyzer.reusableTokenStream(field_name, stringReader); - field->setValue(stream); - w.addDocument(&doc, &sanalyzer); + field->setValue(stream); + w.addDocument(&doc, &sanalyzer); + } + IndexSearcher searcher(&dir); + Term* t2 = _CLNEW Term(_T("f3"), _T("value1")); + auto* query2 = _CLNEW TermQuery(t2); + Hits* hits2 = searcher.search(query2); + CLUCENE_ASSERT(1 == hits2->length()); + doc.clear(); + w.close(); + } catch (CLuceneError& ae) { + std::cout <<ae.what() << std::endl; + throw ae; } - IndexSearcher searcher(&dir); - Term *t2 = _CLNEW Term(_T("f3"), _T("value1")); - auto *query2 = _CLNEW TermQuery(t2); - Hits *hits2 = searcher.search(query2); - CLUCENE_ASSERT(1 == hits2->length()); - doc.clear(); - w.close(); } void TestNewFieldBench(CuTest *tc) { @@ -495,18 +527,10 @@ void TestNewFieldBench(CuTest *tc) { CuSuite *testdocument(void) { CuSuite *suite = CuSuiteNew(_T("CLucene Document Test")); - //SUITE_ADD_TEST(suite, TestCompressedDocument); - //SUITE_ADD_TEST(suite, TestBinaryDocument); - //SUITE_ADD_TEST(suite, TestLazyCompressedDocument); - //SUITE_ADD_TEST(suite, TestLazyBinaryDocument); - // SUITE_ADD_TEST(suite, TestFieldSelectors); SUITE_ADD_TEST(suite, TestFields); SUITE_ADD_TEST(suite, TestMultiSetValueField); SUITE_ADD_TEST(suite, TestMultiAddValueField); - //SUITE_ADD_TEST(suite, TestSetFieldBench); - //SUITE_ADD_TEST(suite, TestNewFieldBench); SUITE_ADD_TEST(suite, TestReaderValueField); - SUITE_ADD_TEST(suite, TestAddDocument); - //SUITE_ADD_TEST(suite, TestDateTools); + //SUITE_ADD_TEST(suite, TestAddDocument); return suite; } diff --git a/src/test/search/TestSearchRange.cpp b/src/test/search/TestSearchRange.cpp index 82a4e170..a94fb386 100644 --- a/src/test/search/TestSearchRange.cpp +++ b/src/test/search/TestSearchRange.cpp @@ -1087,7 +1087,7 @@ static void testSearchEqual(CuTest* tc) { searcher._search(query, [&result2](DocRange* docRange) { if (docRange->type_ == DocRangeType::kMany) { result2.addMany(docRange->doc_many_size_, - docRange->doc_many.data()); + docRange->doc_many->data()); } else if (docRange->type_ == DocRangeType::kRange) { result2.addRange(docRange->doc_range.first, docRange->doc_range.second); diff --git a/src/test/test.h b/src/test/test.h index cbf08803..08c168cb 100644 --- a/src/test/test.h +++ b/src/test/test.h @@ -153,7 +153,7 @@ void TestAssertIndexReaderEquals(CuTest *tc, IndexReader* reader1, IndexReader* extern unittest tests[]; -#define CLUCENE_DATA_LOCATION1 "../../src/test/data/" +#define CLUCENE_DATA_LOCATION1 "./data/" #define CLUCENE_DATA_LOCATION2 "../src/test/data/" #define CLUCENE_DATA_LOCATION3 "../../../src/test/data/" #define CLUCENE_DATA_LOCATIONENV "srcdir" diff --git a/src/test/tests.cpp b/src/test/tests.cpp index 5d5421cb..372a4a28 100644 --- a/src/test/tests.cpp +++ b/src/test/tests.cpp @@ -7,48 +7,16 @@ #include "test.h" unittest tests[] = { -// {"threads", testatomicupdates}, -// {"indexreader", testindexreader}, -// {"indexsearcher", testIndexSearcher}, -// {"reuters", testreuters}, {"analysis", testanalysis}, {"analyzers", testanalyzers}, {"analysis", teststandard95}, {"document", testdocument}, {"field", testField}, -// {"numbertools", testNumberTools}, -// {"debug", testdebug}, -// {"ramdirectory", testRAMDirectory}, -// {"indexwriter", testindexwriter}, -// {"indexmodifier", testIndexModifier}, -// {"addIndexesNoOptimize", testAddIndexesNoOptimize}, -// {"highfreq", testhighfreq}, -// {"priorityqueue", testpriorityqueue}, -// {"datetools", testDateTools}, -// {"queryparser", testQueryParser}, -// {"mfqueryparser", testMultiFieldQueryParser}, -// {"boolean", testBoolean}, -// {"search", testsearch}, -// {"rangefilter", testRangeFilter}, -// {"queries", testqueries}, -// {"csrqueries", testConstantScoreQueries}, -// {"termvector", testtermvector}, -// {"sort", testsort}, -// {"duplicates", testduplicates}, -// {"datefilter", testdatefilter}, -// {"wildcard", testwildcard}, -// {"store", teststore}, -// {"utf8", testutf8}, -// {"bitset", testBitSet}, {"bkd", testBKD}, {"MSBRadixSorter",testMSBRadixSorter}, -// {"extractterms", testExtractTerms}, -// {"spanqueries", testSpanQueries}, -// {"stringbuffer", testStringBuffer}, -// {"termvectorsreader", testTermVectorsReader}, {"strconvert", testStrConvert}, {"searchRange", testSearchRange}, #ifdef TEST_CONTRIB_LIBS - {"chinese", testchinese}, + //{"chinese", testchinese}, #endif {"LastTest", NULL}}; diff --git a/src/test/util/TestBKD.cpp b/src/test/util/TestBKD.cpp index ec6c8f99..330f72f7 100644 --- a/src/test/util/TestBKD.cpp +++ b/src/test/util/TestBKD.cpp @@ -36,25 +36,30 @@ void TestVisitor1::visit(int docID) { } } -bool TestVisitor1::matches(uint8_t *packedValue) { +int TestVisitor1::matches(uint8_t* packedValue) { std::vector<uint8_t> result(4); std::copy(packedValue, packedValue + 4, result.begin()); int x = NumericUtils::sortableBytesToInt(result, 0); if (x >= queryMin && x <= queryMax) { - return true; + return 0; + } + if (x < queryMin) { + return -1; + } + if (x > queryMax) { + return 1; } - return false; } void TestVisitor1::visit(roaring::Roaring *docID, std::vector<uint8_t> &packedValue) { - if (!matches(packedValue.data())) { + if (matches(packedValue.data()) != 0) { return; } visit(*docID); } void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) { - if (!matches(packedValue.data())) { + if (matches(packedValue.data()) != 0) { return; } int32_t docID = iter->docid_set->nextDoc(); @@ -64,8 +69,7 @@ void TestVisitor1::visit(bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> } } -void TestVisitor1::visit( - int docID, std::vector<uint8_t> &packedValue) { +int TestVisitor1::visit(int docID, std::vector<uint8_t>& packedValue) { int x = NumericUtils::sortableBytesToInt(packedValue, 0); if (0) { wcout << L"visit docID=" << docID << L" x=" << x << endl; @@ -73,17 +77,29 @@ void TestVisitor1::visit( if (x >= queryMin && x <= queryMax) { //wcout << L"visit docID=" << docID << L" x=" << x << endl; hits->set(docID); + return 0; + } + if (x < queryMin) { + return -1; } + if (x > queryMax) { + return 1; + } + return 0; } -lucene::util::bkd::relation TestVisitor1::compare( - std::vector<uint8_t> &minPacked, std::vector<uint8_t> &maxPacked) { +lucene::util::bkd::relation TestVisitor1::compare_prefix(std::vector<uint8_t>& prefix) { + return lucene::util::bkd::relation::CELL_CROSSES_QUERY; +} + +lucene::util::bkd::relation TestVisitor1::compare(std::vector<uint8_t>& minPacked, + std::vector<uint8_t>& maxPacked) { int min = NumericUtils::sortableBytesToInt(minPacked, 0); int max = NumericUtils::sortableBytesToInt(maxPacked, 0); assert(max >= min); if (0) { - wcout << L"compare: min=" << min << L" max=" << max << L" vs queryMin=" - << queryMin << L" queryMax=" << queryMax << endl; + wcout << L"compare: min=" << min << L" max=" << max << L" vs queryMin=" << queryMin + << L" queryMax=" << queryMax << endl; } if (max < queryMin || min > queryMax) { @@ -95,104 +111,262 @@ lucene::util::bkd::relation TestVisitor1::compare( } } -TestVisitor::TestVisitor(const uint8_t *qMin, const uint8_t *qMax, - BitSet *h, predicate p) { +template <predicate QT> +TestVisitor<QT>::TestVisitor(const uint8_t* qMin, const uint8_t* qMax, BitSet* h) { queryMin = qMin; queryMax = qMax; hits = h; - pred = p; } -bool TestVisitor::matches(uint8_t *packedValue) { +template <predicate QT> +int TestVisitor<QT>::matches(uint8_t* packed_value) { + bool all_greater_than_max = true; + bool all_within_range = true; + for (int dim = 0; dim < reader->num_data_dims_; dim++) { int offset = dim * reader->bytes_per_dim_; - if (pred == L) { - if (lucene::util::FutureArrays::CompareUnsigned( - packedValue, offset, offset + reader->bytes_per_dim_, queryMax, offset, - offset + reader->bytes_per_dim_) >= 0) { - // Doc's value is too high, in this dimension - return false; - } - } else if (pred == G) { - if (lucene::util::FutureArrays::CompareUnsigned( - packedValue, offset, offset + reader->bytes_per_dim_, queryMin, offset, - offset + reader->bytes_per_dim_) <= 0) { - // Doc's value is too high, in this dimension - return false; + + auto result_max = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMax, offset, + offset + reader->bytes_per_dim_); + + auto result_min = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_); + + all_greater_than_max &= (result_max > 0); + all_within_range &= (result_min > 0 && result_max < 0); + + if (!all_greater_than_max && !all_within_range) { + return -1; + } + } + + if (all_greater_than_max) { + return 1; + } else if (all_within_range) { + return 0; + } else { + return -1; + } +} + +template <> +int TestVisitor<predicate::EQ>::matches(uint8_t* packed_value) { + // if query type is equal, query_min == query_max + if (reader->num_data_dims_ == 1) { + return std::memcmp(packed_value, queryMin, reader->bytes_per_dim_); + } else { + // if all dim value > matched value, then return > 0, otherwise return < 0 + int return_result = 0; + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + auto result = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_); + if (result < 0) { + return -1; + } else if (result > 0) { + return_result = 1; } - } else { - if (lucene::util::FutureArrays::CompareUnsigned( - packedValue, offset, offset + reader->bytes_per_dim_, queryMin, offset, - offset + reader->bytes_per_dim_) < 0) { - // Doc's value is too low, in this dimension - return false; + } + return return_result; + } +} + +template <> +int TestVisitor<predicate::L>::matches(uint8_t* packed_value) { + if (reader->num_data_dims_ == 1) { + auto result = std::memcmp(packed_value, queryMax, reader->bytes_per_dim_); + if (result >= 0) { + return 1; + } + return 0; + } else { + bool all_greater_or_equal = true; + bool all_lesser = true; + + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + auto result = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMax, offset, + offset + reader->bytes_per_dim_); + + all_greater_or_equal &= + (result >= 0); // Remains true only if all results are greater or equal + all_lesser &= (result < 0); // Remains true only if all results are lesser + } + + // Return 1 if all values are greater or equal, 0 if all are lesser, otherwise -1 + return all_greater_or_equal ? 1 : (all_lesser ? 0 : -1); + } +} + +template <> +int TestVisitor<predicate::LE>::matches(uint8_t* packed_value) { + if (reader->num_data_dims_ == 1) { + auto result = std::memcmp(packed_value, queryMax, reader->bytes_per_dim_); + if (result > 0) { + return 1; + } + return 0; + } else { + bool all_greater = true; + bool all_lesser_or_equal = true; + + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + auto result = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMax, offset, + offset + reader->bytes_per_dim_); + + all_greater &= (result > 0); // Remains true only if all results are greater + all_lesser_or_equal &= + (result <= 0); // Remains true only if all results are lesser or equal + } + + // Return 1 if all values are greater or equal, 0 if all are lesser, otherwise -1 + return all_greater ? 1 : (all_lesser_or_equal ? 0 : -1); + } +} + +template <> +int TestVisitor<predicate::G>::matches(uint8_t* packed_value) { + if (reader->num_data_dims_ == 1) { + auto result = std::memcmp(packed_value, queryMin, reader->bytes_per_dim_); + if (result <= 0) { + return -1; + } + return 0; + } else { + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + auto result = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_); + if (result <= 0) { + return -1; } - if (lucene::util::FutureArrays::CompareUnsigned( - packedValue, offset, offset + reader->bytes_per_dim_, queryMax, offset, - offset + reader->bytes_per_dim_) > 0) { - // Doc's value is too high, in this dimension - return false; + } + return 0; + } +} + +template <> +int TestVisitor<predicate::GE>::matches(uint8_t* packed_value) { + if (reader->num_data_dims_ == 1) { + auto result = std::memcmp(packed_value, queryMin, reader->bytes_per_dim_); + if (result < 0) { + return -1; + } + return 0; + } else { + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + auto result = lucene::util::FutureArrays::CompareUnsigned( + packed_value, offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_); + if (result < 0) { + return -1; } } + return 0; } - return true; } -void TestVisitor::visit(int rowID) { +template <predicate QT> +void TestVisitor<QT>::visit(int rowID) { hits->set(rowID); if (0) { std::wcout << L"visit docID=" << rowID << std::endl; } } -void TestVisitor::visit(int rowID, std::vector<uint8_t> &packedValue) { +template <predicate QT> +int TestVisitor<QT>::visit(int rowID, std::vector<uint8_t>& packedValue) { if (0) { int x = lucene::util::NumericUtils::sortableBytesToLong(packedValue, 0); std::wcout << L"visit docID=" << rowID << L" x=" << x << std::endl; } - if (matches(packedValue.data())) { - hits->set(rowID); + auto result = matches(packedValue.data()); + if (result != 0) { + return result; } + hits->set(rowID); + return 0; } -lucene::util::bkd::relation TestVisitor::compare(std::vector<uint8_t> &minPacked, - std::vector<uint8_t> &maxPacked) { +template <> +lucene::util::bkd::relation TestVisitor<predicate::L>::compare(std::vector<uint8_t>& min_packed, + std::vector<uint8_t>& max_packed) { bool crosses = false; - for (int dim = 0; dim < reader->num_data_dims_; dim++) { int offset = dim * reader->bytes_per_dim_; - - if (pred == L) { - if (lucene::util::FutureArrays::CompareUnsigned( - minPacked.data(), offset, offset + reader->bytes_per_dim_, queryMax, offset, - offset + reader->bytes_per_dim_) >= 0) { - return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; - } - } else if (pred == G) { - if (lucene::util::FutureArrays::CompareUnsigned( - maxPacked.data(), offset, offset + reader->bytes_per_dim_, queryMin, offset, - offset + reader->bytes_per_dim_) <= 0) { - return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; - } - } else { - if (lucene::util::FutureArrays::CompareUnsigned( - minPacked.data(), offset, offset + reader->bytes_per_dim_, queryMax, offset, - offset + reader->bytes_per_dim_) > 0 || - lucene::util::FutureArrays::CompareUnsigned( - maxPacked.data(), offset, offset + reader->bytes_per_dim_, queryMin, offset, - offset + reader->bytes_per_dim_) < 0) { - return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; - } + if (lucene::util::FutureArrays::CompareUnsigned( + min_packed.data(), offset, offset + reader->bytes_per_dim_, queryMax, offset, + offset + reader->bytes_per_dim_) >= 0) { + return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; } + crosses |= lucene::util::FutureArrays::CompareUnsigned( + min_packed.data(), offset, offset + reader->bytes_per_dim_, queryMin, + offset, offset + reader->bytes_per_dim_) <= 0 || + lucene::util::FutureArrays::CompareUnsigned( + max_packed.data(), offset, offset + reader->bytes_per_dim_, queryMax, + offset, offset + reader->bytes_per_dim_) >= 0; + } + if (crosses) { + return lucene::util::bkd::relation::CELL_CROSSES_QUERY; + } else { + return lucene::util::bkd::relation::CELL_INSIDE_QUERY; + } +} +template <> +lucene::util::bkd::relation TestVisitor<predicate::G>::compare(std::vector<uint8_t>& min_packed, + std::vector<uint8_t>& max_packed) { + bool crosses = false; + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + if (lucene::util::FutureArrays::CompareUnsigned( + max_packed.data(), offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_) <= 0) { + return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; + } crosses |= lucene::util::FutureArrays::CompareUnsigned( - minPacked.data(), offset, offset + reader->bytes_per_dim_, queryMin, + min_packed.data(), offset, offset + reader->bytes_per_dim_, queryMin, offset, offset + reader->bytes_per_dim_) <= 0 || lucene::util::FutureArrays::CompareUnsigned( - maxPacked.data(), offset, offset + reader->bytes_per_dim_, queryMax, + max_packed.data(), offset, offset + reader->bytes_per_dim_, queryMax, offset, offset + reader->bytes_per_dim_) >= 0; } + if (crosses) { + return lucene::util::bkd::relation::CELL_CROSSES_QUERY; + } else { + return lucene::util::bkd::relation::CELL_INSIDE_QUERY; + } +} +template <predicate QT> +lucene::util::bkd::relation TestVisitor<QT>::compare(std::vector<uint8_t>& min_packed, + std::vector<uint8_t>& max_packed) { + bool crosses = false; + for (int dim = 0; dim < reader->num_data_dims_; dim++) { + int offset = dim * reader->bytes_per_dim_; + if (lucene::util::FutureArrays::CompareUnsigned( + min_packed.data(), offset, offset + reader->bytes_per_dim_, queryMax, offset, + offset + reader->bytes_per_dim_) > 0 || + lucene::util::FutureArrays::CompareUnsigned( + max_packed.data(), offset, offset + reader->bytes_per_dim_, queryMin, offset, + offset + reader->bytes_per_dim_) < 0) { + return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; + } + crosses |= lucene::util::FutureArrays::CompareUnsigned( + min_packed.data(), offset, offset + reader->bytes_per_dim_, queryMin, + offset, offset + reader->bytes_per_dim_) < 0 || + lucene::util::FutureArrays::CompareUnsigned( + max_packed.data(), offset, offset + reader->bytes_per_dim_, queryMax, + offset, offset + reader->bytes_per_dim_) > 0; + } if (crosses) { return lucene::util::bkd::relation::CELL_CROSSES_QUERY; } else { @@ -200,6 +374,23 @@ lucene::util::bkd::relation TestVisitor::compare(std::vector<uint8_t> &minPacked } } +template <predicate QT> +lucene::util::bkd::relation TestVisitor<QT>::compare_prefix(std::vector<uint8_t>& prefix) { + if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0, prefix.size(), queryMax, 0, + prefix.size()) > 0 || + lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0, prefix.size(), queryMin, 0, + prefix.size()) < 0) { + return lucene::util::bkd::relation::CELL_OUTSIDE_QUERY; + } + if (lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0, prefix.size(), queryMin, 0, + prefix.size()) > 0 && + lucene::util::FutureArrays::CompareUnsigned(prefix.data(), 0, prefix.size(), queryMax, 0, + prefix.size()) < 0) { + return lucene::util::bkd::relation::CELL_INSIDE_QUERY; + } + return lucene::util::bkd::relation::CELL_CROSSES_QUERY; +} + Directory *getDirectory(int numPoints) { Directory *dir; if (numPoints > 100000) { @@ -279,7 +470,7 @@ void testSameInts1DRead(CuTest *tc) { r->intersect(v.get()); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testSameInts1DRead: %s\n", r.what()); } for (int docID = 0; docID < N; docID++) { bool expected = docID >= queryMin && docID <= queryMax; @@ -311,7 +502,7 @@ void testSameInts1DRead(CuTest *tc) { void testBug1Write(CuTest *tc) { const int N = 8; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir(FSDirectory::getDirectory("testBug1")); shared_ptr<bkd::bkd_writer> w = make_shared<bkd::bkd_writer>(N, 1, 1, 4, 4, 100.0f, N, true); w->docs_seen_ = N; @@ -331,9 +522,9 @@ void testBug1Write(CuTest *tc) { int64_t indexFP; { - std::unique_ptr<IndexOutput> out(dir->createOutput("bkd3")); - std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd3_meta")); - std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd3_index")); + std::unique_ptr<IndexOutput> out(dir->createOutput("bkd")); + std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta")); + std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index")); try { indexFP = w->finish(out.get(), index_out.get()); w->meta_finish(meta_out.get(), indexFP, 0); @@ -348,13 +539,15 @@ void testBug1Write(CuTest *tc) { void testBug1Read(CuTest *tc) { uint64_t str = Misc::currentTimeMillis(); - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + auto *dir = FSDirectory::getDirectory("testBug1"); { - IndexInput *in_(dir->openInput("bkd3")); - IndexInput *meta_in_(dir->openInput("bkd3_meta")); - IndexInput *index_in_(dir->openInput("bkd3_index")); - - shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_); + auto closeDirectory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(dir, closeDirectory); + if (!bkd_reader->open()) { + printf("can not open bkd file\n"); + exit(1); + } // Simple 1D range query: int value = 0; auto result = std::make_unique<BitSet>(10); @@ -364,27 +557,24 @@ void testBug1Read(CuTest *tc) { const auto *max = reinterpret_cast<const uint8_t *>(value_bytes.data()); const auto *min = reinterpret_cast<const uint8_t *>(value_bytes.data()); - auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ); + auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get()); try { - v->setReader(r); - r->read_meta(meta_in_); - //auto type = r->read_type(); - CuAssertEquals(tc, 0, r->type); - r->read_index(index_in_); - r->intersect(v.get()); + v->setReader(bkd_reader); + bkd_reader->intersect(v.get()); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testBug1Read: %s\n", r.what()); } //printf("hits count=%d\n", result->count()); CuAssertEquals(tc, result->count(), 6); //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } + _CLLDECDELETE(dir) } void testLowCardinalInts1DWrite(CuTest *tc) { const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir(FSDirectory::getDirectory("testLowCardinalInts1D")); shared_ptr<bkd::bkd_writer> w = make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true); w->docs_seen_ = N; @@ -402,9 +592,9 @@ void testLowCardinalInts1DWrite(CuTest *tc) { // equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexOutput out = // dir.createOutput("bkd", org.apache.lucene.store.IOContext.DEFAULT)) { - std::unique_ptr<IndexOutput> out(dir->createOutput("bkd2")); - std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd2_meta")); - std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd2_index")); + std::unique_ptr<IndexOutput> out(dir->createOutput("bkd")); + std::unique_ptr<IndexOutput> meta_out(dir->createOutput("bkd_meta")); + std::unique_ptr<IndexOutput> index_out(dir->createOutput("bkd_index")); //auto metaOffset = w->MetaInit(out.get()); try { @@ -422,68 +612,68 @@ void testLowCardinalInts1DWrite(CuTest *tc) { void testLowCardinalInts1DRead2(CuTest *tc) { uint64_t str = Misc::currentTimeMillis(); const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D"); { - IndexInput *in_(dir->openInput("bkd2")); - IndexInput *meta_in_(dir->openInput("bkd2_meta")); - IndexInput *index_in_(dir->openInput("bkd2_index")); - shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_); + auto closeDirectory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(dir, closeDirectory); + if (!bkd_reader->open()) { + printf("can not open bkd file\n"); + exit(1); + } // Simple 1D range query: constexpr int queryMin = 0; //std::numeric_limits<int>::min(); constexpr int queryMax = 100;//std::numeric_limits<int>::max(); auto hits = std::make_shared<BitSet>(N); auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits); try { - r->read_meta(meta_in_); - //auto type = r->read_type(); - CuAssertEquals(tc, 0, r->type); - r->read_index(index_in_); - r->intersect(v.get()); + bkd_reader->intersect(v.get()); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testLowCardinalInts1DRead2: %s\n", r.what()); } //printf("hits count=%d\n", hits->count()); CuAssertEquals(tc, hits->count(), 12928); //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + _CLLDECDELETE(dir) } } void testLowCardinalInts1DRead(CuTest *tc) { uint64_t str = Misc::currentTimeMillis(); const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir = FSDirectory::getDirectory("testLowCardinalInts1D"); { - IndexInput *in_(dir->openInput("bkd2")); - IndexInput *meta_in_(dir->openInput("bkd2_meta")); - IndexInput *index_in_(dir->openInput("bkd2_index")); + auto closeDirectory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(dir, closeDirectory); + if (!bkd_reader->open()) { + printf("can not open bkd file\n"); + exit(1); + } - shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_); // Simple 1D range query: constexpr int queryMin = 0;//std::numeric_limits<int>::min(); constexpr int queryMax = 1;//std::numeric_limits<int>::max(); auto hits = std::make_shared<BitSet>(N); auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits); try { - r->read_meta(meta_in_); - //auto type = r->read_type(); - CuAssertEquals(tc, 0, r->type); - r->read_index(index_in_); - r->intersect(v.get()); + bkd_reader->intersect(v.get()); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testLowCardinalInts1DRead: %s\n", r.what()); } //printf("hits count=%d\n", hits->count()); CuAssertEquals(tc, hits->count(), 256); //printf("\nFirst search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); + _CLLDECDELETE(dir) } } void testBasicsInts1DWrite(CuTest *tc) { const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir(FSDirectory::getDirectory("testBasicsInts1D")); shared_ptr<bkd::bkd_writer> w = make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true); w->docs_seen_ = N; @@ -520,26 +710,26 @@ void testBasicsInts1DWrite(CuTest *tc) { void testBasicsInts1DRead(CuTest *tc) { uint64_t str = Misc::currentTimeMillis(); const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir = FSDirectory::getDirectory("testBasicsInts1D"); { - IndexInput *in_(dir->openInput("bkd")); - IndexInput *meta_in_(dir->openInput("bkd_meta")); - IndexInput *index_in_(dir->openInput("bkd_index")); - shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_); + auto closeDirectory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(dir, closeDirectory); + if (!bkd_reader->open()) { + printf("can not open bkd file\n"); + exit(1); + } + // Simple 1D range query: constexpr int queryMin = 1024; constexpr int queryMax = std::numeric_limits<int>::max(); auto hits = std::make_shared<BitSet>(N); auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits); try { - r->read_meta(meta_in_); - //auto type = r->read_type(); - CuAssertEquals(tc, 0, r->type); - r->read_index(index_in_); - r->intersect(v.get()); + bkd_reader->intersect(v.get()); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testBasicsInts1DRead: %s\n", r.what()); } for (int docID = 0; docID < N; docID++) { bool expected = docID >= queryMin && docID <= queryMax; @@ -556,7 +746,7 @@ void testBasicsInts1DRead(CuTest *tc) { auto v1 = std::make_unique<TestVisitor1>(queryMin, queryMax, hits1); str = Misc::currentTimeMillis(); - r->intersect(v1.get()); + bkd_reader->intersect(v1.get()); for (int docID = 0; docID < N; docID++) { bool expected = docID >= queryMin && docID <= queryMax; bool actual = hits1->get(N - docID - 1); @@ -568,7 +758,7 @@ void testBasicsInts1DRead(CuTest *tc) { } //printf("\nSecond search time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } - dir->close(); + //dir->close(); _CLDECDELETE(dir); } @@ -595,7 +785,7 @@ void testHttplogsRead(CuTest *tc) { const auto *max = reinterpret_cast<const uint8_t *>(scratch2.data()); const auto *min = reinterpret_cast<const uint8_t *>(scratch.data()); - auto v = std::make_unique<TestVisitor>(min, max, result.get(), G); + auto v = std::make_unique<TestVisitor<G>>(min, max, result.get()); v->setReader(r); try { str = Misc::currentTimeMillis(); @@ -608,7 +798,7 @@ void testHttplogsRead(CuTest *tc) { //printf("\nsearch time taken: %d ms\n\n", (int32_t) (Misc::currentTimeMillis() - str)); } catch (CLuceneError &r) { //printf("something wrong in read\n"); - printf("clucene error: %s\n", r.what()); + printf("clucene error in testHttplogsRead: %s\n", r.what()); } //printf("result size = %d\n", result->count()); CuAssertEquals(tc, result->count(), 8445); @@ -696,7 +886,7 @@ void testSame(CuTest *tc) { { //std::shared_ptr<Directory> dir{getDirectory(10001)}; const int N = 1024 * 1024; - Directory *dir(FSDirectory::getDirectory("TestBKDTree")); + Directory *dir(FSDirectory::getDirectory("testSame")); shared_ptr<bkd::bkd_writer> w = make_shared<bkd::bkd_writer>(N, 1, 1, 4, 512, 100.0f, N, true); @@ -724,11 +914,13 @@ void testSame(CuTest *tc) { // equivalent: ORIGINAL LINE: try (org.apache.lucene.store.IndexInput in = // dir.openInput("bkd", org.apache.lucene.store.IOContext.DEFAULT)) { - IndexInput *in_(dir->openInput("bkd")); - IndexInput *meta_in_(dir->openInput("bkd_meta")); - IndexInput *index_in_(dir->openInput("bkd_index")); - //in_->seek(indexFP); - shared_ptr<bkd::bkd_reader> r = make_shared<bkd::bkd_reader>(in_); + auto closeDirectory = true; + auto bkd_reader = + std::make_shared<lucene::util::bkd::bkd_reader>(dir, closeDirectory); + if (!bkd_reader->open()) { + printf("can not open bkd file\n"); + exit(1); + } // Simple 1D range query: constexpr int queryMin = 100; @@ -737,11 +929,7 @@ void testSame(CuTest *tc) { //std::shared_ptr<BitSet> hits; auto hits = std::make_shared<BitSet>(N); auto v = std::make_unique<TestVisitor1>(queryMin, queryMax, hits); - r->read_meta(meta_in_); - //auto type = r->read_type(); - CuAssertEquals(tc, 0, r->type); - r->read_index(index_in_); - r->intersect(v.get()); + bkd_reader->intersect(v.get()); for (int docID = 0; docID < N; docID++) { bool expected = (100 >= queryMin && 100 <= queryMax); @@ -753,7 +941,7 @@ void testSame(CuTest *tc) { //assertEquals(L"docID=" + to_wstring(docID), expected, actual); } } - dir->close(); + //dir->close(); _CLDECDELETE(dir); } } @@ -765,7 +953,7 @@ void equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) { const auto *max = reinterpret_cast<const uint8_t *>(&value); const auto *min = reinterpret_cast<const uint8_t *>(&value); - auto v = std::make_unique<TestVisitor>(min, max, result.get(), EQ); + auto v = std::make_unique<TestVisitor<EQ>>(min, max, result.get()); v->setReader(r); r->intersect(v.get()); printf("count: %d\n", result->count()); @@ -788,7 +976,7 @@ void less_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) { } const auto *max = reinterpret_cast<const uint8_t *>(&value); - auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(), LE); + auto v = std::make_unique<TestVisitor<LE>>(min.data(), max, result.get()); v->setReader(r); r->intersect(v.get()); printf("\ncount: %d\n", result->count()); @@ -814,7 +1002,7 @@ void less_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) { } const auto *max = reinterpret_cast<const uint8_t *>(&value); - auto v = std::make_unique<TestVisitor>(min.data(), max, result.get(), L); + auto v = std::make_unique<TestVisitor<L>>(min.data(), max, result.get()); v->setReader(r); r->intersect(v.get()); printf("count: %d\n", result->count()); @@ -836,7 +1024,7 @@ void greater_equal_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) { } const auto *min = reinterpret_cast<const uint8_t *>(&value); - auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), GE); + auto v = std::make_unique<TestVisitor<GE>>(min, max.data(), result.get()); v->setReader(r); r->intersect(v.get()); printf("count: %d\n", result->count()); @@ -858,7 +1046,7 @@ void greater_predicate(std::shared_ptr<lucene::util::bkd::bkd_reader> r) { } const auto *min = reinterpret_cast<const uint8_t *>(&value); - auto v = std::make_unique<TestVisitor>(min, max.data(), result.get(), G); + auto v = std::make_unique<TestVisitor<G>>(min, max.data(), result.get()); v->setReader(r); r->intersect(v.get()); printf("count: %d\n", result->count()); diff --git a/src/test/util/TestBKD.h b/src/test/util/TestBKD.h index a66f252a..327c70fe 100644 --- a/src/test/util/TestBKD.h +++ b/src/test/util/TestBKD.h @@ -26,19 +26,20 @@ public: } } void visit(std::vector<char>& docID, std::vector<uint8_t> &packedValue) override { - if (!matches(packedValue.data())) { + if (matches(packedValue.data()) != 0) { return; } visit(roaring::Roaring::read(docID.data(), false)); } void visit(roaring::Roaring *docID, std::vector<uint8_t> &packedValue) override; - void visit(int docID, std::vector<uint8_t> &packedValue) override; void visit(lucene::util::bkd::bkd_docid_set_iterator *iter, std::vector<uint8_t> &packedValue) override; + int visit(int docid, std::vector<uint8_t> &packedValue) override; - bool matches(uint8_t *packedValue); + int matches(uint8_t *packedValue); lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked, std::vector<uint8_t> &maxPacked) override; + lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix) override; }; enum predicate { @@ -49,20 +50,17 @@ enum predicate { EQ }; +template <predicate QT> class TestVisitor : public lucene::util::bkd::bkd_reader::intersect_visitor { private: const uint8_t *queryMin; const uint8_t *queryMax; - //int queryMin = 0; - //int queryMax = 0; lucene::util::BitSet *hits; - //std::shared_ptr<lucene::util::BitSet> hits; std::shared_ptr<lucene::util::bkd::bkd_reader> reader; - predicate pred; public: - TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax, lucene::util::BitSet *hits, predicate p); - virtual ~TestVisitor() = default; + TestVisitor(const uint8_t *queryMin, const uint8_t *queryMax, lucene::util::BitSet *hits); + ~TestVisitor() override = default; void setReader(std::shared_ptr<lucene::util::bkd::bkd_reader> &r) { reader = r; }; @@ -101,9 +99,10 @@ public: docID = iter->docid_set->nextDoc(); } }; - bool matches(uint8_t *packedValue); + int matches(uint8_t *packedValue); + lucene::util::bkd::relation compare_prefix(std::vector<uint8_t> &prefix) override; - void visit(int rowID, std::vector<uint8_t> &packedValue) override; + int visit(int rowID, std::vector<uint8_t> &packedValue) override; lucene::util::bkd::relation compare(std::vector<uint8_t> &minPacked, std::vector<uint8_t> &maxPacked) override; diff --git a/src/test/util/TestMSBRadixSorter.cpp b/src/test/util/TestMSBRadixSorter.cpp index d817e118..347ee43e 100644 --- a/src/test/util/TestMSBRadixSorter.cpp +++ b/src/test/util/TestMSBRadixSorter.cpp @@ -89,23 +89,21 @@ void TestMSBRadixSorter::testOneValue() void TestMSBRadixSorter::testNValues() { const int n = 1000; - std::vector<uint8_t> scratch(4); auto y = std::vector<BytesRef>(); auto z = std::vector<BytesRef>(); for (int docID = 0; docID < n; docID++) { + std::vector<uint8_t> scratch(4); NumericUtils::intToSortableBytes(docID, scratch, 0); - BytesRef x1(scratch); - y.emplace_back(x1); + y.emplace_back(scratch); } - //for (int docID = 0; docID <n; docID++) { for (int docID = n-1; docID >= 0; docID--) { + std::vector<uint8_t> scratch(4); NumericUtils::intToSortableBytes(docID, scratch, 0); - BytesRef x1(scratch); - z.emplace_back(x1); + z.emplace_back(scratch); } - test(y,z, n); + test(y, z, n); } void testSorter(CuTest *tc) { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org