This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git
The following commit(s) were added to refs/heads/clucene by this push:
     new 1c76e25b [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)
1c76e25b is described below

commit 1c76e25b55a27823917624c1cb0406c34f77e72d
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Wed Jan 10 10:10:10 2024 +0800

    [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)
---
 src/core/CLucene/search/MultiPhraseQuery.cpp |   4 +-
 src/test/CMakeLists.txt                      |   1 +
 src/test/query/TestMultiPhraseQuery.cpp      | 163 +++++++++++++++++++++++++++
 src/test/test.h                              |   1 +
 src/test/tests.cpp                           |   1 +
 5 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/src/core/CLucene/search/MultiPhraseQuery.cpp b/src/core/CLucene/search/MultiPhraseQuery.cpp
index 54273702..107c8b11 100644
--- a/src/core/CLucene/search/MultiPhraseQuery.cpp
+++ b/src/core/CLucene/search/MultiPhraseQuery.cpp
@@ -211,8 +211,8 @@ Query* MultiPhraseQuery::rewrite(IndexReader* /*reader*/) {
         ArrayBase<Term*>* terms = termArrays->at(0);
         BooleanQuery* boq = _CLNEW BooleanQuery(true);
         for ( size_t i=0;i<terms->length;i++ ){
-            boq->add(_CLNEW TermQuery((*terms)[i]), BooleanClause::SHOULD);
-        }
+            boq->add(_CLNEW TermQuery((*terms)[i]), true, BooleanClause::SHOULD);
+        }
         boq->setBoost(getBoost());
         return boq;
     } else {
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 20c722f8..69737268 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -102,6 +102,7 @@ SET(test_files ./tests.cpp
         ./util/TestStringBuffer.cpp
         ./util/English.cpp
         ./util/TestStrConvert.cpp
+        ./query/TestMultiPhraseQuery.cpp
         ${test_HEADERS})
 IF (USE_SHARED_OBJECT_FILES)
     GET_SHARED_FILES(clucene_shared_Files)
diff --git a/src/test/query/TestMultiPhraseQuery.cpp b/src/test/query/TestMultiPhraseQuery.cpp
new file mode 100644
index 00000000..ccc4fe7f
--- /dev/null
+++ b/src/test/query/TestMultiPhraseQuery.cpp
@@ -0,0 +1,163 @@
+#include <CLucene.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexReader.h"
+#include "CLucene/index/Term.h"
+#include "CLucene/search/MultiPhraseQuery.h"
+#include "CLucene/store/Directory.h"
+#include "CLucene/store/FSDirectory.h"
+#include "CLucene/store/RAMDirectory.h"
+#include "test.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_USE(search)
+CL_NS_USE(index)
+
+void testSimple1Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testSimple2Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testMultiAdd(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+CuSuite* testMultiPhraseQuery(void) {
+    CuSuite* suite = CuSuiteNew(_T("CLucene MultiPhraseQuery Test"));
+
+    SUITE_ADD_TEST(suite, testSimple1Add);
+    SUITE_ADD_TEST(suite, testSimple2Add);
+    SUITE_ADD_TEST(suite, testMultiAdd);
+
+    return suite;
+}
\ No newline at end of file
diff --git a/src/test/test.h b/src/test/test.h
index 08c168cb..a414af8d 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -82,6 +82,7 @@ CuSuite *testTermVectorsReader(void);
 CuSuite *teststandard95(void);
 CuSuite *testStrConvert(void);
 CuSuite *testSearchRange(void);
+CuSuite *testMultiPhraseQuery(void);
 
 #ifdef TEST_CONTRIB_LIBS
 //CuSuite *testGermanAnalyzer(void);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index d703e159..68a872b4 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -16,6 +16,7 @@ unittest tests[] = {
         {"MSBRadixSorter",testMSBRadixSorter},
         {"strconvert", testStrConvert},
         {"searchRange", testSearchRange},
+        {"MultiPhraseQuery", testMultiPhraseQuery},
 #ifdef TEST_CONTRIB_LIBS
         {"chinese", testchinese},
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org