(doris-thirdparty) branch clucene updated: [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)

jianliangqi Tue, 09 Jan 2024 18:10:19 -0800

This is an automated email from the ASF dual-hosted git repository.

jianliangqi pushed a commit to branch clucene
in repository https://gitbox.apache.org/repos/asf/doris-thirdparty.git



The following commit(s) were added to refs/heads/clucene by this push:
     new 1c76e25b [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)
1c76e25b is described below

commit 1c76e25b55a27823917624c1cb0406c34f77e72d
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Wed Jan 10 10:10:10 2024 +0800

    [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)
---
 src/core/CLucene/search/MultiPhraseQuery.cpp |   4 +-
 src/test/CMakeLists.txt                      |   1 +
 src/test/query/TestMultiPhraseQuery.cpp      | 163 +++++++++++++++++++++++++++
 src/test/test.h                              |   1 +
 src/test/tests.cpp                           |   1 +
 5 files changed, 168 insertions(+), 2 deletions(-)

diff --git a/src/core/CLucene/search/MultiPhraseQuery.cpp b/src/core/CLucene/search/MultiPhraseQuery.cpp
index 54273702..107c8b11 100644
--- a/src/core/CLucene/search/MultiPhraseQuery.cpp
+++ b/src/core/CLucene/search/MultiPhraseQuery.cpp
@@ -211,8 +211,8 @@ Query* MultiPhraseQuery::rewrite(IndexReader* /*reader*/) {
          ArrayBase<Term*>* terms = termArrays->at(0);
          BooleanQuery* boq = _CLNEW BooleanQuery(true);
     for ( size_t i=0;i<terms->length;i++ ){
-                 boq->add(_CLNEW TermQuery((*terms)[i]), BooleanClause::SHOULD);
-         }
+                 boq->add(_CLNEW TermQuery((*terms)[i]), true, BooleanClause::SHOULD);
+               }
          boq->setBoost(getBoost());
          return boq;
   } else {
diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt
index 20c722f8..69737268 100644
--- a/src/test/CMakeLists.txt
+++ b/src/test/CMakeLists.txt
@@ -102,6 +102,7 @@ SET(test_files ./tests.cpp
         ./util/TestStringBuffer.cpp
         ./util/English.cpp
         ./util/TestStrConvert.cpp
+        ./query/TestMultiPhraseQuery.cpp
         ${test_HEADERS})
 IF (USE_SHARED_OBJECT_FILES)
     GET_SHARED_FILES(clucene_shared_Files)
diff --git a/src/test/query/TestMultiPhraseQuery.cpp b/src/test/query/TestMultiPhraseQuery.cpp
new file mode 100644
index 00000000..ccc4fe7f
--- /dev/null
+++ b/src/test/query/TestMultiPhraseQuery.cpp
@@ -0,0 +1,163 @@
+#include <CLucene.h>
+
+#include <iostream>
+#include <memory>
+#include <vector>
+
+#include "CLucene/debug/error.h"
+#include "CLucene/index/IndexReader.h"
+#include "CLucene/index/Term.h"
+#include "CLucene/search/MultiPhraseQuery.h"
+#include "CLucene/store/Directory.h"
+#include "CLucene/store/FSDirectory.h"
+#include "CLucene/store/RAMDirectory.h"
+#include "test.h"
+
+CL_NS_USE(util)
+CL_NS_USE(store)
+CL_NS_USE(search)
+CL_NS_USE(index)
+
+void testSimple1Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testSimple2Add(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+void testMultiAdd(CuTest* tc) {
+    RAMDirectory dir;
+
+    SimpleAnalyzer<char> analyzer;
+    IndexWriter w(&dir, &analyzer, true);
+    w.setUseCompoundFile(false);
+    auto field_name = lucene::util::Misc::_charToWide("name");
+    std::string value = "value";
+
+    Document doc;
+    auto field = _CLNEW Field(field_name, Field::INDEX_TOKENIZED | Field::STORE_NO);
+
+    auto char_string_reader = std::make_unique<lucene::util::SStringReader<char>>();
+    char_string_reader->init(value.data(), value.size(), true);
+    auto stream = analyzer.tokenStream(field->name(), char_string_reader.get());
+    field->setValue(stream);
+    doc.add(*field);
+
+    w.addDocument(&doc);
+    w.close();
+
+    IndexSearcher index_searcher(&dir);
+    {
+        MultiPhraseQuery query;
+
+        Term* t1 = _CLNEW Term(_T( "name" ), _T( "t1" ));
+        query.add(t1);
+        _CLLDECDELETE(t1);
+
+        std::vector<Term*> terms;
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t2" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t3" )));
+        terms.push_back(_CLNEW Term(_T( "name" ), _T( "t4" )));
+        query.add(terms);
+        for (int32_t i = 0; i < terms.size(); i++) {
+            _CLLDECDELETE(terms[i]);
+        }
+
+        std::vector<int32_t> result;
+        index_searcher._search(&query, [&result](const int32_t docid, const float_t /*score*/) {
+            result.push_back(docid);
+        });
+        CLUCENE_ASSERT(result.size() == 0);
+    }
+
+    _CLDELETE(stream)
+    _CLDELETE_ARRAY(field_name)
+}
+
+CuSuite* testMultiPhraseQuery(void) {
+    CuSuite* suite = CuSuiteNew(_T("CLucene MultiPhraseQuery Test"));
+
+    SUITE_ADD_TEST(suite, testSimple1Add);
+    SUITE_ADD_TEST(suite, testSimple2Add);
+    SUITE_ADD_TEST(suite, testMultiAdd);
+
+    return suite;
+}
\ No newline at end of file
diff --git a/src/test/test.h b/src/test/test.h
index 08c168cb..a414af8d 100644
--- a/src/test/test.h
+++ b/src/test/test.h
@@ -82,6 +82,7 @@ CuSuite *testTermVectorsReader(void);
 CuSuite *teststandard95(void);
 CuSuite *testStrConvert(void);
 CuSuite *testSearchRange(void);
+CuSuite *testMultiPhraseQuery(void);
 
 #ifdef TEST_CONTRIB_LIBS
 //CuSuite *testGermanAnalyzer(void);
diff --git a/src/test/tests.cpp b/src/test/tests.cpp
index d703e159..68a872b4 100644
--- a/src/test/tests.cpp
+++ b/src/test/tests.cpp
@@ -16,6 +16,7 @@ unittest tests[] = {
         {"MSBRadixSorter",testMSBRadixSorter},
         {"strconvert", testStrConvert},
         {"searchRange", testSearchRange},
+        {"MultiPhraseQuery", testMultiPhraseQuery},
 #ifdef TEST_CONTRIB_LIBS
         {"chinese", testchinese},
 #endif


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

(doris-thirdparty) branch clucene updated: [fix](MultiPhrase) fix MultiPhraseQuery memory leak (#175)

Reply via email to