This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e9e90b92f26 [fix](inverted index) Fix incorrect exception handling (#41874)
e9e90b92f26 is described below

commit e9e90b92f26062973885558932fa3434cea7cdbc
Author: zzzxl <33418555+zzzxl1...@users.noreply.github.com>
AuthorDate: Wed Oct 16 13:50:23 2024 +0800

    [fix](inverted index) Fix incorrect exception handling (#41874)

    ## Proposed changes

    1. Memory leaks may occur in exceptional situations
---
 be/src/clucene                                     |   2 +-
 .../inverted_index/index_compaction_test.cpp       | 278 +++++++++++++++++++++
 2 files changed, 279 insertions(+), 1 deletion(-)

diff --git a/be/src/clucene b/be/src/clucene
index 5e9566ab364..5a458e6112b 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit 5e9566ab364d71b64c436ee46e5c848eed0ab7f7
+Subproject commit 5a458e6112b7e5010262594147adf22830b096e6
diff --git a/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_test.cpp
new file mode 100644
index 00000000000..1343cfd6e34
--- /dev/null
+++ b/be/test/olap/rowset/segment_v2/inverted_index/index_compaction_test.cpp
@@ -0,0 +1,278 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <ctime>
+#include <exception>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "common/logging.h"
+#include "io/fs/local_file_system.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wshadow-field"
+#include <CLucene.h> // IWYU pragma: keep
+#include <CLucene/index/IndexReader.h>
+#include <CLucene/search/query/TermPositionIterator.h>
+#include <CLucene/util/stringUtil.h>
+
+#include "CLucene/analysis/Analyzers.h"
+#include "CLucene/store/FSDirectory.h"
+#pragma GCC diagnostic pop
+
+CL_NS_USE(search)
+CL_NS_USE(store)
+CL_NS_USE(index)
+CL_NS_USE(util)
+
+// Runs `finallyBlock` unconditionally, then rethrows the exception held by
+// `eptr` (if any). Meant to follow a try/catch (...) that stored
+// std::current_exception() in `eptr`. `finallyBlock` is a macro argument, so it
+// must not contain top-level commas.
+#define FINALLY(eptr, finallyBlock)       \
+    {                                     \
+        finallyBlock;                     \
+        if (eptr) {                       \
+            std::rethrow_exception(eptr); \
+        }                                 \
+    }
+
+namespace doris::segment_v2 {
+
+// Fixture that recreates an empty scratch directory (kTestDir) before each test
+// and deletes it afterwards.
+class IndexCompactionTest : public testing::Test {
+public:
+    const std::string kTestDir = "./ut_dir/index_compress_test";
+
+    void SetUp() override {
+        auto st = io::global_local_filesystem()->delete_directory(kTestDir);
+        ASSERT_TRUE(st.ok()) << st;
+        st = io::global_local_filesystem()->create_directory(kTestDir);
+        ASSERT_TRUE(st.ok()) << st;
+    }
+    void TearDown() override {
+        EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kTestDir).ok());
+    }
+
+    IndexCompactionTest() = default;
+    ~IndexCompactionTest() override = default;
+
+    static constexpr int32_t doc_count = 100000;
+};
+
+// Returns local midnight of the current day, expressed in whole days
+// (std::mktime() result divided by 86400), so the value changes once per day.
+// Returns 0 when std::localtime() cannot convert the current time.
+static int32_t getDaySeed() {
+    const std::time_t now = std::time(nullptr);
+    std::tm* local_time = std::localtime(&now);
+    if (local_time == nullptr) {
+        return 0;
+    }
+    local_time->tm_sec = 0;
+    local_time->tm_min = 0;
+    local_time->tm_hour = 0;
+    return static_cast<int32_t>(std::mktime(local_time) / (60 * 60 * 24));
+}
+
+// Builds a dotted-quad string from four successive std::rand() % 256 values.
+static std::string generateRandomIP() {
+    std::string ip_v4;
+    for (int octet = 0; octet < 4; ++octet) {
+        if (octet != 0) {
+            ip_v4.append(".");
+        }
+        ip_v4.append(std::to_string(std::rand() % 256));
+    }
+    return ip_v4;
+}
+
+// Writes an index at `path` with one document per element of `datas`; every
+// document reuses a single field `name` configured as STORE_NO | INDEX_NONORMS |
+// INDEX_TOKENIZED.
+//
+// `has_prox` is forwarded unchanged to Field::setOmitTermFreqAndPositions().
+// NOTE(review): going by the names, `true` would *omit* frequencies/positions,
+// i.e. the opposite of "has prox" - confirm before reusing this helper.
+//
+// Cleanup (close + delete) runs whether or not indexing threw; a captured
+// exception is rethrown afterwards.
+static void write_index(const std::string& name, const std::string& path, bool has_prox,
+                        const std::vector<std::string>& datas) {
+    lucene::analysis::SimpleAnalyzer<char>* analyzer = nullptr;
+    lucene::index::IndexWriter* indexwriter = nullptr;
+    lucene::util::SStringReader<char>* char_string_reader = nullptr;
+    lucene::document::Document* doc = nullptr;
+
+    std::exception_ptr eptr;
+    try {
+        analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
+        analyzer->set_stopwords(nullptr);
+        indexwriter = _CLNEW lucene::index::IndexWriter(path.c_str(), analyzer, true);
+        indexwriter->setRAMBufferSizeMB(512);
+        indexwriter->setMaxBufferedDocs(-1);
+        indexwriter->setMaxFieldLength(0x7FFFFFFFL);
+        indexwriter->setMergeFactor(1000000000);
+        indexwriter->setUseCompoundFile(false);
+
+        char_string_reader = _CLNEW lucene::util::SStringReader<char>;
+
+        doc = _CLNEW lucene::document::Document();
+        int32_t field_config = lucene::document::Field::STORE_NO;
+        field_config |= lucene::document::Field::INDEX_NONORMS;
+        field_config |= lucene::document::Field::INDEX_TOKENIZED;
+        const auto field_name = std::wstring(name.begin(), name.end());
+        // The field is never deleted separately in this helper; presumably the
+        // document takes ownership in add() - TODO confirm.
+        auto* field = _CLNEW lucene::document::Field(field_name.c_str(), field_config);
+        field->setOmitTermFreqAndPositions(has_prox);
+        doc->add(*field);
+
+        // The same document, field and reader are reused for every value.
+        for (const auto& data : datas) {
+            char_string_reader->init(data.data(), data.size(), false);
+            auto* stream = analyzer->reusableTokenStream(field->name(), char_string_reader);
+            field->setValue(stream);
+            indexwriter->addDocument(doc);
+        }
+    } catch (...) {
+        eptr = std::current_exception();
+    }
+    FINALLY(eptr, {
+        if (indexwriter != nullptr) {
+            indexwriter->close();
+        }
+        _CLLDELETE(indexwriter);
+        _CLLDELETE(doc);
+        _CLLDELETE(analyzer);
+        _CLLDELETE(char_string_reader);
+    })
+}
+
+// Runs IndexWriter::indexCompaction() from `srcDirs` into `destDirs` through a
+// temporary writer opened at `path`, using a synthetic document mapping.
+//
+// Walking doc i = 0..count-1 and, inside that, source j, each source document
+// gets the next (destination index, destination doc id) slot; the destination
+// index advances after count * destDirs.size() documents, which is also the
+// per-destination document count handed to indexCompaction().
+// NOTE(review): with 2 sources and 1 destination this yields destination
+// index 1, which is outside destDirs - confirm this is intended.
+//
+// Cleanup (close + delete) runs whether or not compaction threw; a captured
+// exception is rethrown afterwards.
+static void index_compaction(const std::string& path,
+                             std::vector<lucene::store::Directory*> srcDirs,
+                             std::vector<lucene::store::Directory*> destDirs, int32_t count) {
+    lucene::analysis::SimpleAnalyzer<char>* analyzer = nullptr;
+    lucene::index::IndexWriter* indexwriter = nullptr;
+
+    std::exception_ptr eptr;
+    try {
+        analyzer = _CLNEW lucene::analysis::SimpleAnalyzer<char>;
+        indexwriter = _CLNEW lucene::index::IndexWriter(path.c_str(), analyzer, true);
+
+        const size_t src_num = srcDirs.size();
+        const size_t dest_num = destDirs.size();
+        const size_t docs_per_dest = static_cast<size_t>(count) * dest_num;
+
+        std::vector<std::vector<std::pair<uint32_t, uint32_t>>> trans_vec(
+                src_num, std::vector<std::pair<uint32_t, uint32_t>>(count));
+        uint32_t dest_idx = 0;
+        uint32_t dest_doc_id = 0;
+        for (int32_t i = 0; i < count; i++) {
+            for (size_t j = 0; j < src_num; j++) {
+                if (dest_doc_id == docs_per_dest) {
+                    dest_idx++;
+                    dest_doc_id = 0;
+                }
+                trans_vec[j][i] = std::make_pair(dest_idx, dest_doc_id++);
+            }
+        }
+
+        std::vector<uint32_t> dest_index_docs(dest_num, static_cast<uint32_t>(docs_per_dest));
+
+        indexwriter->indexCompaction(srcDirs, destDirs, trans_vec, dest_index_docs);
+    } catch (...) {
+        eptr = std::current_exception();
+    }
+    FINALLY(eptr, {
+        if (indexwriter != nullptr) {
+            indexwriter->close();
+        }
+        _CLDELETE(indexwriter);
+        _CLDELETE(analyzer);
+    })
+}
+
+TEST_F(IndexCompactionTest, test_compaction_exception) {
+    std::srand(getDaySeed());
+    const std::string name = "field_name";
+
+    // Generates `n` random dotted-quad strings to use as document values.
+    auto random_ips = [](int32_t n) {
+        std::vector<std::string> datas;
+        datas.reserve(static_cast<size_t>(n));
+        for (int32_t i = 0; i < n; i++) {
+            datas.emplace_back(generateRandomIP());
+        }
+        return datas;
+    };
+    // Closes and releases every directory obtained from FSDirectory.
+    auto release_dirs = [](std::vector<lucene::store::Directory*>& dirs) {
+        for (auto& dir : dirs) {
+            dir->close();
+            _CLDECDELETE(dir);
+        }
+    };
+
+    // The two source indexes are written with different has_prox flags.
+    write_index(name, kTestDir + "/index1", true, random_ips(10));
+    write_index(name, kTestDir + "/index2", false, random_ips(10));
+
+    // index compaction exception 1
+    {
+        std::vector<lucene::store::Directory*> srcDirs;
+        srcDirs.push_back(FSDirectory::getDirectory(std::string(kTestDir + "/index1").c_str()));
+        srcDirs.push_back(FSDirectory::getDirectory(std::string(kTestDir + "/index2").c_str()));
+        std::vector<lucene::store::Directory*> destDirs;
+        destDirs.push_back(FSDirectory::getDirectory(std::string(kTestDir + "/index4").c_str()));
+
+        const std::string path = kTestDir + "/index0";
+        bool caught = false;
+        try {
+            index_compaction(path, srcDirs, destDirs, 10);
+        } catch (const CLuceneError& e) {
+            caught = true;
+            EXPECT_EQ(e.number(), CL_ERR_IllegalArgument);
+        }
+        // Without this check the test would pass even if nothing was thrown.
+        EXPECT_TRUE(caught) << "index_compaction did not throw CLuceneError";
+        release_dirs(srcDirs);
+        release_dirs(destDirs);
+    }
+}
+
+} // namespace doris::segment_v2

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org