This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 81ccfe0eec6 branch-3.0: [opt](checker) Add inverted index file check for checker #51591 (#52318)
81ccfe0eec6 is described below
commit 81ccfe0eec668408644b3556afb21ffeb6fd10c7
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Jun 26 09:50:00 2025 +0800
branch-3.0: [opt](checker) Add inverted index file check for checker #51591 (#52318)
Cherry-picked from #51591
Co-authored-by: Uniqueyou <[email protected]>
---
cloud/src/recycler/checker.cpp | 144 +++++++++++++++++++++++++++++++-
cloud/test/CMakeLists.txt | 4 +-
cloud/test/mock_accessor.cpp | 183 -----------------------------------------
cloud/test/mock_accessor.h | 158 +++++++++++++++++++++++++++++++++++
cloud/test/recycler_test.cpp | 147 ++++++++++++++++++++++++++++++++-
5 files changed, 448 insertions(+), 188 deletions(-)
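
Context for the change: the forward check in do_check() now enumerates, for every segment of a visible rowset, the inverted index files that must also exist in object storage: one file per inverted index under storage format V1, and a single merged file under V2. The sketch below illustrates that enumeration with plain std::string code; the path formats are assumptions inferred from the "data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx" comment in the diff, and expected_index_paths is a hypothetical stand-in for the inverted_index_path_v1/v2 helpers the real code calls.

    #include <cstdint>
    #include <string>
    #include <utility>
    #include <vector>

    // Hypothetical helper, for illustration only: build the object keys that the
    // forward check would expect for one segment of a rowset.
    std::vector<std::string> expected_index_paths(
            int64_t tablet_id, const std::string& rowset_id, int seg, bool is_v1_format,
            const std::vector<std::pair<int64_t, std::string>>& index_ids) {
        std::vector<std::string> paths;
        std::string prefix = "data/" + std::to_string(tablet_id) + "/" + rowset_id + "_" +
                             std::to_string(seg);
        if (is_v1_format) {
            // V1: one .idx object per inverted index (index id plus optional suffix)
            for (const auto& [index_id, suffix] : index_ids) {
                paths.push_back(prefix + "_" + std::to_string(index_id) + suffix + ".idx");
            }
        } else {
            // V2: all inverted indexes of the segment are merged into one .idx object
            paths.push_back(prefix + ".idx");
        }
        return paths;
    }

A rowset is flagged as data loss only when at least one of these expected paths is missing from the files listed for the tablet, which mirrors the std::all_of predicate in the checker.cpp hunk below.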
diff --git a/cloud/src/recycler/checker.cpp b/cloud/src/recycler/checker.cpp
index 60b6b7fc5ee..6a191b021b2 100644
--- a/cloud/src/recycler/checker.cpp
+++ b/cloud/src/recycler/checker.cpp
@@ -26,10 +26,12 @@
#include <gen_cpp/olap_file.pb.h>
#include <glog/logging.h>
+#include <algorithm>
#include <chrono>
#include <cstdint>
#include <memory>
#include <mutex>
+#include <numeric>
#include <sstream>
#include <string_view>
#include <unordered_set>
@@ -422,7 +424,8 @@ int InstanceChecker::init_storage_vault_accessors(const InstanceInfoPB& instance
LOG(WARNING) << "malformed storage vault, unable to deserialize key=" << hex(k);
return -1;
}
-
+ TEST_SYNC_POINT_CALLBACK("InstanceRecycler::init_storage_vault_accessors.mock_vault",
+ &accessor_map_, &vault);
if (vault.has_hdfs_info()) {
auto accessor = std::make_shared<HdfsAccessor>(vault.hdfs_info());
int ret = accessor->init();
@@ -536,6 +539,7 @@ int InstanceChecker::do_check() {
bool data_loss = false;
for (int i = 0; i < rs_meta.num_segments(); ++i) {
auto path = segment_path(rs_meta.tablet_id(), rs_meta.rowset_id_v2(), i);
+
if (tablet_files_cache.files.contains(path)) {
continue;
}
@@ -549,6 +553,57 @@ int InstanceChecker::do_check() {
LOG(WARNING) << "object not exist, path=" << path << " key=" <<
hex(key);
}
+ std::vector<std::pair<int64_t, std::string>> index_ids;
+ for (const auto& i : rs_meta.tablet_schema().index()) {
+ if (i.has_index_type() && i.index_type() == IndexType::INVERTED) {
+ index_ids.emplace_back(i.index_id(), i.index_suffix_name());
+ }
+ }
+ std::string tablet_idx_key = meta_tablet_idx_key({instance_id_, rs_meta.tablet_id()});
+ if (!key_exist(txn_kv_.get(), tablet_idx_key)) {
+ for (int i = 0; i < rs_meta.num_segments(); ++i) {
+ std::vector<std::string> index_path_v;
+ std::vector<std::string> loss_file_path;
+ if (rs_meta.tablet_schema().inverted_index_storage_format() ==
+ InvertedIndexStorageFormatPB::V1) {
+ for (const auto& index_id : index_ids) {
+ LOG(INFO) << "check inverted index, tablet_id=" <<
rs_meta.tablet_id()
+ << " rowset_id=" << rs_meta.rowset_id_v2()
+ << " segment_index=" << i << " index_id=" <<
index_id.first
+ << " index_suffix_name=" << index_id.second;
+ index_path_v.emplace_back(
+ inverted_index_path_v1(rs_meta.tablet_id(), rs_meta.rowset_id_v2(),
+ i, index_id.first, index_id.second));
+ }
+ } else {
+ index_path_v.emplace_back(
+ inverted_index_path_v2(rs_meta.tablet_id(), rs_meta.rowset_id_v2(), i));
+ }
+
+ if (!index_path_v.empty()) {
+ if (std::all_of(index_path_v.begin(), index_path_v.end(),
+ [&](const auto& idx_file_path) {
+ if (!tablet_files_cache.files.contains(idx_file_path)) {
+ loss_file_path.emplace_back(idx_file_path);
+ return false;
+ }
+ return true;
+ })) {
+ continue;
+ }
+ }
+
+ data_loss = true;
+ LOG(WARNING) << "object not exist, path="
+ << std::accumulate(loss_file_path.begin(), loss_file_path.end(),
+ std::string(),
+ [](const auto& a, const auto& b) {
+ return a.empty() ? b : a + ", " + b;
+ })
+ << " key=" << hex(tablet_idx_key);
+ }
+ }
+
if (data_loss) {
++num_rowset_loss;
}
@@ -647,6 +702,12 @@ int InstanceChecker::do_inverted_check() {
};
TabletRowsets tablet_rowsets_cache;
+ struct TabletIndexes {
+ int64_t tablet_id {0};
+ std::unordered_set<int64_t> index_ids;
+ };
+ TabletIndexes tablet_indexes_cache;
+
// Return 0 if check success, return 1 if file is garbage data, negative if error occurred
auto check_segment_file = [&](const std::string& obj_key) {
std::vector<std::string> str;
@@ -724,8 +785,77 @@ int InstanceChecker::do_inverted_check() {
return 0;
};
+ auto check_inverted_index_file = [&](const std::string& obj_key) {
+ std::vector<std::string> str;
+ butil::SplitString(obj_key, '/', &str);
+ // data/{tablet_id}/{rowset_id}_{seg_num}_{idx_id}{idx_suffix}.idx
+ if (str.size() < 3) {
+ return -1;
+ }
+
+ int64_t tablet_id = atol(str[1].c_str());
+ if (tablet_id <= 0) {
+ LOG(WARNING) << "failed to parse tablet_id, key=" << obj_key;
+ return -1;
+ }
+
+ if (!str.back().ends_with(".idx")) {
+ return 0; // Not an index file
+ }
+
+ int64_t index_id;
+
+ size_t pos = str.back().find_last_of('_');
+ if (pos == std::string::npos || pos + 1 >= str.back().size() - 4) {
+ LOG(WARNING) << "Invalid index_id format, key=" << obj_key;
+ return -1;
+ }
+ index_id = atol(str.back().substr(pos + 1, str.back().size() - 4).c_str());
- // TODO(Xiaocc): Currently we haven't implemented one generator-like s3 accessor list function
+ if (tablet_indexes_cache.tablet_id == tablet_id) {
+ if (tablet_indexes_cache.index_ids.contains(index_id)) {
+ return 0;
+ } else {
+ LOG(WARNING) << "index not exists, key=" << obj_key;
+ return -1;
+ }
+ }
+ // Get all index id of this tablet
+ tablet_indexes_cache.tablet_id = tablet_id;
+ tablet_indexes_cache.index_ids.clear();
+ std::unique_ptr<Transaction> txn;
+ TxnErrorCode err = txn_kv_->create_txn(&txn);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG(WARNING) << "failed to create txn";
+ return -1;
+ }
+ auto tablet_idx_key = meta_tablet_idx_key({instance_id_, tablet_id});
+ std::string tablet_idx_val;
+ err = txn->get(tablet_idx_key, &tablet_idx_val);
+ if (err != TxnErrorCode::TXN_OK) {
+ LOG(WARNING) << "failed to get tablet idx,"
+ << " key=" << hex(tablet_idx_key) << " err=" << err;
+ return -1;
+ }
+
+ TabletIndexPB tablet_idx_pb;
+ if (!tablet_idx_pb.ParseFromArray(tablet_idx_val.data(), tablet_idx_val.size())) {
+ LOG(WARNING) << "malformed index meta value, key=" << hex(tablet_idx_key);
+ return -1;
+ }
+ if (!tablet_idx_pb.has_index_id()) {
+ LOG(WARNING) << "tablet index meta does not have index_id, key="
<< hex(tablet_idx_key);
+ return -1;
+ }
+ tablet_indexes_cache.index_ids.insert(tablet_idx_pb.index_id());
+
+ if (!tablet_indexes_cache.index_ids.contains(index_id)) {
+ LOG(WARNING) << "index should be recycled, key=" << obj_key;
+ return 1;
+ }
+
+ return 0;
+ };
// so we choose to skip here.
TEST_SYNC_POINT_RETURN_WITH_VALUE("InstanceChecker::do_inverted_check", (int)0);
@@ -748,6 +878,16 @@ int InstanceChecker::do_inverted_check() {
check_ret = -1;
}
}
+ ret = check_inverted_index_file(file->path);
+ if (ret != 0) {
+ LOG(WARNING) << "failed to check index file, uri=" <<
accessor->uri()
+ << " path=" << file->path;
+ if (ret == 1) {
+ ++num_file_leak;
+ } else {
+ check_ret = -1;
+ }
+ }
}
if (!list_iter->is_valid()) {
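
The new check_inverted_index_file lambda classifies every listed object: it takes the tablet_id from the second path component, treats names ending in ".idx" as inverted index files, parses the index_id from the trailing "_<id>" segment, and then looks the id up in a per-tablet cache filled from the TabletIndexPB stored under meta_tablet_idx_key (return 1 marks a leaked file, negative values mark errors). Below is a minimal standalone sketch of just the key parsing, under the same path assumption as above; parse_idx_object_key and ParsedIdxKey are hypothetical names used only for this illustration.

    #include <cstdint>
    #include <cstdlib>
    #include <optional>
    #include <string>
    #include <vector>

    struct ParsedIdxKey {
        int64_t tablet_id = 0;
        int64_t index_id = 0;
    };

    // Sketch of the parsing done in check_inverted_index_file; returns nullopt for
    // keys that are not inverted index files or that cannot be parsed.
    std::optional<ParsedIdxKey> parse_idx_object_key(const std::string& obj_key) {
        // split on '/': expect at least data/{tablet_id}/{file_name}
        std::vector<std::string> parts;
        size_t start = 0;
        while (true) {
            size_t pos = obj_key.find('/', start);
            if (pos == std::string::npos) {
                parts.push_back(obj_key.substr(start));
                break;
            }
            parts.push_back(obj_key.substr(start, pos - start));
            start = pos + 1;
        }
        if (parts.size() < 3 || !parts.back().ends_with(".idx")) {
            return std::nullopt;
        }
        int64_t tablet_id = atol(parts[1].c_str());
        size_t underscore = parts.back().find_last_of('_');
        if (tablet_id <= 0 || underscore == std::string::npos) {
            return std::nullopt;
        }
        // atol stops at the first non-digit, so the ".idx" suffix is ignored,
        // just as in the committed code.
        int64_t index_id = atol(parts.back().substr(underscore + 1).c_str());
        return ParsedIdxKey {tablet_id, index_id};
    }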
diff --git a/cloud/test/CMakeLists.txt b/cloud/test/CMakeLists.txt
index 65c9cde561b..8378a33ddfd 100644
--- a/cloud/test/CMakeLists.txt
+++ b/cloud/test/CMakeLists.txt
@@ -17,7 +17,7 @@ add_executable(doris_txn_test doris_txn_test.cpp)
add_executable(txn_kv_test txn_kv_test.cpp)
set_target_properties(txn_kv_test PROPERTIES COMPILE_FLAGS "-fno-access-control")
-add_executable(recycler_test recycler_test.cpp mock_accessor.cpp)
+add_executable(recycler_test recycler_test.cpp)
add_executable(mem_txn_kv_test mem_txn_kv_test.cpp)
@@ -59,7 +59,7 @@ add_executable(util_test util_test.cpp)
add_executable(network_util_test network_util_test.cpp)
-add_executable(txn_lazy_commit_test txn_lazy_commit_test.cpp mock_accessor.cpp)
+add_executable(txn_lazy_commit_test txn_lazy_commit_test.cpp)
message("Meta-service test dependencies: ${TEST_LINK_LIBS}")
#target_link_libraries(sync_point_test ${TEST_LINK_LIBS})
diff --git a/cloud/test/mock_accessor.cpp b/cloud/test/mock_accessor.cpp
deleted file mode 100644
index f11c5969321..00000000000
--- a/cloud/test/mock_accessor.cpp
+++ /dev/null
@@ -1,183 +0,0 @@
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "mock_accessor.h"
-
-#include <glog/logging.h>
-
-#include <iterator>
-#include <ranges>
-#include <vector>
-
-#include "common/logging.h"
-#include "common/string_util.h"
-#include "cpp/sync_point.h"
-#include "recycler/storage_vault_accessor.h"
-
-namespace doris::cloud {
-
-class MockListIterator final : public ListIterator {
-public:
- MockListIterator(std::vector<std::string> entries) : entries_(std::move(entries)) {}
- ~MockListIterator() override = default;
-
- bool is_valid() override { return true; }
-
- bool has_next() override { return !entries_.empty(); }
-
- std::optional<FileMeta> next() override {
- std::optional<FileMeta> ret;
- if (has_next()) {
- ret = FileMeta {.path = std::move(entries_.back())};
- entries_.pop_back();
- }
-
- return ret;
- }
-
-private:
- std::vector<std::string> entries_;
-};
-
-MockAccessor::MockAccessor() : StorageVaultAccessor(AccessorType::MOCK) {
- uri_ = "mock";
-}
-
-MockAccessor::~MockAccessor() = default;
-
-auto MockAccessor::get_prefix_range(const std::string& path_prefix) {
- auto begin = objects_.lower_bound(path_prefix);
- if (begin == objects_.end()) {
- return std::make_pair(begin, begin);
- }
-
- auto path1 = path_prefix;
- path1.back() += 1;
- auto end = objects_.lower_bound(path1);
- return std::make_pair(begin, end);
-}
-
-int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
- TEST_SYNC_POINT("MockAccessor::delete_prefix");
- LOG(INFO) << "delete object of prefix=" << path_prefix;
- std::lock_guard lock(mtx_);
-
- auto [begin, end] = get_prefix_range(path_prefix);
- if (begin == end) {
- return 0;
- }
-
- objects_.erase(begin, end);
- return 0;
-}
-
-int MockAccessor::delete_prefix(const std::string& path_prefix, int64_t expiration_time) {
- auto norm_path_prefix = path_prefix;
- strip_leading(norm_path_prefix, "/");
- if (norm_path_prefix.empty()) {
- LOG_WARNING("invalid dir_path {}", path_prefix);
- return -1;
- }
-
- return delete_prefix_impl(norm_path_prefix);
-}
-
-int MockAccessor::delete_directory(const std::string& dir_path) {
- auto norm_dir_path = dir_path;
- strip_leading(norm_dir_path, "/");
- if (norm_dir_path.empty()) {
- LOG_WARNING("invalid dir_path {}", dir_path);
- return -1;
- }
-
- return delete_prefix_impl(!norm_dir_path.ends_with('/') ? norm_dir_path + '/' : norm_dir_path);
-}
-
-int MockAccessor::delete_all(int64_t expiration_time) {
- std::lock_guard lock(mtx_);
- objects_.clear();
- return 0;
-}
-
-int MockAccessor::delete_files(const std::vector<std::string>& paths) {
- TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_files", (int)0);
-
- for (auto&& path : paths) {
- delete_file(path);
- }
- return 0;
-}
-
-int MockAccessor::delete_file(const std::string& path) {
- LOG(INFO) << "delete object path=" << path;
- std::lock_guard lock(mtx_);
- objects_.erase(path);
- return 0;
-}
-
-int MockAccessor::put_file(const std::string& path, const std::string& content) {
- std::lock_guard lock(mtx_);
- objects_.insert(path);
- return 0;
-}
-
-int MockAccessor::list_all(std::unique_ptr<ListIterator>* res) {
- std::vector<std::string> entries;
-
- {
- std::lock_guard lock(mtx_);
- entries.reserve(objects_.size());
- entries.assign(objects_.rbegin(), objects_.rend());
- }
-
- *res = std::make_unique<MockListIterator>(std::move(entries));
-
- return 0;
-}
-
-int MockAccessor::list_directory(const std::string& dir_path, std::unique_ptr<ListIterator>* res) {
- auto norm_dir_path = dir_path;
- strip_leading(norm_dir_path, "/");
- if (norm_dir_path.empty()) {
- LOG_WARNING("invalid dir_path {}", dir_path);
- return -1;
- }
-
- std::vector<std::string> entries;
-
- {
- std::lock_guard lock(mtx_);
- auto [begin, end] = get_prefix_range(norm_dir_path);
- if (begin != end) {
- entries.reserve(std::distance(begin, end));
- std::ranges::copy(std::ranges::subrange(begin, end) | std::ranges::views::reverse,
- std::back_inserter(entries));
- }
- }
-
- *res = std::make_unique<MockListIterator>(std::move(entries));
-
- return 0;
-}
-
-int MockAccessor::exists(const std::string& path) {
- std::lock_guard lock(mtx_);
- return !objects_.contains(path);
-}
-
-} // namespace doris::cloud
diff --git a/cloud/test/mock_accessor.h b/cloud/test/mock_accessor.h
index ba8ede32462..4e209d93261 100644
--- a/cloud/test/mock_accessor.h
+++ b/cloud/test/mock_accessor.h
@@ -17,12 +17,43 @@
#pragma once
+#include <glog/logging.h>
+
+#include <iterator>
#include <mutex>
+#include <ranges>
#include <set>
+#include <vector>
+#include "common/logging.h"
+#include "common/string_util.h"
+#include "cpp/sync_point.h"
+#include "mock_accessor.h"
#include "recycler/storage_vault_accessor.h"
namespace doris::cloud {
+class MockListIterator final : public ListIterator {
+public:
+ MockListIterator(std::vector<std::string> entries) : entries_(std::move(entries)) {}
+ ~MockListIterator() override = default;
+
+ bool is_valid() override { return true; }
+
+ bool has_next() override { return !entries_.empty(); }
+
+ std::optional<FileMeta> next() override {
+ std::optional<FileMeta> ret;
+ if (has_next()) {
+ ret = FileMeta {.path = std::move(entries_.back())};
+ entries_.pop_back();
+ }
+
+ return ret;
+ }
+
+private:
+ std::vector<std::string> entries_;
+};
class MockAccessor final : public StorageVaultAccessor {
public:
@@ -57,4 +88,131 @@ private:
std::set<std::string> objects_;
};
+inline MockAccessor::MockAccessor() : StorageVaultAccessor(AccessorType::MOCK) {
+ uri_ = "mock";
+}
+
+inline MockAccessor::~MockAccessor() = default;
+
+inline auto MockAccessor::get_prefix_range(const std::string& path_prefix) {
+ auto begin = objects_.lower_bound(path_prefix);
+ if (begin == objects_.end()) {
+ return std::make_pair(begin, begin);
+ }
+
+ auto path1 = path_prefix;
+ path1.back() += 1;
+ auto end = objects_.lower_bound(path1);
+ return std::make_pair(begin, end);
+}
+
+inline int MockAccessor::delete_prefix_impl(const std::string& path_prefix) {
+ TEST_SYNC_POINT("MockAccessor::delete_prefix");
+ LOG(INFO) << "delete object of prefix=" << path_prefix;
+ std::lock_guard lock(mtx_);
+
+ auto [begin, end] = get_prefix_range(path_prefix);
+ if (begin == end) {
+ return 0;
+ }
+
+ objects_.erase(begin, end);
+ return 0;
+}
+
+inline int MockAccessor::delete_prefix(const std::string& path_prefix, int64_t expiration_time) {
+ auto norm_path_prefix = path_prefix;
+ strip_leading(norm_path_prefix, "/");
+ if (norm_path_prefix.empty()) {
+ LOG_WARNING("invalid dir_path {}", path_prefix);
+ return -1;
+ }
+
+ return delete_prefix_impl(norm_path_prefix);
+}
+
+inline int MockAccessor::delete_directory(const std::string& dir_path) {
+ auto norm_dir_path = dir_path;
+ strip_leading(norm_dir_path, "/");
+ if (norm_dir_path.empty()) {
+ LOG_WARNING("invalid dir_path {}", dir_path);
+ return -1;
+ }
+
+ return delete_prefix_impl(!norm_dir_path.ends_with('/') ? norm_dir_path + '/' : norm_dir_path);
+}
+
+inline int MockAccessor::delete_all(int64_t expiration_time) {
+ std::lock_guard lock(mtx_);
+ objects_.clear();
+ return 0;
+}
+
+inline int MockAccessor::delete_files(const std::vector<std::string>& paths) {
+ TEST_SYNC_POINT_RETURN_WITH_VALUE("MockAccessor::delete_files", (int)0);
+
+ for (auto&& path : paths) {
+ delete_file(path);
+ }
+ return 0;
+}
+
+inline int MockAccessor::delete_file(const std::string& path) {
+ LOG(INFO) << "delete object path=" << path;
+ std::lock_guard lock(mtx_);
+ objects_.erase(path);
+ return 0;
+}
+
+inline int MockAccessor::put_file(const std::string& path, const std::string& content) {
+ std::lock_guard lock(mtx_);
+ objects_.insert(path);
+ return 0;
+}
+
+inline int MockAccessor::list_all(std::unique_ptr<ListIterator>* res) {
+ std::vector<std::string> entries;
+
+ {
+ std::lock_guard lock(mtx_);
+ entries.reserve(objects_.size());
+ entries.assign(objects_.rbegin(), objects_.rend());
+ }
+
+ *res = std::make_unique<MockListIterator>(std::move(entries));
+
+ return 0;
+}
+
+inline int MockAccessor::list_directory(const std::string& dir_path,
+ std::unique_ptr<ListIterator>* res) {
+ auto norm_dir_path = dir_path;
+ strip_leading(norm_dir_path, "/");
+ if (norm_dir_path.empty()) {
+ LOG_WARNING("invalid dir_path {}", dir_path);
+ return -1;
+ }
+
+ std::vector<std::string> entries;
+
+ {
+ std::lock_guard lock(mtx_);
+ auto [begin, end] = get_prefix_range(norm_dir_path);
+ if (begin != end) {
+ entries.reserve(std::distance(begin, end));
+ std::ranges::copy(std::ranges::subrange(begin, end) | std::ranges::views::reverse,
+ std::back_inserter(entries));
+ }
+ }
+
+ *res = std::make_unique<MockListIterator>(std::move(entries));
+
+ return 0;
+}
+
+inline int MockAccessor::exists(const std::string& path) {
+ std::lock_guard lock(mtx_);
+ return !objects_.contains(path);
+}
+
} // namespace doris::cloud
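
With the MockAccessor implementation now inline in mock_accessor.h, tests that previously linked mock_accessor.cpp (see the CMakeLists.txt change above) only need to include the header. A small usage sketch, assuming the MockAccessor constructor is public as in the existing tests; note that exists() follows the accessor convention of returning 0 when the object is present, per the implementation shown above.

    #include <memory>

    #include "mock_accessor.h"  // header-only after this change

    using namespace doris::cloud;

    // Sketch: seed a mock vault and walk its "data" directory, the same put/list/delete
    // pattern the checker tests below use with their accessor.
    void mock_accessor_demo() {
        auto accessor = std::make_shared<MockAccessor>();
        accessor->put_file("data/10001/0200000000001234_0.dat", "");
        accessor->put_file("data/10001/0200000000001234_0_1.idx", "");

        std::unique_ptr<ListIterator> iter;
        if (accessor->list_directory("data", &iter) == 0) {
            for (auto file = iter->next(); file.has_value(); file = iter->next()) {
                if (file->path.ends_with(".idx")) {
                    accessor->delete_file(file->path);  // mock recycling of an index file
                }
            }
        }
        // 0 means the object is still there; 1 means it is gone (per the mock above)
        int dat_state = accessor->exists("data/10001/0200000000001234_0.dat");
        (void)dat_state;
    }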
diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp
index bcd7dd39160..da5dcc1556b 100644
--- a/cloud/test/recycler_test.cpp
+++ b/cloud/test/recycler_test.cpp
@@ -17,6 +17,7 @@
#include "recycler/recycler.h"
+#include <butil/strings/string_split.h>
#include <fmt/core.h>
#include <gen_cpp/cloud.pb.h>
#include <gen_cpp/olap_file.pb.h>
@@ -258,8 +259,11 @@ static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor
rowset_pb.set_creation_time(current_time);
if (num_inverted_indexes > 0) {
auto schema = rowset_pb.mutable_tablet_schema();
+ schema->set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V1);
for (int i = 0; i < num_inverted_indexes; ++i) {
- schema->add_index()->set_index_id(i);
+ auto index = schema->add_index();
+ index->set_index_id(i);
+ index->set_index_type(IndexType::INVERTED);
}
}
rowset_pb.SerializeToString(&val);
@@ -277,6 +281,24 @@ static int create_committed_rowset(TxnKv* txn_kv, StorageVaultAccessor* accessor
auto path = segment_path(tablet_id, rowset_id, i);
accessor->put_file(path, "");
for (int j = 0; j < num_inverted_indexes; ++j) {
+ std::string key1;
+ std::string val1;
+ MetaTabletIdxKeyInfo key_info1 {instance_id, tablet_id};
+ meta_tablet_idx_key(key_info1, &key1);
+ TabletIndexPB tablet_table;
+ tablet_table.set_db_id(db_id);
+ tablet_table.set_index_id(j);
+ tablet_table.set_tablet_id(tablet_id);
+ if (!tablet_table.SerializeToString(&val1)) {
+ return -1;
+ }
+ if (txn_kv->create_txn(&txn) != TxnErrorCode::TXN_OK) {
+ return -1;
+ }
+ txn->put(key1, val1);
+ if (txn->commit() != TxnErrorCode::TXN_OK) {
+ return -1;
+ }
auto path = inverted_index_path_v1(tablet_id, rowset_id, i, j, "");
accessor->put_file(path, "");
}
@@ -2470,6 +2492,129 @@ TEST(CheckerTest, DISABLED_abnormal_inverted_check) {
ASSERT_NE(checker.do_inverted_check(), 0);
}
+TEST(CheckerTest, inverted_check_recycle_idx_file) {
+ auto* sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&sp](int*) {
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ });
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("CheckerTest");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Add some visible rowsets along with some rowsets that should be recycled
+ // call inverted check after do recycle which would sweep all the rowsets not visible
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ sp->set_call_back(
+ "InstanceRecycler::init_storage_vault_accessors.mock_vault",
[&accessor](auto&& args) {
+ auto* map = try_any_cast<
+ std::unordered_map<std::string, std::shared_ptr<StorageVaultAccessor>>*>(
+ args[0]);
+ auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+ if (vault->name() == "test_success_hdfs_vault") {
+ map->emplace(vault->id(), accessor);
+ }
+ });
+ sp->enable_processing();
+
+ for (int t = 10001; t <= 10100; ++t) {
+ for (int v = 0; v < 10; ++v) {
+ int ret = create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v, 1, 3);
+ ASSERT_EQ(ret, 0) << "Failed to create committed rs: " << ret;
+ }
+ }
+ std::unique_ptr<ListIterator> list_iter;
+ int ret = accessor->list_directory("data", &list_iter);
+ ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+ int64_t tablet_id_to_delete_index = -1;
+ for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) {
+ std::vector<std::string> str;
+ butil::SplitString(file->path, '/', &str);
+ int64_t tablet_id = atol(str[1].c_str());
+
+ // only delete one index file of every tablet to mock recycling
+ // The reason for not choosing "delete all idx files" is that the inverted check cannot handle that case;
+ // forward checking is required.
+ if (file->path.ends_with(".idx") && tablet_id_to_delete_index !=
tablet_id) {
+ accessor->delete_file(file->path);
+ tablet_id_to_delete_index = tablet_id;
+ }
+ }
+ ASSERT_EQ(checker.do_inverted_check(), 1);
+}
+
+TEST(CheckerTest, forward_check_recycle_idx_file) {
+ auto* sp = SyncPoint::get_instance();
+ std::unique_ptr<int, std::function<void(int*)>> defer((int*)0x01, [&sp](int*) {
+ sp->clear_all_call_backs();
+ sp->disable_processing();
+ });
+
+ auto txn_kv = std::make_shared<MemTxnKv>();
+ ASSERT_EQ(txn_kv->init(), 0);
+
+ InstanceInfoPB instance;
+ instance.set_instance_id(instance_id);
+ auto obj_info = instance.add_obj_info();
+ obj_info->set_id("1");
+ obj_info->set_ak(config::test_s3_ak);
+ obj_info->set_sk(config::test_s3_sk);
+ obj_info->set_endpoint(config::test_s3_endpoint);
+ obj_info->set_region(config::test_s3_region);
+ obj_info->set_bucket(config::test_s3_bucket);
+ obj_info->set_prefix("CheckerTest");
+
+ InstanceChecker checker(txn_kv, instance_id);
+ ASSERT_EQ(checker.init(instance), 0);
+ // Add some visible rowsets along with some rowsets that should be recycled
+ // call inverted check after do recycle which would sweep all the rowsets not visible
+ auto accessor = checker.accessor_map_.begin()->second;
+
+ sp->set_call_back(
+ "InstanceRecycler::init_storage_vault_accessors.mock_vault",
[&accessor](auto&& args) {
+ auto* map = try_any_cast<
+ std::unordered_map<std::string, std::shared_ptr<StorageVaultAccessor>>*>(
+ args[0]);
+ auto* vault = try_any_cast<StorageVaultPB*>(args[1]);
+ if (vault->name() == "test_success_hdfs_vault") {
+ map->emplace(vault->id(), accessor);
+ }
+ });
+ sp->enable_processing();
+
+ for (int t = 10001; t <= 10100; ++t) {
+ for (int v = 0; v < 10; ++v) {
+ create_committed_rowset(txn_kv.get(), accessor.get(), "1", t, v,
1, 3);
+ }
+ }
+ std::unique_ptr<ListIterator> list_iter;
+ int ret = accessor->list_directory("data", &list_iter);
+ ASSERT_EQ(ret, 0) << "Failed to list directory: " << ret;
+
+ for (auto file = list_iter->next(); file.has_value(); file = list_iter->next()) {
+ // delete all index files of every tablet to mock recycling
+ if (file->path.ends_with(".idx")) {
+ accessor->delete_file(file->path);
+ }
+ }
+ ASSERT_EQ(checker.do_check(), 1);
+}
+
TEST(CheckerTest, normal) {
auto txn_kv = std::make_shared<MemTxnKv>();
ASSERT_EQ(txn_kv->init(), 0);