This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new af779f5cd8f Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc (#32793)" (#35978) af779f5cd8f is described below commit af779f5cd8ff2ad2986313ea65b707a5b6bca217 Author: plat1ko <platonekos...@gmail.com> AuthorDate: Thu Jun 6 22:24:30 2024 +0800 Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc (#32793)" (#35978) ## Proposed changes Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc (#32793)" --- be/src/olap/data_dir.cpp | 281 ++++++++++++++++++++--------------------- be/src/olap/data_dir.h | 6 +- be/src/olap/tablet_manager.cpp | 2 +- be/src/runtime/exec_env.cpp | 3 - be/test/olap/path_gc_test.cpp | 19 +-- 5 files changed, 140 insertions(+), 171 deletions(-) diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 72604bcca94..03027184357 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -662,170 +662,144 @@ Status DataDir::load() { return Status::OK(); } -void DataDir::perform_path_gc() { - auto tablet_paths = _perform_path_scan(); - _perform_path_gc_by_tablet(tablet_paths); - _perform_path_gc_by_rowset(tablet_paths); -} +// gc unused local tablet dir +void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path) { + if (_stop_bg_worker) { + return; + } -// gc unused tablet schemahash dir -void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths) { - if (_stop_bg_worker) return; - if (tablet_paths.empty()) { + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path( + tablet_schema_hash_path, &tablet_id, &schema_hash); + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { + LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; return; } - LOG(INFO) << "start to path gc by tablet, dir=" << _path; - // Use double pointer to avoid move elements after erase a element - auto forward = tablet_paths.begin(); - auto backward = tablet_paths.end(); - int counter = 0; - do { - if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { - std::this_thread::sleep_for( - std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); - } - auto& path = *forward; - TTabletId tablet_id = -1; - TSchemaHash schema_hash = -1; - bool is_valid = _tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id, - &schema_hash); - if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { - LOG(WARNING) << "unknown path:" << path; - --backward; - std::swap(*forward, *backward); - continue; - } - auto tablet = _tablet_manager->get_tablet(tablet_id); - if (!tablet || tablet->data_dir() != this) { - if (tablet) { - LOG(INFO) << "The tablet in path " << path - << " is not same with the running one: " << tablet->data_dir()->_path - << "/" << tablet->tablet_path() - << ", might be the old tablet after migration, try to move it to trash"; - } - _tablet_manager->try_delete_unused_tablet_path(this, tablet_id, schema_hash, path); - --backward; - std::swap(*forward, *backward); - continue; + + auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); + if (!tablet || tablet->data_dir() != this) { + if (tablet) { + LOG(INFO) << "The tablet in path " << tablet_schema_hash_path + << " is not same with the running one: " << tablet->data_dir()->_path << "/" + << tablet->tablet_path() + << ", might be the old tablet after migration, try to move it to trash"; } - // could find the tablet, then skip check it - ++forward; - } while (forward != backward && !_stop_bg_worker); - tablet_paths.erase(backward, tablet_paths.end()); - LOG(INFO) << "finish path gc by tablet, dir=" << _path; + StorageEngine::instance()->tablet_manager()->try_delete_unused_tablet_path( + this, tablet_id, schema_hash, tablet_schema_hash_path); + return; + } + + _perform_rowset_gc(tablet_schema_hash_path); } -void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& tablet_paths) { - if (_stop_bg_worker || tablet_paths.empty()) { +// gc unused local rowsets under tablet dir +void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) { + if (_stop_bg_worker) { return; } - LOG(INFO) << "start to path gc by rowset"; - int counter = 0; - for (const auto& path : tablet_paths) { - if (_stop_bg_worker) { - break; - } + TTabletId tablet_id = -1; + TSchemaHash schema_hash = -1; + bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path( + tablet_schema_hash_path, &tablet_id, &schema_hash); + if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { + LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path; + return; + } - ++counter; - if (config::path_gc_check_step > 0 && counter % config::path_gc_check_step == 0) { - std::this_thread::sleep_for( - std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); - } - TTabletId tablet_id = -1; - TSchemaHash schema_hash = -1; - bool is_valid = doris::TabletManager::get_tablet_id_and_schema_hash_from_path( - path, &tablet_id, &schema_hash); - if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] { - LOG(WARNING) << "[path gc] unknown path:" << path; - continue; - } + auto tablet = StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id); + if (!tablet) { + // Could not found the tablet, maybe it's a dropped tablet, will be reclaimed + // in the next time `_perform_path_gc_by_tablet` + return; + } - auto tablet = _tablet_manager->get_tablet(tablet_id); - if (!tablet) { - // Could not found the tablet, maybe it's a dropped tablet, will be reclaimed - // in the next time `_perform_path_gc_by_tablet` - continue; - } + if (tablet->data_dir() != this) { + // Current running tablet is not in same data_dir, maybe it's a tablet after migration, + // will be reclaimed in the next time `_perform_path_gc_by_tablet` + return; + } - if (tablet->data_dir() != this) { - // Current running tablet is not in same data_dir, maybe it's a tablet after migration, - // will be reclaimed in the next time `_perform_path_gc_by_tablet` - continue; + bool exists; + std::vector<io::FileInfo> files; + auto st = io::global_local_filesystem()->list(tablet_schema_hash_path, true, &files, &exists); + if (!st.ok()) [[unlikely]] { + LOG(WARNING) << "[path gc] fail to list tablet path " << tablet_schema_hash_path << " : " + << st; + return; + } + + // Rowset files excluding pending rowsets + std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending; + for (auto&& file : files) { + auto rowset_id = extract_rowset_id(file.file_name); + if (rowset_id.hi == 0) { + continue; // Not a rowset } - bool exists; - std::vector<io::FileInfo> files; - auto st = io::global_local_filesystem()->list(path, true, &files, &exists); - if (!st.ok()) [[unlikely]] { - LOG(WARNING) << "[path gc] fail to list tablet path " << path << " : " << st; - continue; + if (StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) { + continue; // Pending rowset file } - // Rowset files excluding pending rowsets - std::vector<std::pair<RowsetId, std::string /* filename */>> rowsets_not_pending; - for (auto&& file : files) { - auto rowset_id = extract_rowset_id(file.file_name); - if (rowset_id.hi == 0) { - continue; // Not a rowset - } + rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name)); + } - if (StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) { - continue; // Pending rowset file - } + RowsetIdUnorderedSet rowsets_in_version_map; + tablet->traverse_rowsets( + [&rowsets_in_version_map](auto& rs) { rowsets_in_version_map.insert(rs->rowset_id()); }, + true); - rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name)); + auto reclaim_rowset_file = [](const std::string& path) { + auto st = io::global_local_filesystem()->delete_file(path); + if (!st.ok()) [[unlikely]] { + LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st; + return; } + LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log + }; - RowsetIdUnorderedSet rowsets_in_version_map; - tablet->traverse_rowsets( - [&rowsets_in_version_map](auto& rs) { - rowsets_in_version_map.insert(rs->rowset_id()); - }, - true); + auto should_reclaim = [&, this](const RowsetId& rowset_id) { + return !rowsets_in_version_map.contains(rowset_id) && + !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) && + RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id) + .is<META_KEY_NOT_FOUND>(); + }; - auto reclaim_rowset_file = [](const std::string& path) { - auto st = io::global_local_filesystem()->delete_file(path); - if (!st.ok()) [[unlikely]] { - LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " << st; - return; - } - LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log - }; - - auto should_reclaim = [&, this](const RowsetId& rowset_id) { - return !rowsets_in_version_map.contains(rowset_id) && - !StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) && - RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), rowset_id) - .is<META_KEY_NOT_FOUND>(); - }; - - // rowset_id -> is_garbage - std::unordered_map<RowsetId, bool> checked_rowsets; - for (auto&& [rowset_id, filename] : rowsets_not_pending) { - if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) { - if (it->second) { // Is checked garbage rowset - reclaim_rowset_file(path + '/' + filename); - } - continue; + // rowset_id -> is_garbage + std::unordered_map<RowsetId, bool> checked_rowsets; + for (auto&& [rowset_id, filename] : rowsets_not_pending) { + if (_stop_bg_worker) { + return; + } + + if (auto it = checked_rowsets.find(rowset_id); it != checked_rowsets.end()) { + if (it->second) { // Is checked garbage rowset + reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); } + continue; + } - if (should_reclaim(rowset_id)) { - reclaim_rowset_file(path + '/' + filename); - checked_rowsets.emplace(rowset_id, true); - } else { - checked_rowsets.emplace(rowset_id, false); + if (should_reclaim(rowset_id)) { + if (config::path_gc_check_step > 0 && + ++_path_gc_step % config::path_gc_check_step == 0) { + std::this_thread::sleep_for( + std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); } + reclaim_rowset_file(tablet_schema_hash_path + '/' + filename); + checked_rowsets.emplace(rowset_id, true); + } else { + checked_rowsets.emplace(rowset_id, false); } } - LOG(INFO) << "finish path gc by rowset."; } -// path producer -std::vector<std::string> DataDir::_perform_path_scan() { - std::vector<std::string> tablet_paths; - if (_stop_bg_worker) return tablet_paths; - LOG(INFO) << "start to scan data dir " << _path; +void DataDir::perform_path_gc() { + if (_stop_bg_worker) { + return; + } + + LOG(INFO) << "start to gc data dir " << _path; auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX); std::vector<io::FileInfo> shards; bool exists = true; @@ -833,13 +807,18 @@ std::vector<std::string> DataDir::_perform_path_scan() { auto st = fs->list(data_path, false, &shards, &exists); if (!st.ok()) [[unlikely]] { LOG(WARNING) << "failed to scan data dir: " << st; - return tablet_paths; + return; } + for (const auto& shard : shards) { - if (_stop_bg_worker) break; + if (_stop_bg_worker) { + break; + } + if (shard.is_file) { continue; } + auto shard_path = fmt::format("{}/{}", data_path, shard.file_name); std::vector<io::FileInfo> tablet_ids; st = io::global_local_filesystem()->list(shard_path, false, &tablet_ids, &exists); @@ -847,31 +826,41 @@ std::vector<std::string> DataDir::_perform_path_scan() { LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << " : " << st; continue; } + for (const auto& tablet_id : tablet_ids) { if (_stop_bg_worker) { break; } + if (tablet_id.is_file) { continue; } + auto tablet_id_path = fmt::format("{}/{}", shard_path, tablet_id.file_name); std::vector<io::FileInfo> schema_hashes; - st = io::global_local_filesystem()->list(tablet_id_path, false, &schema_hashes, - &exists); + st = fs->list(tablet_id_path, false, &schema_hashes, &exists); if (!st.ok()) [[unlikely]] { LOG(WARNING) << "fail to walk dir, tablet_id_path=" << tablet_id_path << " : " << st; continue; } - if (schema_hashes.size() != 1 || schema_hashes[0].is_file) [[unlikely]] { - LOG(WARNING) << "invalid tablet_path, path=" << tablet_id_path; - continue; + + for (auto&& schema_hash : schema_hashes) { + if (schema_hash.is_file) { + continue; + } + + if (config::path_gc_check_step > 0 && + ++_path_gc_step % config::path_gc_check_step == 0) { + std::this_thread::sleep_for( + std::chrono::milliseconds(config::path_gc_check_step_interval_ms)); + } + _perform_tablet_gc(tablet_id_path + '/' + schema_hash.file_name); } - tablet_paths.push_back(tablet_id_path + '/' + schema_hashes[0].file_name); } } - LOG(INFO) << "scan data dir path: " << _path << " finished. path size: " << tablet_paths.size(); - return tablet_paths; + + LOG(INFO) << "gc data dir path: " << _path << " finished"; } Status DataDir::update_capacity() { diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h index 424315b79a2..46abc75934d 100644 --- a/be/src/olap/data_dir.h +++ b/be/src/olap/data_dir.h @@ -159,11 +159,11 @@ private: // process will log fatal. Status _check_incompatible_old_format_tablet(); - std::vector<std::string> _perform_path_scan(); + int _path_gc_step {0}; - void _perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths); + void _perform_tablet_gc(const std::string& tablet_schema_hash_path); - void _perform_path_gc_by_rowset(const std::vector<std::string>& tablet_paths); + void _perform_rowset_gc(const std::string& tablet_schema_hash_path); private: std::atomic<bool> _stop_bg_worker = false; diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index f4b20e99ad8..196b6dc9543 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -845,7 +845,7 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_ // For case 1 doesn't need path check because BE is just starting and not ready, // just check tablet meta status to judge whether tablet is delete is enough. // For case 2, If a tablet has just been copied to local BE, - // it may be cleared by gc-thread(see perform_path_gc_by_tablet) because the tablet meta may not be loaded to memory. + // it may be cleared by gc-thread(see perform_tablet_gc) because the tablet meta may not be loaded to memory. // So clone task should check path and then failed and retry in this case. if (check_path) { bool exists = true; diff --git a/be/src/runtime/exec_env.cpp b/be/src/runtime/exec_env.cpp index a2a031c7226..1573f2ba27c 100644 --- a/be/src/runtime/exec_env.cpp +++ b/be/src/runtime/exec_env.cpp @@ -45,9 +45,6 @@ ExecEnv::~ExecEnv() { // TODO(plat1ko): template <class Engine> #ifdef BE_TEST -void ExecEnv::set_storage_engine(std::unique_ptr<BaseStorageEngine>&& engine) { - _storage_engine = std::move(engine); -} void ExecEnv::set_write_cooldown_meta_executors() { _write_cooldown_meta_executors = std::make_unique<WriteCooldownMetaExecutors>(); } diff --git a/be/test/olap/path_gc_test.cpp b/be/test/olap/path_gc_test.cpp index 7a52b28d82a..edd8f4c1d27 100644 --- a/be/test/olap/path_gc_test.cpp +++ b/be/test/olap/path_gc_test.cpp @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include <gen_cpp/Types_types.h> #include <gtest/gtest.h> #include <random> @@ -83,23 +82,7 @@ TEST(PathGcTest, GcTabletAndRowset) { ASSERT_TRUE(st.ok()) << st; } - // Test path scan - auto paths = data_dir._perform_path_scan(); - ASSERT_EQ(paths.size(), 20); - // Test tablet gc - config::path_gc_check_step = 0; - data_dir._perform_path_gc_by_tablet(paths); - ASSERT_EQ(paths.size(), 10); - std::vector<std::string_view> expected_paths; - for (auto&& tablet : active_tablets) { - expected_paths.emplace_back(tablet->tablet_path()); - } - std::sort(expected_paths.begin(), expected_paths.end()); - std::sort(paths.begin(), paths.end()); - for (size_t i = 0; i < paths.size(); ++i) { - EXPECT_EQ(paths[i], expected_paths[i]); - } // Prepare rowsets auto rng = std::default_random_engine {static_cast<uint32_t>(::time(nullptr))}; @@ -187,7 +170,7 @@ TEST(PathGcTest, GcTabletAndRowset) { } // Test rowset gc - data_dir._perform_path_gc_by_rowset(paths); + data_dir.perform_path_gc(); for (auto&& t : active_tablets) { std::vector<io::FileInfo> files; bool exists; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org