This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new af779f5cd8f Pick "[fix](gclog) Skip tablet dir without schema hash dir 
in path gc (#32793)" (#35978)
af779f5cd8f is described below

commit af779f5cd8ff2ad2986313ea65b707a5b6bca217
Author: plat1ko <platonekos...@gmail.com>
AuthorDate: Thu Jun 6 22:24:30 2024 +0800

    Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc 
(#32793)" (#35978)
    
    ## Proposed changes
    Pick "[fix](gclog) Skip tablet dir without schema hash dir in path gc
    (#32793)"
---
 be/src/olap/data_dir.cpp       | 281 ++++++++++++++++++++---------------------
 be/src/olap/data_dir.h         |   6 +-
 be/src/olap/tablet_manager.cpp |   2 +-
 be/src/runtime/exec_env.cpp    |   3 -
 be/test/olap/path_gc_test.cpp  |  19 +--
 5 files changed, 140 insertions(+), 171 deletions(-)

diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 72604bcca94..03027184357 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -662,170 +662,144 @@ Status DataDir::load() {
     return Status::OK();
 }
 
-void DataDir::perform_path_gc() {
-    auto tablet_paths = _perform_path_scan();
-    _perform_path_gc_by_tablet(tablet_paths);
-    _perform_path_gc_by_rowset(tablet_paths);
-}
+// gc unused local tablet dir
+void DataDir::_perform_tablet_gc(const std::string& tablet_schema_hash_path) {
+    if (_stop_bg_worker) {
+        return;
+    }
 
-// gc unused tablet schemahash dir
-void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& 
tablet_paths) {
-    if (_stop_bg_worker) return;
-    if (tablet_paths.empty()) {
+    TTabletId tablet_id = -1;
+    TSchemaHash schema_hash = -1;
+    bool is_valid = TabletManager::get_tablet_id_and_schema_hash_from_path(
+            tablet_schema_hash_path, &tablet_id, &schema_hash);
+    if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
+        LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
         return;
     }
-    LOG(INFO) << "start to path gc by tablet, dir=" << _path;
-    // Use double pointer to avoid move elements after erase a element
-    auto forward = tablet_paths.begin();
-    auto backward = tablet_paths.end();
-    int counter = 0;
-    do {
-        if (config::path_gc_check_step > 0 && counter % 
config::path_gc_check_step == 0) {
-            std::this_thread::sleep_for(
-                    
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
-        }
-        auto& path = *forward;
-        TTabletId tablet_id = -1;
-        TSchemaHash schema_hash = -1;
-        bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
-                                                                               
  &schema_hash);
-        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
-            LOG(WARNING) << "unknown path:" << path;
-            --backward;
-            std::swap(*forward, *backward);
-            continue;
-        }
-        auto tablet = _tablet_manager->get_tablet(tablet_id);
-        if (!tablet || tablet->data_dir() != this) {
-            if (tablet) {
-                LOG(INFO) << "The tablet in path " << path
-                          << " is not same with the running one: " << 
tablet->data_dir()->_path
-                          << "/" << tablet->tablet_path()
-                          << ", might be the old tablet after migration, try 
to move it to trash";
-            }
-            _tablet_manager->try_delete_unused_tablet_path(this, tablet_id, 
schema_hash, path);
-            --backward;
-            std::swap(*forward, *backward);
-            continue;
+
+    auto tablet = 
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+    if (!tablet || tablet->data_dir() != this) {
+        if (tablet) {
+            LOG(INFO) << "The tablet in path " << tablet_schema_hash_path
+                      << " is not same with the running one: " << 
tablet->data_dir()->_path << "/"
+                      << tablet->tablet_path()
+                      << ", might be the old tablet after migration, try to 
move it to trash";
         }
-        // could find the tablet, then skip check it
-        ++forward;
-    } while (forward != backward && !_stop_bg_worker);
-    tablet_paths.erase(backward, tablet_paths.end());
-    LOG(INFO) << "finish path gc by tablet, dir=" << _path;
+        
StorageEngine::instance()->tablet_manager()->try_delete_unused_tablet_path(
+                this, tablet_id, schema_hash, tablet_schema_hash_path);
+        return;
+    }
+
+    _perform_rowset_gc(tablet_schema_hash_path);
 }
 
-void DataDir::_perform_path_gc_by_rowset(const std::vector<std::string>& 
tablet_paths) {
-    if (_stop_bg_worker || tablet_paths.empty()) {
+// gc unused local rowsets under tablet dir
+void DataDir::_perform_rowset_gc(const std::string& tablet_schema_hash_path) {
+    if (_stop_bg_worker) {
         return;
     }
 
-    LOG(INFO) << "start to path gc by rowset";
-    int counter = 0;
-    for (const auto& path : tablet_paths) {
-        if (_stop_bg_worker) {
-            break;
-        }
+    TTabletId tablet_id = -1;
+    TSchemaHash schema_hash = -1;
+    bool is_valid = 
doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
+            tablet_schema_hash_path, &tablet_id, &schema_hash);
+    if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
+        LOG(WARNING) << "[path gc] unknown path: " << tablet_schema_hash_path;
+        return;
+    }
 
-        ++counter;
-        if (config::path_gc_check_step > 0 && counter % 
config::path_gc_check_step == 0) {
-            std::this_thread::sleep_for(
-                    
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
-        }
-        TTabletId tablet_id = -1;
-        TSchemaHash schema_hash = -1;
-        bool is_valid = 
doris::TabletManager::get_tablet_id_and_schema_hash_from_path(
-                path, &tablet_id, &schema_hash);
-        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
-            LOG(WARNING) << "[path gc] unknown path:" << path;
-            continue;
-        }
+    auto tablet = 
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id);
+    if (!tablet) {
+        // Could not found the tablet, maybe it's a dropped tablet, will be 
reclaimed
+        // in the next time `_perform_path_gc_by_tablet`
+        return;
+    }
 
-        auto tablet = _tablet_manager->get_tablet(tablet_id);
-        if (!tablet) {
-            // Could not found the tablet, maybe it's a dropped tablet, will 
be reclaimed
-            // in the next time `_perform_path_gc_by_tablet`
-            continue;
-        }
+    if (tablet->data_dir() != this) {
+        // Current running tablet is not in same data_dir, maybe it's a tablet 
after migration,
+        // will be reclaimed in the next time `_perform_path_gc_by_tablet`
+        return;
+    }
 
-        if (tablet->data_dir() != this) {
-            // Current running tablet is not in same data_dir, maybe it's a 
tablet after migration,
-            // will be reclaimed in the next time `_perform_path_gc_by_tablet`
-            continue;
+    bool exists;
+    std::vector<io::FileInfo> files;
+    auto st = io::global_local_filesystem()->list(tablet_schema_hash_path, 
true, &files, &exists);
+    if (!st.ok()) [[unlikely]] {
+        LOG(WARNING) << "[path gc] fail to list tablet path " << 
tablet_schema_hash_path << " : "
+                     << st;
+        return;
+    }
+
+    // Rowset files excluding pending rowsets
+    std::vector<std::pair<RowsetId, std::string /* filename */>> 
rowsets_not_pending;
+    for (auto&& file : files) {
+        auto rowset_id = extract_rowset_id(file.file_name);
+        if (rowset_id.hi == 0) {
+            continue; // Not a rowset
         }
 
-        bool exists;
-        std::vector<io::FileInfo> files;
-        auto st = io::global_local_filesystem()->list(path, true, &files, 
&exists);
-        if (!st.ok()) [[unlikely]] {
-            LOG(WARNING) << "[path gc] fail to list tablet path " << path << " 
: " << st;
-            continue;
+        if 
(StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) {
+            continue; // Pending rowset file
         }
 
-        // Rowset files excluding pending rowsets
-        std::vector<std::pair<RowsetId, std::string /* filename */>> 
rowsets_not_pending;
-        for (auto&& file : files) {
-            auto rowset_id = extract_rowset_id(file.file_name);
-            if (rowset_id.hi == 0) {
-                continue; // Not a rowset
-            }
+        rowsets_not_pending.emplace_back(rowset_id, std::move(file.file_name));
+    }
 
-            if 
(StorageEngine::instance()->pending_local_rowsets().contains(rowset_id)) {
-                continue; // Pending rowset file
-            }
+    RowsetIdUnorderedSet rowsets_in_version_map;
+    tablet->traverse_rowsets(
+            [&rowsets_in_version_map](auto& rs) { 
rowsets_in_version_map.insert(rs->rowset_id()); },
+            true);
 
-            rowsets_not_pending.emplace_back(rowset_id, 
std::move(file.file_name));
+    auto reclaim_rowset_file = [](const std::string& path) {
+        auto st = io::global_local_filesystem()->delete_file(path);
+        if (!st.ok()) [[unlikely]] {
+            LOG(WARNING) << "[path gc] failed to delete garbage rowset file: " 
<< st;
+            return;
         }
+        LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit log
+    };
 
-        RowsetIdUnorderedSet rowsets_in_version_map;
-        tablet->traverse_rowsets(
-                [&rowsets_in_version_map](auto& rs) {
-                    rowsets_in_version_map.insert(rs->rowset_id());
-                },
-                true);
+    auto should_reclaim = [&, this](const RowsetId& rowset_id) {
+        return !rowsets_in_version_map.contains(rowset_id) &&
+               
!StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) &&
+               RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), 
rowset_id)
+                       .is<META_KEY_NOT_FOUND>();
+    };
 
-        auto reclaim_rowset_file = [](const std::string& path) {
-            auto st = io::global_local_filesystem()->delete_file(path);
-            if (!st.ok()) [[unlikely]] {
-                LOG(WARNING) << "[path gc] failed to delete garbage rowset 
file: " << st;
-                return;
-            }
-            LOG(INFO) << "[path gc] delete garbage path: " << path; // Audit 
log
-        };
-
-        auto should_reclaim = [&, this](const RowsetId& rowset_id) {
-            return !rowsets_in_version_map.contains(rowset_id) &&
-                   
!StorageEngine::instance()->check_rowset_id_in_unused_rowsets(rowset_id) &&
-                   RowsetMetaManager::exists(get_meta(), tablet->tablet_uid(), 
rowset_id)
-                           .is<META_KEY_NOT_FOUND>();
-        };
-
-        // rowset_id -> is_garbage
-        std::unordered_map<RowsetId, bool> checked_rowsets;
-        for (auto&& [rowset_id, filename] : rowsets_not_pending) {
-            if (auto it = checked_rowsets.find(rowset_id); it != 
checked_rowsets.end()) {
-                if (it->second) { // Is checked garbage rowset
-                    reclaim_rowset_file(path + '/' + filename);
-                }
-                continue;
+    // rowset_id -> is_garbage
+    std::unordered_map<RowsetId, bool> checked_rowsets;
+    for (auto&& [rowset_id, filename] : rowsets_not_pending) {
+        if (_stop_bg_worker) {
+            return;
+        }
+
+        if (auto it = checked_rowsets.find(rowset_id); it != 
checked_rowsets.end()) {
+            if (it->second) { // Is checked garbage rowset
+                reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
             }
+            continue;
+        }
 
-            if (should_reclaim(rowset_id)) {
-                reclaim_rowset_file(path + '/' + filename);
-                checked_rowsets.emplace(rowset_id, true);
-            } else {
-                checked_rowsets.emplace(rowset_id, false);
+        if (should_reclaim(rowset_id)) {
+            if (config::path_gc_check_step > 0 &&
+                ++_path_gc_step % config::path_gc_check_step == 0) {
+                std::this_thread::sleep_for(
+                        
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
             }
+            reclaim_rowset_file(tablet_schema_hash_path + '/' + filename);
+            checked_rowsets.emplace(rowset_id, true);
+        } else {
+            checked_rowsets.emplace(rowset_id, false);
         }
     }
-    LOG(INFO) << "finish path gc by rowset.";
 }
 
-// path producer
-std::vector<std::string> DataDir::_perform_path_scan() {
-    std::vector<std::string> tablet_paths;
-    if (_stop_bg_worker) return tablet_paths;
-    LOG(INFO) << "start to scan data dir " << _path;
+void DataDir::perform_path_gc() {
+    if (_stop_bg_worker) {
+        return;
+    }
+
+    LOG(INFO) << "start to gc data dir " << _path;
     auto data_path = fmt::format("{}/{}", _path, DATA_PREFIX);
     std::vector<io::FileInfo> shards;
     bool exists = true;
@@ -833,13 +807,18 @@ std::vector<std::string> DataDir::_perform_path_scan() {
     auto st = fs->list(data_path, false, &shards, &exists);
     if (!st.ok()) [[unlikely]] {
         LOG(WARNING) << "failed to scan data dir: " << st;
-        return tablet_paths;
+        return;
     }
+
     for (const auto& shard : shards) {
-        if (_stop_bg_worker) break;
+        if (_stop_bg_worker) {
+            break;
+        }
+
         if (shard.is_file) {
             continue;
         }
+
         auto shard_path = fmt::format("{}/{}", data_path, shard.file_name);
         std::vector<io::FileInfo> tablet_ids;
         st = io::global_local_filesystem()->list(shard_path, false, 
&tablet_ids, &exists);
@@ -847,31 +826,41 @@ std::vector<std::string> DataDir::_perform_path_scan() {
             LOG(WARNING) << "fail to walk dir, shard_path=" << shard_path << " 
: " << st;
             continue;
         }
+
         for (const auto& tablet_id : tablet_ids) {
             if (_stop_bg_worker) {
                 break;
             }
+
             if (tablet_id.is_file) {
                 continue;
             }
+
             auto tablet_id_path = fmt::format("{}/{}", shard_path, 
tablet_id.file_name);
             std::vector<io::FileInfo> schema_hashes;
-            st = io::global_local_filesystem()->list(tablet_id_path, false, 
&schema_hashes,
-                                                     &exists);
+            st = fs->list(tablet_id_path, false, &schema_hashes, &exists);
             if (!st.ok()) [[unlikely]] {
                 LOG(WARNING) << "fail to walk dir, tablet_id_path=" << 
tablet_id_path << " : "
                              << st;
                 continue;
             }
-            if (schema_hashes.size() != 1 || schema_hashes[0].is_file) 
[[unlikely]] {
-                LOG(WARNING) << "invalid tablet_path, path=" << tablet_id_path;
-                continue;
+
+            for (auto&& schema_hash : schema_hashes) {
+                if (schema_hash.is_file) {
+                    continue;
+                }
+
+                if (config::path_gc_check_step > 0 &&
+                    ++_path_gc_step % config::path_gc_check_step == 0) {
+                    std::this_thread::sleep_for(
+                            
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
+                }
+                _perform_tablet_gc(tablet_id_path + '/' + 
schema_hash.file_name);
             }
-            tablet_paths.push_back(tablet_id_path + '/' + 
schema_hashes[0].file_name);
         }
     }
-    LOG(INFO) << "scan data dir path: " << _path << " finished. path size: " 
<< tablet_paths.size();
-    return tablet_paths;
+
+    LOG(INFO) << "gc data dir path: " << _path << " finished";
 }
 
 Status DataDir::update_capacity() {
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 424315b79a2..46abc75934d 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -159,11 +159,11 @@ private:
     // process will log fatal.
     Status _check_incompatible_old_format_tablet();
 
-    std::vector<std::string> _perform_path_scan();
+    int _path_gc_step {0};
 
-    void _perform_path_gc_by_tablet(std::vector<std::string>& tablet_paths);
+    void _perform_tablet_gc(const std::string& tablet_schema_hash_path);
 
-    void _perform_path_gc_by_rowset(const std::vector<std::string>& 
tablet_paths);
+    void _perform_rowset_gc(const std::string& tablet_schema_hash_path);
 
 private:
     std::atomic<bool> _stop_bg_worker = false;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index f4b20e99ad8..196b6dc9543 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -845,7 +845,7 @@ Status TabletManager::load_tablet_from_meta(DataDir* 
data_dir, TTabletId tablet_
     // For case 1 doesn't need path check because BE is just starting and not 
ready,
     // just check tablet meta status to judge whether tablet is delete is 
enough.
     // For case 2, If a tablet has just been copied to local BE,
-    // it may be cleared by gc-thread(see perform_path_gc_by_tablet) because 
the tablet meta may not be loaded to memory.
+    // it may be cleared by gc-thread(see perform_tablet_gc) because the 
tablet meta may not be loaded to memory.
     // So clone task should check path and then failed and retry in this case.
     if (check_path) {
         bool exists = true;
diff --git a/be/src/runtime/exec_env.cpp b/be/src/runtime/exec_env.cpp
index a2a031c7226..1573f2ba27c 100644
--- a/be/src/runtime/exec_env.cpp
+++ b/be/src/runtime/exec_env.cpp
@@ -45,9 +45,6 @@ ExecEnv::~ExecEnv() {
 
 // TODO(plat1ko): template <class Engine>
 #ifdef BE_TEST
-void ExecEnv::set_storage_engine(std::unique_ptr<BaseStorageEngine>&& engine) {
-    _storage_engine = std::move(engine);
-}
 void ExecEnv::set_write_cooldown_meta_executors() {
     _write_cooldown_meta_executors = 
std::make_unique<WriteCooldownMetaExecutors>();
 }
diff --git a/be/test/olap/path_gc_test.cpp b/be/test/olap/path_gc_test.cpp
index 7a52b28d82a..edd8f4c1d27 100644
--- a/be/test/olap/path_gc_test.cpp
+++ b/be/test/olap/path_gc_test.cpp
@@ -15,7 +15,6 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <gen_cpp/Types_types.h>
 #include <gtest/gtest.h>
 
 #include <random>
@@ -83,23 +82,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
         ASSERT_TRUE(st.ok()) << st;
     }
 
-    // Test path scan
-    auto paths = data_dir._perform_path_scan();
-    ASSERT_EQ(paths.size(), 20);
-
     // Test tablet gc
-    config::path_gc_check_step = 0;
-    data_dir._perform_path_gc_by_tablet(paths);
-    ASSERT_EQ(paths.size(), 10);
-    std::vector<std::string_view> expected_paths;
-    for (auto&& tablet : active_tablets) {
-        expected_paths.emplace_back(tablet->tablet_path());
-    }
-    std::sort(expected_paths.begin(), expected_paths.end());
-    std::sort(paths.begin(), paths.end());
-    for (size_t i = 0; i < paths.size(); ++i) {
-        EXPECT_EQ(paths[i], expected_paths[i]);
-    }
 
     // Prepare rowsets
     auto rng = std::default_random_engine 
{static_cast<uint32_t>(::time(nullptr))};
@@ -187,7 +170,7 @@ TEST(PathGcTest, GcTabletAndRowset) {
     }
 
     // Test rowset gc
-    data_dir._perform_path_gc_by_rowset(paths);
+    data_dir.perform_path_gc();
     for (auto&& t : active_tablets) {
         std::vector<io::FileInfo> files;
         bool exists;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to