platoneko commented on code in PR #25921:
URL: https://github.com/apache/doris/pull/25921#discussion_r1373053955


##########
be/src/olap/data_dir.cpp:
##########
@@ -627,74 +626,61 @@ Status DataDir::load() {
 }
 
 void DataDir::perform_path_gc() {
-    std::unique_lock<std::mutex> lck(_check_path_mutex);
-    _check_path_cv.wait(lck, [this] {
-        return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() ||
-               !_all_check_paths.empty();
-    });
-    if (_stop_bg_worker) {
-        return;
-    }
-
-    _perform_path_gc_by_tablet();
-    _perform_path_gc_by_rowsetid();
+    auto tablet_paths = _perform_path_scan();
+    _perform_path_gc_by_tablet(tablet_paths);
+    _perform_path_gc_by_rowset(tablet_paths);
 }
 
 // gc unused tablet schemahash dir
-void DataDir::_perform_path_gc_by_tablet() {
-    if (_all_tablet_schemahash_paths.empty()) {
+void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& 
tablet_paths) {
+    if (_stop_bg_worker) return;
+    if (tablet_paths.empty()) {
         return;
     }
-    LOG(INFO) << "start to path gc by tablet schemahash.";
+    LOG(INFO) << "start to path gc by tablet, dir=" << _path;
+    // Use double pointer to avoid move elements after erase a element
+    auto forward = tablet_paths.begin();
+    auto backward = tablet_paths.end();
     int counter = 0;
-    for (const auto& path : _all_tablet_schemahash_paths) {
-        ++counter;
+    do {
         if (config::path_gc_check_step > 0 && counter % 
config::path_gc_check_step == 0) {
             std::this_thread::sleep_for(
                     
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
         }
+        auto& path = *forward;
         TTabletId tablet_id = -1;
         TSchemaHash schema_hash = -1;
         bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
                                                                                
  &schema_hash);
-        if (!is_valid) {
+        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
             LOG(WARNING) << "unknown path:" << path;
+            --backward;
+            std::swap(*forward, *backward);
             continue;
         }
-        // should not happen, because already check it is a valid tablet 
schema hash path in previous step
-        // so that log fatal here
-        if (tablet_id < 1 || schema_hash < 1) {
-            LOG(WARNING) << "invalid tablet id " << tablet_id << " or schema 
hash " << schema_hash
-                         << ", path=" << path;
-            continue;
-        }
-        TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id);
-        if (tablet != nullptr) {
-            // could find the tablet, then skip check it
-            continue;
-        }
-        std::string data_dir_path =
-                
io::Path(path).parent_path().parent_path().parent_path().parent_path();
-        DataDir* data_dir = 
StorageEngine::instance()->get_store(data_dir_path);
-        if (data_dir == nullptr) {
-            LOG(WARNING) << "could not find data dir for tablet path " << path;
+        auto tablet = _tablet_manager->get_tablet(tablet_id);
+        if (!tablet) {

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to