JackDrogon commented on code in PR #25921:
URL: https://github.com/apache/doris/pull/25921#discussion_r1372727588


##########
be/src/olap/data_dir.cpp:
##########
@@ -627,74 +626,61 @@ Status DataDir::load() {
 }
 
 void DataDir::perform_path_gc() {
-    std::unique_lock<std::mutex> lck(_check_path_mutex);
-    _check_path_cv.wait(lck, [this] {
-        return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() ||
-               !_all_check_paths.empty();
-    });
-    if (_stop_bg_worker) {
-        return;
-    }
-
-    _perform_path_gc_by_tablet();
-    _perform_path_gc_by_rowsetid();
+    auto tablet_paths = _perform_path_scan();
+    _perform_path_gc_by_tablet(tablet_paths);
+    _perform_path_gc_by_rowset(tablet_paths);
 }
 
 // gc unused tablet schemahash dir
-void DataDir::_perform_path_gc_by_tablet() {
-    if (_all_tablet_schemahash_paths.empty()) {
+void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& 
tablet_paths) {
+    if (_stop_bg_worker) return;
+    if (tablet_paths.empty()) {
         return;
     }
-    LOG(INFO) << "start to path gc by tablet schemahash.";
+    LOG(INFO) << "start to path gc by tablet, dir=" << _path;
+    // Use double pointer to avoid move elements after erase a element
+    auto forward = tablet_paths.begin();
+    auto backward = tablet_paths.end();
     int counter = 0;
-    for (const auto& path : _all_tablet_schemahash_paths) {
-        ++counter;
+    do {
         if (config::path_gc_check_step > 0 && counter % 
config::path_gc_check_step == 0) {
             std::this_thread::sleep_for(
                     
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
         }
+        auto& path = *forward;
         TTabletId tablet_id = -1;
         TSchemaHash schema_hash = -1;
         bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
                                                                                
  &schema_hash);
-        if (!is_valid) {
+        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {

Review Comment:
   add comment for this decision condition



##########
be/src/olap/data_dir.cpp:
##########
@@ -627,74 +626,61 @@ Status DataDir::load() {
 }
 
 void DataDir::perform_path_gc() {
-    std::unique_lock<std::mutex> lck(_check_path_mutex);
-    _check_path_cv.wait(lck, [this] {
-        return _stop_bg_worker || !_all_tablet_schemahash_paths.empty() ||
-               !_all_check_paths.empty();
-    });
-    if (_stop_bg_worker) {
-        return;
-    }
-
-    _perform_path_gc_by_tablet();
-    _perform_path_gc_by_rowsetid();
+    auto tablet_paths = _perform_path_scan();
+    _perform_path_gc_by_tablet(tablet_paths);
+    _perform_path_gc_by_rowset(tablet_paths);
 }
 
 // gc unused tablet schemahash dir
-void DataDir::_perform_path_gc_by_tablet() {
-    if (_all_tablet_schemahash_paths.empty()) {
+void DataDir::_perform_path_gc_by_tablet(std::vector<std::string>& 
tablet_paths) {
+    if (_stop_bg_worker) return;
+    if (tablet_paths.empty()) {
         return;
     }
-    LOG(INFO) << "start to path gc by tablet schemahash.";
+    LOG(INFO) << "start to path gc by tablet, dir=" << _path;
+    // Use double pointer to avoid move elements after erase a element
+    auto forward = tablet_paths.begin();
+    auto backward = tablet_paths.end();
     int counter = 0;
-    for (const auto& path : _all_tablet_schemahash_paths) {
-        ++counter;
+    do {
         if (config::path_gc_check_step > 0 && counter % 
config::path_gc_check_step == 0) {
             std::this_thread::sleep_for(
                     
std::chrono::milliseconds(config::path_gc_check_step_interval_ms));
         }
+        auto& path = *forward;
         TTabletId tablet_id = -1;
         TSchemaHash schema_hash = -1;
         bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
                                                                                
  &schema_hash);
-        if (!is_valid) {
+        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
             LOG(WARNING) << "unknown path:" << path;
+            --backward;
+            std::swap(*forward, *backward);
             continue;
         }
-        // should not happen, because already check it is a valid tablet 
schema hash path in previous step
-        // so that log fatal here
-        if (tablet_id < 1 || schema_hash < 1) {
-            LOG(WARNING) << "invalid tablet id " << tablet_id << " or schema 
hash " << schema_hash
-                         << ", path=" << path;
-            continue;
-        }
-        TabletSharedPtr tablet = _tablet_manager->get_tablet(tablet_id);
-        if (tablet != nullptr) {
-            // could find the tablet, then skip check it
-            continue;
-        }
-        std::string data_dir_path =
-                
io::Path(path).parent_path().parent_path().parent_path().parent_path();
-        DataDir* data_dir = 
StorageEngine::instance()->get_store(data_dir_path);
-        if (data_dir == nullptr) {
-            LOG(WARNING) << "could not find data dir for tablet path " << path;
+        auto tablet = _tablet_manager->get_tablet(tablet_id);
+        if (!tablet) {

Review Comment:
   use if initialize



##########
be/src/olap/data_dir.cpp:
##########
@@ -704,55 +690,115 @@ void DataDir::_perform_path_gc_by_rowsetid() {
         TSchemaHash schema_hash = -1;
         bool is_valid = 
_tablet_manager->get_tablet_id_and_schema_hash_from_path(path, &tablet_id,
                                                                                
  &schema_hash);
-        if (!is_valid) {
+        if (!is_valid || tablet_id < 1 || schema_hash < 1) [[unlikely]] {
             LOG(WARNING) << "unknown path:" << path;
             continue;
         }
-        if (tablet_id > 0 && schema_hash > 0) {
-            // tablet schema hash path or rowset file path
-            // gc thread should get tablet include deleted tablet
-            // or it will delete rowset file before tablet is garbage collected
+        bool exists;

Review Comment:
   add more space for improving readability



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to