This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new fb72f2ab627 [fix](clone) Fix clone and alter tablet use same tablet 
path #34889 (#36791)
fb72f2ab627 is described below

commit fb72f2ab627c1f77c70c60efea7f32f543aa6da7
Author: deardeng <565620...@qq.com>
AuthorDate: Wed Jun 26 19:24:05 2024 +0800

    [fix](clone) Fix clone and alter tablet use same tablet path #34889 (#36791)
    
    cherry pick from #34889
---
 be/src/olap/data_dir.cpp                           |  40 +++---
 be/src/olap/data_dir.h                             |   4 +-
 be/src/olap/delta_writer.cpp                       |   5 -
 be/src/olap/schema_change.cpp                      |  10 --
 be/src/olap/storage_engine.cpp                     |   2 +
 be/src/olap/tablet_manager.cpp                     | 137 +++++++++++++++++----
 be/src/olap/tablet_manager.h                       |  16 ++-
 be/src/olap/task/engine_clone_task.cpp             |  31 +++--
 be/src/olap/task/engine_storage_migration_task.cpp |  10 +-
 .../test_drop_clone_tablet_path_race.groovy        |  85 +++++++++++++
 10 files changed, 266 insertions(+), 74 deletions(-)

diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index a12c9155439..12086586b41 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -627,16 +627,6 @@ Status DataDir::load() {
     return Status::OK();
 }
 
-void DataDir::add_pending_ids(const std::string& id) {
-    std::lock_guard<std::shared_mutex> wr_lock(_pending_path_mutex);
-    _pending_path_ids.insert(id);
-}
-
-void DataDir::remove_pending_ids(const std::string& id) {
-    std::lock_guard<std::shared_mutex> wr_lock(_pending_path_mutex);
-    _pending_path_ids.erase(id);
-}
-
 void DataDir::perform_path_gc() {
     std::unique_lock<std::mutex> lck(_check_path_mutex);
     _check_path_cv.wait(lck, [this] {
@@ -684,6 +674,8 @@ void DataDir::_perform_path_gc_by_tablet() {
             // could find the tablet, then skip check it
             continue;
         }
+        // data_dir_path/data/8/10031/1785511963
+        // data_dir_path/
         std::string data_dir_path =
                 
io::Path(path).parent_path().parent_path().parent_path().parent_path();
         DataDir* data_dir = 
StorageEngine::instance()->get_store(data_dir_path);
@@ -691,7 +683,19 @@ void DataDir::_perform_path_gc_by_tablet() {
             LOG(WARNING) << "could not find data dir for tablet path " << path;
             continue;
         }
-        _tablet_manager->try_delete_unused_tablet_path(data_dir, tablet_id, 
schema_hash, path);
+        // data_dir_path/data/8
+        std::string shard_path = io::Path(path).parent_path().parent_path();
+        std::filesystem::path sp(shard_path);
+        int16_t shard_id = -1;
+        try {
+            // 8
+            shard_id = std::stoi(sp.filename().string());
+        } catch (const std::exception&) {
+            LOG(WARNING) << "failed to stoi shard_id, shard name=" << 
sp.filename().string();
+            continue;
+        }
+        _tablet_manager->try_delete_unused_tablet_path(data_dir, tablet_id, 
schema_hash, path,
+                                                       shard_id);
     }
     _all_tablet_schemahash_paths.clear();
     LOG(INFO) << "finished one time path gc by tablet.";
@@ -840,11 +844,6 @@ void DataDir::_process_garbage_path(const std::string& 
path) {
     }
 }
 
-bool DataDir::_check_pending_ids(const std::string& id) {
-    std::shared_lock rd_lock(_pending_path_mutex);
-    return _pending_path_ids.find(id) != _pending_path_ids.end();
-}
-
 Status DataDir::update_capacity() {
     RETURN_IF_ERROR(io::global_local_filesystem()->get_space_info(_path, 
&_disk_capacity_bytes,
                                                                   
&_available_bytes));
@@ -947,8 +946,16 @@ Status DataDir::move_to_trash(const std::string& 
tablet_path) {
     }
 
     // 5. check parent dir of source file, delete it when empty
+    RETURN_IF_ERROR(delete_tablet_parent_path_if_empty(tablet_path));
+
+    return Status::OK();
+}
+
+Status DataDir::delete_tablet_parent_path_if_empty(const std::string& 
tablet_path) {
+    auto fs_tablet_path = io::Path(tablet_path);
     std::string source_parent_dir = fs_tablet_path.parent_path(); // tablet_id 
level
     std::vector<io::FileInfo> sub_files;
+    bool exists = true;
     RETURN_IF_ERROR(
             io::global_local_filesystem()->list(source_parent_dir, false, 
&sub_files, &exists));
     if (sub_files.empty()) {
@@ -956,7 +963,6 @@ Status DataDir::move_to_trash(const std::string& 
tablet_path) {
         // no need to exam return status
         io::global_local_filesystem()->delete_directory(source_parent_dir);
     }
-
     return Status::OK();
 }
 
diff --git a/be/src/olap/data_dir.h b/be/src/olap/data_dir.h
index 81c74f3bb2e..cf587b6d0db 100644
--- a/be/src/olap/data_dir.h
+++ b/be/src/olap/data_dir.h
@@ -156,6 +156,8 @@ public:
     // Move tablet to trash.
     Status move_to_trash(const std::string& tablet_path);
 
+    static Status delete_tablet_parent_path_if_empty(const std::string& 
tablet_path);
+
 private:
     Status _init_cluster_id();
     Status _init_capacity_and_create_shards();
@@ -174,7 +176,7 @@ private:
 
     void _remove_check_paths(const std::set<std::string>& paths);
 
-    bool _check_pending_ids(const std::string& id);
+    void _perform_tablet_gc(const std::string& tablet_schema_hash_path, 
int16_t shard_name);
 
     void _perform_path_gc_by_tablet();
 
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index f33040de2cc..ec699205aed 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -129,11 +129,6 @@ DeltaWriter::~DeltaWriter() {
         _calc_delete_bitmap_token->cancel();
     }
 
-    if (_tablet != nullptr) {
-        _tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
-                                                
_rowset_writer->rowset_id().to_string());
-    }
-
     _mem_table.reset();
 }
 
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 2ce03076117..822946cbab2 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -627,12 +627,6 @@ Status VSchemaChangeWithSorting::_internal_sorting(
     context.newest_write_timestamp = newest_write_timestamp;
     context.write_type = DataWriteType::TYPE_SCHEMA_CHANGE;
     RETURN_IF_ERROR(new_tablet->create_rowset_writer(context, &rowset_writer));
-
-    Defer defer {[&]() {
-        new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
-                                                   
rowset_writer->rowset_id().to_string());
-    }};
-
     RETURN_IF_ERROR(merger.merge(blocks, rowset_writer.get(), &merged_rows));
 
     _add_merged_rows(merged_rows);
@@ -1108,12 +1102,8 @@ Status 
SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams
             LOG(WARNING) << "failed to process the version."
                          << " version=" << rs_reader->version().first << "-"
                          << rs_reader->version().second << ", " << 
res.to_string();
-            new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
-                                                       
rowset_writer->rowset_id().to_string());
             return process_alter_exit();
         }
-        new_tablet->data_dir()->remove_pending_ids(ROWSET_ID_PREFIX +
-                                                   
rowset_writer->rowset_id().to_string());
         // Add the new version of the data to the header
         // In order to prevent the occurrence of deadlock, we must first lock 
the old table, and then lock the new table
         std::lock_guard<std::mutex> 
lock(sc_params.new_tablet->get_push_lock());
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index 0e633c81ef4..3e44daabd2d 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -1039,6 +1039,8 @@ Status StorageEngine::_do_sweep(const std::string& 
scan_root, const time_t& loca
             string path_name = sorted_path.string();
             if (difftime(local_now, mktime(&local_tm_create)) >= 
actual_expire) {
                 res = 
io::global_local_filesystem()->delete_directory(path_name);
+                LOG(INFO) << "do sweep delete directory " << path_name << " 
local_now " << local_now
+                          << "actual_expire " << actual_expire << " res " << 
res;
                 if (!res.ok()) {
                     continue;
                 }
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 75dc5555e39..c29ab4a3105 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -59,6 +59,7 @@
 #include "runtime/memory/mem_tracker.h"
 #include "runtime/thread_context.h"
 #include "service/backend_options.h"
+#include "util/defer_op.h"
 #include "util/doris_metrics.h"
 #include "util/histogram.h"
 #include "util/metrics.h"
@@ -373,7 +374,6 @@ TabletSharedPtr 
TabletManager::_internal_create_tablet_unlocked(
 
     // should remove the tablet's pending_id no matter create-tablet success 
or not
     DataDir* data_dir = tablet->data_dir();
-    SCOPED_CLEANUP({ data_dir->remove_pending_ids(StrCat(TABLET_ID_PREFIX, 
new_tablet_id)); });
 
     // TODO(yiguolei)
     // the following code is very difficult to understand because it mixed 
alter tablet v2
@@ -463,15 +463,9 @@ TabletSharedPtr 
TabletManager::_create_tablet_meta_and_dir_unlocked(
     string pending_id = StrCat(TABLET_ID_PREFIX, request.tablet_id);
     // Many attempts are made here in the hope that even if a disk fails, it 
can still continue.
     std::string parent_timer_name = "CreateMeta";
-    DataDir* last_dir = nullptr;
     MonotonicStopWatch watch;
     watch.start();
     for (auto& data_dir : data_dirs) {
-        if (last_dir != nullptr) {
-            // If last_dir != null, it means the last attempt to create a 
tablet failed
-            last_dir->remove_pending_ids(pending_id);
-        }
-        last_dir = data_dir;
         COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RemovePendingIds", 
parent_timer_name),
                        static_cast<int64_t>(watch.reset()));
 
@@ -503,13 +497,17 @@ TabletSharedPtr 
TabletManager::_create_tablet_meta_and_dir_unlocked(
             LOG(WARNING) << "skip this dir because tablet path exist, path=" 
<< schema_hash_dir;
             continue;
         } else {
-            data_dir->add_pending_ids(pending_id);
             Status st = 
io::global_local_filesystem()->create_directory(schema_hash_dir);
             if (!st.ok()) {
                 continue;
             }
         }
 
+        if (tablet_meta->partition_id() <= 0) {
+            LOG(WARNING) << "invalid partition id " << 
tablet_meta->partition_id() << ", tablet "
+                         << tablet_meta->tablet_id();
+        }
+
         TabletSharedPtr new_tablet = 
Tablet::create_tablet_from_meta(tablet_meta, data_dir);
         COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "CreateTabletFromMeta", 
parent_timer_name),
                        static_cast<int64_t>(watch.reset()));
@@ -523,10 +521,6 @@ Status TabletManager::drop_tablet(TTabletId tablet_id, 
TReplicaId replica_id,
                                   bool is_drop_table_or_partition) {
     auto& shard = _get_tablets_shard(tablet_id);
     std::lock_guard wrlock(shard.lock);
-    if (shard.tablets_under_clone.count(tablet_id) > 0) {
-        return Status::Aborted("tablet {} is under clone, skip drop task", 
tablet_id);
-    }
-    SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
     return _drop_tablet_unlocked(tablet_id, replica_id, false, 
is_drop_table_or_partition);
 }
 
@@ -537,6 +531,9 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId 
tablet_id, TReplicaId repl
               << ", is_drop_table_or_partition=" << is_drop_table_or_partition;
     DorisMetrics::instance()->drop_tablet_requests_total->increment(1);
 
+    RETURN_IF_ERROR(register_transition_tablet(tablet_id, "drop tablet"));
+    Defer defer {[&]() { unregister_transition_tablet(tablet_id, "drop 
tablet"); }};
+
     // Fetch tablet which need to be dropped
     TabletSharedPtr to_drop_tablet = _get_tablet_unlocked(tablet_id);
     if (to_drop_tablet == nullptr) {
@@ -544,12 +541,14 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId 
tablet_id, TReplicaId repl
                      << "tablet_id=" << tablet_id;
         return Status::OK();
     }
+
     // We should compare replica id to avoid dropping new cloned tablet.
     // Iff request replica id is 0, FE may be an older release, then we drop 
this tablet as before.
     if (to_drop_tablet->replica_id() != replica_id && replica_id != 0) {
         return Status::Aborted("replica_id not match({} vs {})", 
to_drop_tablet->replica_id(),
                                replica_id);
     }
+
     _remove_tablet_from_partition(to_drop_tablet);
     tablet_map_t& tablet_map = _get_tablet_map(tablet_id);
     tablet_map.erase(tablet_id);
@@ -1057,6 +1056,7 @@ Status 
TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
 }
 
 Status TabletManager::start_trash_sweep() {
+    DBUG_EXECUTE_IF("TabletManager.start_trash_sweep.sleep", DBUG_BLOCK);
     std::unique_lock<std::mutex> lock(_gc_tablets_lock, std::defer_lock);
     if (!lock.try_lock()) {
         return Status::OK();
@@ -1130,6 +1130,33 @@ Status TabletManager::start_trash_sweep() {
 }
 
 bool TabletManager::_move_tablet_to_trash(const TabletSharedPtr& tablet) {
+    RETURN_IF_ERROR(register_transition_tablet(tablet->tablet_id(), "move to 
trash"));
+    Defer defer {[&]() { unregister_transition_tablet(tablet->tablet_id(), 
"move to trash"); }};
+
+    TabletSharedPtr tablet_in_not_shutdown = get_tablet(tablet->tablet_id());
+    if (tablet_in_not_shutdown) {
+        TSchemaHash schema_hash_not_shutdown = 
tablet_in_not_shutdown->schema_hash();
+        size_t path_hash_not_shutdown = 
tablet_in_not_shutdown->data_dir()->path_hash();
+        if (tablet->schema_hash() == schema_hash_not_shutdown &&
+            tablet->data_dir()->path_hash() == path_hash_not_shutdown) {
+            tablet->clear_cache();
+            // shard_id in memory not eq shard_id in shutdown
+            if (tablet_in_not_shutdown->tablet_path() != 
tablet->tablet_path()) {
+                LOG(INFO) << "tablet path not eq shutdown tablet path, move it 
to trash, tablet_id="
+                          << tablet_in_not_shutdown->tablet_id()
+                          << " mem manager tablet path=" << 
tablet_in_not_shutdown->tablet_path()
+                          << " shutdown tablet path=" << tablet->tablet_path();
+                return 
tablet->data_dir()->move_to_trash(tablet->tablet_path());
+            } else {
+                LOG(INFO) << "tablet path eq shutdown tablet path, not move to 
trash, tablet_id="
+                          << tablet_in_not_shutdown->tablet_id()
+                          << " mem manager tablet path=" << 
tablet_in_not_shutdown->tablet_path()
+                          << " shutdown tablet path=" << tablet->tablet_path();
+                return true;
+            }
+        }
+    }
+
     TabletMetaSharedPtr tablet_meta(new TabletMeta());
     int64_t get_meta_ts = MonotonicMicros();
     Status check_st = TabletMetaManager::get_meta(tablet->data_dir(), 
tablet->tablet_id(),
@@ -1197,6 +1224,15 @@ bool TabletManager::_move_tablet_to_trash(const 
TabletSharedPtr& tablet) {
             return false;
         }
         if (exists) {
+            if (check_st.is<META_KEY_NOT_FOUND>()) {
+                LOG(INFO) << "could not find tablet meta in rocksdb, so just 
delete it path "
+                          << "tablet_id=" << tablet->tablet_id()
+                          << ", schema_hash=" << tablet->schema_hash()
+                          << ", delete tablet_path=" << tablet_path;
+                
RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_path));
+                
RETURN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(tablet_path));
+                return true;
+            }
             LOG(WARNING) << "errors while load meta from store, skip this 
tablet. "
                          << "tablet_id=" << tablet->tablet_id()
                          << ", schema_hash=" << tablet->schema_hash();
@@ -1211,21 +1247,68 @@ bool TabletManager::_move_tablet_to_trash(const 
TabletSharedPtr& tablet) {
     }
 }
 
-bool TabletManager::register_clone_tablet(int64_t tablet_id) {
+Status TabletManager::register_transition_tablet(int64_t tablet_id, 
std::string reason) {
     tablets_shard& shard = _get_tablets_shard(tablet_id);
-    std::lock_guard<std::shared_mutex> wrlock(shard.lock);
-    return shard.tablets_under_clone.insert(tablet_id).second;
+    std::thread::id thread_id = std::this_thread::get_id();
+    std::lock_guard<std::mutex> lk(shard.lock_for_transition);
+    if (auto search = shard.tablets_under_transition.find(tablet_id);
+        search == shard.tablets_under_transition.end()) {
+        // not found
+        shard.tablets_under_transition[tablet_id] = std::make_tuple(reason, 
thread_id, 1);
+        LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << 
reason
+                  << " lock times=1 thread_id_in_map=" << thread_id;
+        return Status::OK();
+    } else {
+        // found
+        auto& [r, thread_id_in_map, lock_times] = search->second;
+        if (thread_id != thread_id_in_map) {
+            // other thread, failed
+            LOG(INFO) << "tablet_id = " << tablet_id << " is doing " << r
+                      << " thread_id_in_map=" << thread_id_in_map << " , add 
reason=" << reason
+                      << " thread_id=" << thread_id;
+            return Status::InternalError<false>("{} failed try later, 
tablet_id={}", reason,
+                                                tablet_id);
+        }
+        // add lock times
+        ++lock_times;
+        LOG(INFO) << "add tablet_id= " << tablet_id << " to map, reason=" << 
reason
+                  << " lock times=" << lock_times << " thread_id_in_map=" << 
thread_id_in_map;
+        return Status::OK();
+    }
 }
 
-void TabletManager::unregister_clone_tablet(int64_t tablet_id) {
+void TabletManager::unregister_transition_tablet(int64_t tablet_id, 
std::string reason) {
     tablets_shard& shard = _get_tablets_shard(tablet_id);
-    std::lock_guard<std::shared_mutex> wrlock(shard.lock);
-    shard.tablets_under_clone.erase(tablet_id);
+    std::thread::id thread_id = std::this_thread::get_id();
+    std::lock_guard<std::mutex> lk(shard.lock_for_transition);
+    if (auto search = shard.tablets_under_transition.find(tablet_id);
+        search == shard.tablets_under_transition.end()) {
+        // impossible, bug
+        DCHECK(false) << "tablet " << tablet_id
+                      << " must be found, before unreg must have been reg";
+    } else {
+        auto& [r, thread_id_in_map, lock_times] = search->second;
+        if (thread_id_in_map != thread_id) {
+            // impossible, bug
+            DCHECK(false) << "tablet " << tablet_id << " unreg thread must 
same reg thread";
+        }
+        // sub lock times
+        --lock_times;
+        if (lock_times != 0) {
+            LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, 
reason=" << reason
+                      << " left=" << lock_times << " thread_id_in_map=" << 
thread_id_in_map;
+        } else {
+            LOG(INFO) << "erase tablet_id= " << tablet_id << " from map, 
reason=" << reason
+                      << " thread_id_in_map=" << thread_id_in_map;
+            shard.tablets_under_transition.erase(tablet_id);
+        }
+    }
 }
 
 void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId 
tablet_id,
                                                   SchemaHash schema_hash,
-                                                  const string& 
schema_hash_path) {
+                                                  const string& 
schema_hash_path,
+                                                  int16_t shard_id) {
     SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
     // acquire the read lock, so that there is no creating tablet or load 
tablet from meta tasks
     // create tablet and load tablet task should check whether the dir exists
@@ -1235,13 +1318,21 @@ void 
TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId t
     // check if meta already exists
     TabletMetaSharedPtr tablet_meta(new TabletMeta());
     Status check_st = TabletMetaManager::get_meta(data_dir, tablet_id, 
schema_hash, tablet_meta);
-    if (check_st.ok()) {
-        LOG(INFO) << "tablet meta exists in meta store, skip delete the path " 
<< schema_hash_path;
+    if (check_st.ok() && tablet_meta->shard_id() == shard_id) {
+        return;
+    }
+
+    LOG(INFO) << "tablet meta not exists, try delete tablet path " << 
schema_hash_path;
+
+    bool succ = register_transition_tablet(tablet_id, "path gc");
+    if (!succ) {
         return;
     }
+    Defer defer {[&]() { unregister_transition_tablet(tablet_id, "path gc"); 
}};
 
-    if (shard.tablets_under_clone.count(tablet_id) > 0) {
-        LOG(INFO) << "tablet is under clone, skip delete the path " << 
schema_hash_path;
+    TabletSharedPtr tablet = _get_tablet_unlocked(tablet_id);
+    if (tablet != nullptr && tablet->tablet_path() == schema_hash_path) {
+        LOG(INFO) << "tablet , skip delete the path " << schema_hash_path;
         return;
     }
 
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index e439804adb6..07c9d563b87 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -140,7 +140,8 @@ public:
     Status start_trash_sweep();
 
     void try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id,
-                                       SchemaHash schema_hash, const 
std::string& schema_hash_path);
+                                       SchemaHash schema_hash, const 
std::string& schema_hash_path,
+                                       int16_t shard_id);
 
     void update_root_path_info(std::map<std::string, DataDirInfo>* path_map,
                                size_t* tablet_counter);
@@ -152,8 +153,8 @@ public:
     void obtain_specific_quantity_tablets(std::vector<TabletInfo>& 
tablets_info, int64_t num);
 
     // return `true` if register success
-    bool register_clone_tablet(int64_t tablet_id);
-    void unregister_clone_tablet(int64_t tablet_id);
+    Status register_transition_tablet(int64_t tablet_id, std::string reason);
+    void unregister_transition_tablet(int64_t tablet_id, std::string reason);
 
     void get_tablets_distribution_on_different_disks(
             std::map<int64_t, std::map<DataDir*, int64_t>>& 
tablets_num_on_disk,
@@ -220,12 +221,15 @@ private:
         tablets_shard() = default;
         tablets_shard(tablets_shard&& shard) {
             tablet_map = std::move(shard.tablet_map);
-            tablets_under_clone = std::move(shard.tablets_under_clone);
+            tablets_under_transition = 
std::move(shard.tablets_under_transition);
         }
-        // protect tablet_map, tablets_under_clone and tablets_under_restore
         mutable std::shared_mutex lock;
         tablet_map_t tablet_map;
-        std::set<int64_t> tablets_under_clone;
+        std::mutex lock_for_transition;
+        // tablet do clone, path gc, move to trash, disk migrate will record 
in tablets_under_transition
+        // tablet <reason, thread_id, lock_times>
+        std::map<int64_t, std::tuple<std::string, std::thread::id, int64_t>>
+                tablets_under_transition;
     };
 
     // trace the memory use by meta of tablet
diff --git a/be/src/olap/task/engine_clone_task.cpp 
b/be/src/olap/task/engine_clone_task.cpp
index c71f245f58e..67206ec40dd 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -153,12 +153,7 @@ EngineCloneTask::EngineCloneTask(const TCloneReq& 
clone_req, const TMasterInfo&
 }
 
 Status EngineCloneTask::execute() {
-    // register the tablet to avoid it is deleted by gc thread during clone 
process
-    if 
(!StorageEngine::instance()->tablet_manager()->register_clone_tablet(_clone_req.tablet_id))
 {
-        return Status::InternalError("tablet {} is under clone", 
_clone_req.tablet_id);
-    }
     Status st = _do_clone();
-    
StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id);
     return st;
 }
 
@@ -166,6 +161,13 @@ Status EngineCloneTask::_do_clone() {
     Status status = Status::OK();
     string src_file_path;
     TBackend src_host;
+    
RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->register_transition_tablet(
+            _clone_req.tablet_id, "clone"));
+    Defer defer {[&]() {
+        
StorageEngine::instance()->tablet_manager()->unregister_transition_tablet(
+                _clone_req.tablet_id, "clone");
+    }};
+
     // Check local tablet exist or not
     TabletSharedPtr tablet =
             
StorageEngine::instance()->tablet_manager()->get_tablet(_clone_req.tablet_id);
@@ -176,14 +178,8 @@ Status EngineCloneTask::_do_clone() {
     if (tablet && tablet->tablet_state() == TABLET_NOTREADY) {
         LOG(WARNING) << "tablet state is not ready when clone, need to drop 
old tablet, tablet_id="
                      << tablet->tablet_id();
-        // can not drop tablet when under clone. so unregister clone tablet 
firstly.
-        
StorageEngine::instance()->tablet_manager()->unregister_clone_tablet(_clone_req.tablet_id);
         
RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->drop_tablet(
                 tablet->tablet_id(), tablet->replica_id(), false));
-        if 
(!StorageEngine::instance()->tablet_manager()->register_clone_tablet(
-                    _clone_req.tablet_id)) {
-            return Status::InternalError("tablet {} is under clone", 
_clone_req.tablet_id);
-        }
         tablet.reset();
     }
     bool is_new_tablet = tablet == nullptr;
@@ -267,8 +263,21 @@ Status EngineCloneTask::_do_clone() {
                       << ". signature: " << _signature;
             
WARN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_dir),
                           "failed to delete useless clone dir ");
+            
WARN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(tablet_dir),
+                          "failed to delete parent dir");
         }};
 
+        bool exists = true;
+        Status exists_st = io::global_local_filesystem()->exists(tablet_dir, 
&exists);
+        if (!exists_st) {
+            LOG(WARNING) << "cant get path=" << tablet_dir << " state, st=" << 
exists_st;
+            return exists_st;
+        }
+        if (exists) {
+            LOG(WARNING) << "before clone dest path=" << tablet_dir << " 
exist, remote it first";
+            
RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(tablet_dir));
+        }
+
         bool allow_incremental_clone = false;
         RETURN_IF_ERROR_(status,
                          _make_and_download_snapshots(*store, tablet_dir, 
&src_host, &src_file_path,
diff --git a/be/src/olap/task/engine_storage_migration_task.cpp 
b/be/src/olap/task/engine_storage_migration_task.cpp
index 60ab1dfe796..f0f2f780d4c 100644
--- a/be/src/olap/task/engine_storage_migration_task.cpp
+++ b/be/src/olap/task/engine_storage_migration_task.cpp
@@ -197,6 +197,13 @@ Status EngineStorageMigrationTask::_migrate() {
     LOG(INFO) << "begin to process tablet migrate. "
               << "tablet_id=" << tablet_id << ", dest_store=" << 
_dest_store->path();
 
+    
RETURN_IF_ERROR(StorageEngine::instance()->tablet_manager()->register_transition_tablet(
+            _tablet->tablet_id(), "disk migrate"));
+    Defer defer {[&]() {
+        
StorageEngine::instance()->tablet_manager()->unregister_transition_tablet(
+                _tablet->tablet_id(), "disk migrate");
+    }};
+
     DorisMetrics::instance()->storage_migrate_requests_total->increment(1);
     int32_t start_version = 0;
     int32_t end_version = 0;
@@ -310,7 +317,8 @@ Status EngineStorageMigrationTask::_migrate() {
 
     if (!res.ok()) {
         // we should remove the dir directly for avoid disk full of junk data, 
and it's safe to remove
-        io::global_local_filesystem()->delete_directory(full_path);
+        
RETURN_IF_ERROR(io::global_local_filesystem()->delete_directory(full_path));
+        
RETURN_IF_ERROR(DataDir::delete_tablet_parent_path_if_empty(full_path));
     }
     return res;
 }
diff --git 
a/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy 
b/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy
new file mode 100644
index 00000000000..ebf1259a72f
--- /dev/null
+++ b/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+import org.junit.Assert
+
+suite('test_drop_clone_tablet_path_race') {
+    if (isCloudMode()) {
+        return
+    }
+    def options = new ClusterOptions()
+    options.enableDebugPoints()
+    options.feConfigs += [
+        'tablet_checker_interval_ms=100',
+        'schedule_slot_num_per_hdd_path=1000',
+        'storage_high_watermark_usage_percent=99',
+        'storage_flood_stage_usage_percent=99',
+    ]
+    options.beNum = 3
+    docker(options) {
+        def table = "t1"
+        def checkFunc = {size -> 
+            boolean succ = false
+            for (int i = 0; i < 120; i++) {
+                def result = sql_return_maparray """SHOW TABLETS FROM 
${table}"""
+                if (result.size() == size) {
+                    def version = result[0].Version
+                    def state = result[0].State
+                    succ = result.every { it.Version.equals(version) && 
it.State.equals(state) }
+                    if (succ) {
+                        break
+                    }
+                }
+                sleep(1000)
+            }
+            Assert.assertTrue(succ)
+        }
+
+        sql """DROP TABLE IF EXISTS ${table}"""
+        sql """
+            CREATE TABLE `${table}` (
+            `id` int(11) NULL,
+            `name` varchar(255) NULL,
+            `score` int(11) SUM NULL
+            ) ENGINE=OLAP
+            AGGREGATE KEY(`id`, `name`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(`id`) BUCKETS 10
+            PROPERTIES (
+                    'replication_num' = '3'
+            );
+        """
+
+        try {
+            // 10h
+            
GetDebugPoint().enableDebugPointForAllBEs("TabletManager.start_trash_sweep.sleep")
+            for(int i= 0; i < 100; ++i) {
+                sql """INSERT INTO ${table} values (${i}, "${i}str", ${i} * 
100)"""
+            }
+
+            sql """ALTER TABLE ${table} MODIFY PARTITION(${table}) SET 
("replication_num" = "2")"""
+
+            checkFunc(20)
+
+            sql """ALTER TABLE ${table} MODIFY PARTITION(${table}) SET 
("replication_num" = "3")"""
+            checkFunc(30)
+        } finally {
+            
GetDebugPoint().disableDebugPointForAllBEs("TabletManager.start_trash_sweep.sleep")
+        }
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to