This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 97ce93ed58 [fix](create tablet) fix backend create tablet timeout 
(#23885)
97ce93ed58 is described below

commit 97ce93ed58d344b9e879e94cc2772bf8bc1fe0e2
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Sun Sep 10 11:13:38 2023 +0800

    [fix](create tablet) fix backend create tablet timeout (#23885)
---
 be/src/olap/tablet.cpp         |   1 +
 be/src/olap/tablet_manager.cpp | 308 ++++++++++++++++++++---------------------
 be/src/olap/tablet_manager.h   |  11 +-
 3 files changed, 157 insertions(+), 163 deletions(-)

diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 968b6ad9d4..98ef536d9f 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2404,6 +2404,7 @@ void Tablet::update_self_owned_remote_rowsets(
 }
 
 bool Tablet::check_all_rowset_segment() {
+    std::shared_lock rdlock(_meta_lock);
     for (auto& version_rowset : _rs_version_map) {
         RowsetSharedPtr rowset = version_rowset.second;
         if (!rowset->check_rowset_segment()) {
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index a523ec620d..14c2489c24 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -599,36 +599,41 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId 
tablet_id, TabletUid tablet_
 
 std::vector<TabletSharedPtr> 
TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) {
     std::vector<TabletSharedPtr> res;
+    for_each_tablet([&](const TabletSharedPtr& tablet) { 
res.emplace_back(tablet); },
+                    std::move(filter));
+    return res;
+}
+
+void TabletManager::for_each_tablet(std::function<void(const 
TabletSharedPtr&)>&& handler,
+                                    std::function<bool(Tablet*)>&& filter) {
+    std::vector<TabletSharedPtr> tablets;
     for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (auto& [id, tablet] : tablets_shard.tablet_map) {
-            if (filter(tablet.get())) {
-                res.emplace_back(tablet);
+        tablets.clear();
+        {
+            std::shared_lock rdlock(tablets_shard.lock);
+            for (const auto& [id, tablet] : tablets_shard.tablet_map) {
+                if (filter(tablet.get())) {
+                    tablets.emplace_back(tablet);
+                }
             }
         }
+        for (const auto& tablet : tablets) {
+            handler(tablet);
+        }
     }
-    return res;
 }
 
 uint64_t TabletManager::get_rowset_nums() {
     uint64_t rowset_nums = 0;
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& tablet_map : tablets_shard.tablet_map) {
-            rowset_nums += tablet_map.second->version_count();
-        }
-    }
+    for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums += 
tablet->version_count(); },
+                    filter_all_tablets);
     return rowset_nums;
 }
 
 uint64_t TabletManager::get_segment_nums() {
     uint64_t segment_nums = 0;
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& tablet_map : tablets_shard.tablet_map) {
-            segment_nums += tablet_map.second->segment_count();
-        }
-    }
+    for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums += 
tablet->segment_count(); },
+                    filter_all_tablets);
     return segment_nums;
 }
 
@@ -686,63 +691,60 @@ TabletSharedPtr 
TabletManager::find_best_tablet_to_compaction(
     uint32_t compaction_score = 0;
     double tablet_scan_frequency = 0.0;
     TabletSharedPtr best_tablet;
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& tablet_map : tablets_shard.tablet_map) {
-            const TabletSharedPtr& tablet_ptr = tablet_map.second;
-            if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), 
compaction_type)) {
-                continue;
-            }
+    auto handler = [&](const TabletSharedPtr& tablet_ptr) {
+        if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), 
compaction_type)) {
+            return;
+        }
 
-            auto search = 
tablet_submitted_compaction.find(tablet_ptr->tablet_id());
-            if (search != tablet_submitted_compaction.end()) {
-                continue;
-            }
+        auto search = 
tablet_submitted_compaction.find(tablet_ptr->tablet_id());
+        if (search != tablet_submitted_compaction.end()) {
+            return;
+        }
 
-            int64_t last_failure_ms = 
tablet_ptr->last_cumu_compaction_failure_time();
-            if (compaction_type == CompactionType::BASE_COMPACTION) {
-                last_failure_ms = 
tablet_ptr->last_base_compaction_failure_time();
-            }
-            if (now_ms - last_failure_ms <= 
config::min_compaction_failure_interval_sec * 1000) {
-                continue;
-            }
+        int64_t last_failure_ms = 
tablet_ptr->last_cumu_compaction_failure_time();
+        if (compaction_type == CompactionType::BASE_COMPACTION) {
+            last_failure_ms = tablet_ptr->last_base_compaction_failure_time();
+        }
+        if (now_ms - last_failure_ms <= 
config::min_compaction_failure_interval_sec * 1000) {
+            return;
+        }
 
-            if (compaction_type == CompactionType::BASE_COMPACTION) {
-                std::unique_lock<std::mutex> 
lock(tablet_ptr->get_base_compaction_lock(),
-                                                  std::try_to_lock);
-                if (!lock.owns_lock()) {
-                    LOG(INFO) << "can not get base lock: " << 
tablet_ptr->tablet_id();
-                    continue;
-                }
-            } else {
-                std::unique_lock<std::mutex> 
lock(tablet_ptr->get_cumulative_compaction_lock(),
-                                                  std::try_to_lock);
-                if (!lock.owns_lock()) {
-                    LOG(INFO) << "can not get cumu lock: " << 
tablet_ptr->tablet_id();
-                    continue;
-                }
+        if (compaction_type == CompactionType::BASE_COMPACTION) {
+            std::unique_lock<std::mutex> 
lock(tablet_ptr->get_base_compaction_lock(),
+                                              std::try_to_lock);
+            if (!lock.owns_lock()) {
+                LOG(INFO) << "can not get base lock: " << 
tablet_ptr->tablet_id();
+                return;
+            }
+        } else {
+            std::unique_lock<std::mutex> 
lock(tablet_ptr->get_cumulative_compaction_lock(),
+                                              std::try_to_lock);
+            if (!lock.owns_lock()) {
+                LOG(INFO) << "can not get cumu lock: " << 
tablet_ptr->tablet_id();
+                return;
             }
+        }
 
-            uint32_t current_compaction_score = 
tablet_ptr->calc_compaction_score(
-                    compaction_type, cumulative_compaction_policy);
+        uint32_t current_compaction_score =
+                tablet_ptr->calc_compaction_score(compaction_type, 
cumulative_compaction_policy);
 
-            double scan_frequency = 0.0;
-            if (config::compaction_tablet_scan_frequency_factor != 0) {
-                scan_frequency = tablet_ptr->calculate_scan_frequency();
-            }
+        double scan_frequency = 0.0;
+        if (config::compaction_tablet_scan_frequency_factor != 0) {
+            scan_frequency = tablet_ptr->calculate_scan_frequency();
+        }
 
-            double tablet_score =
-                    config::compaction_tablet_scan_frequency_factor * 
scan_frequency +
-                    config::compaction_tablet_compaction_score_factor * 
current_compaction_score;
-            if (tablet_score > highest_score) {
-                highest_score = tablet_score;
-                compaction_score = current_compaction_score;
-                tablet_scan_frequency = scan_frequency;
-                best_tablet = tablet_ptr;
-            }
+        double tablet_score =
+                config::compaction_tablet_scan_frequency_factor * 
scan_frequency +
+                config::compaction_tablet_compaction_score_factor * 
current_compaction_score;
+        if (tablet_score > highest_score) {
+            highest_score = tablet_score;
+            compaction_score = current_compaction_score;
+            tablet_scan_frequency = scan_frequency;
+            best_tablet = tablet_ptr;
         }
-    }
+    };
 
+    for_each_tablet(handler, filter_all_tablets);
     if (best_tablet != nullptr) {
         VLOG_CRITICAL << "Found the best tablet for compaction. "
                       << "compaction_type=" << compaction_type_str
@@ -913,33 +915,31 @@ Status 
TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
     DorisMetrics::instance()->report_all_tablets_requests_total->increment(1);
     HistogramStat tablet_version_num_hist;
     auto local_cache = std::make_shared<std::vector<TTabletStat>>();
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& item : tablets_shard.tablet_map) {
-            uint64_t tablet_id = item.first;
-            TabletSharedPtr tablet_ptr = item.second;
-            TTablet t_tablet;
-            TTabletInfo tablet_info;
-            tablet_ptr->build_tablet_report_info(&tablet_info, true);
-            // find expired transaction corresponding to this tablet
-            TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), 
tablet_ptr->tablet_uid());
-            auto find = expire_txn_map.find(tinfo);
-            if (find != expire_txn_map.end()) {
-                tablet_info.__set_transaction_ids(find->second);
-                expire_txn_map.erase(find);
-            }
-            t_tablet.tablet_infos.push_back(tablet_info);
-            tablet_version_num_hist.add(tablet_ptr->version_count());
-            tablets_info->emplace(tablet_id, t_tablet);
-            TTabletStat t_tablet_stat;
-            t_tablet_stat.__set_tablet_id(tablet_info.tablet_id);
-            t_tablet_stat.__set_data_size(tablet_info.data_size);
-            t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
-            t_tablet_stat.__set_row_num(tablet_info.row_count);
-            t_tablet_stat.__set_version_count(tablet_info.version_count);
-            local_cache->emplace_back(std::move(t_tablet_stat));
+    auto handler = [&](const TabletSharedPtr& tablet_ptr) {
+        uint64_t tablet_id = tablet_ptr->tablet_id();
+        TTablet t_tablet;
+        TTabletInfo tablet_info;
+        tablet_ptr->build_tablet_report_info(&tablet_info, true);
+        // find expired transaction corresponding to this tablet
+        TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), 
tablet_ptr->tablet_uid());
+        auto find = expire_txn_map.find(tinfo);
+        if (find != expire_txn_map.end()) {
+            tablet_info.__set_transaction_ids(find->second);
+            expire_txn_map.erase(find);
         }
-    }
+        t_tablet.tablet_infos.push_back(tablet_info);
+        tablet_version_num_hist.add(tablet_ptr->version_count());
+        tablets_info->emplace(tablet_id, t_tablet);
+        TTabletStat t_tablet_stat;
+        t_tablet_stat.__set_tablet_id(tablet_info.tablet_id);
+        t_tablet_stat.__set_data_size(tablet_info.data_size);
+        t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size);
+        t_tablet_stat.__set_row_num(tablet_info.row_count);
+        t_tablet_stat.__set_version_count(tablet_info.version_count);
+        local_cache->emplace_back(std::move(t_tablet_stat));
+    };
+    for_each_tablet(handler, filter_all_tablets);
+
     {
         std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex);
         _tablet_stat_list_cache = local_cache;
@@ -953,23 +953,9 @@ Status 
TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet>
 Status TabletManager::start_trash_sweep() {
     SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
     {
-        std::vector<TabletSharedPtr>
-                all_tablets; // we use this vector to save all tablet ptr for 
saving lock time.
-        for (auto& tablets_shard : _tablets_shards) {
-            tablet_map_t& tablet_map = tablets_shard.tablet_map;
-            {
-                std::shared_lock rdlock(tablets_shard.lock);
-                for (auto& item : tablet_map) {
-                    // try to clean empty item
-                    all_tablets.push_back(item.second);
-                }
-            }
-            // Avoid hold the shard lock too long, so we get tablet to a 
vector and clean here
-            for (const auto& tablet : all_tablets) {
-                tablet->delete_expired_stale_rowset();
-            }
-            all_tablets.clear();
-        }
+        for_each_tablet(
+                [](const TabletSharedPtr& tablet) { 
tablet->delete_expired_stale_rowset(); },
+                filter_all_tablets);
     }
 
     int32_t clean_num = 0;
@@ -1130,24 +1116,13 @@ void 
TabletManager::get_partition_related_tablets(int64_t partition_id,
 
 void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) {
     SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
-    std::vector<TabletSharedPtr> related_tablets;
-    {
-        for (auto& tablets_shard : _tablets_shards) {
-            std::shared_lock rdlock(tablets_shard.lock);
-            for (auto& item : tablets_shard.tablet_map) {
-                TabletSharedPtr& tablet_ptr = item.second;
-                if (tablet_ptr->tablet_state() != TABLET_RUNNING) {
-                    continue;
-                }
+    auto filter = [data_dir](Tablet* tablet) -> bool {
+        return tablet->tablet_state() == TABLET_RUNNING &&
+               tablet->data_dir()->path_hash() == data_dir->path_hash() && 
tablet->is_used() &&
+               tablet->init_succeeded();
+    };
 
-                if (tablet_ptr->data_dir()->path_hash() != 
data_dir->path_hash() ||
-                    !tablet_ptr->is_used() || !tablet_ptr->init_succeeded()) {
-                    continue;
-                }
-                related_tablets.push_back(tablet_ptr);
-            }
-        }
-    }
+    std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter);
     int counter = 0;
     MonotonicStopWatch watch;
     watch.start();
@@ -1318,17 +1293,14 @@ struct SortCtx {
 
 void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* 
tablets) {
     std::vector<SortCtx> sort_ctx_vec;
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& item : tablets_shard.tablet_map) {
-            const TabletSharedPtr& tablet = item.second;
-            int64_t cooldown_timestamp = -1;
-            size_t file_size = -1;
-            if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
-                sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, 
file_size);
-            }
+    auto handler = [&](const TabletSharedPtr& tablet) {
+        int64_t cooldown_timestamp = -1;
+        size_t file_size = -1;
+        if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) {
+            sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size);
         }
-    }
+    };
+    for_each_tablet(handler, filter_all_tablets);
 
     std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end(), [](SortCtx a, SortCtx 
b) {
         if (a.cooldown_timestamp != -1 && b.cooldown_timestamp != -1) {
@@ -1354,44 +1326,58 @@ void 
TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets)
 
 void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult* 
result) {
     DCHECK(result != nullptr);
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& item : tablets_shard.tablet_map) {
-            uint64_t tablet_id = item.first;
-            if (item.second->all_beta()) {
-                result->v2_tablets.push_back(tablet_id);
-            } else {
-                result->v1_tablets.push_back(tablet_id);
-            }
+    auto handler = [result](const TabletSharedPtr& tablet) {
+        if (tablet->all_beta()) {
+            result->v2_tablets.push_back(tablet->tablet_id());
+        } else {
+            result->v1_tablets.push_back(tablet->tablet_id());
         }
-    }
+    };
+
+    for_each_tablet(handler, filter_all_tablets);
     result->__isset.v1_tablets = true;
     result->__isset.v2_tablets = true;
 }
 
 std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) {
     std::set<int64_t> bad_tablets;
-    for (const auto& tablets_shard : _tablets_shards) {
+    std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets;
+    auto handler = [&](const TabletSharedPtr& tablet) {
+        if (!tablet->check_all_rowset_segment()) {
+            int64_t tablet_id = tablet->tablet_id();
+            bad_tablets.insert(tablet_id);
+            if (repair) {
+                repair_shard_bad_tablets[tablet_id & 
_tablets_shards_mask].push_back(tablet_id);
+            }
+        }
+    };
+    for_each_tablet(handler, filter_all_tablets);
+
+    for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) {
+        auto& tablets_shard = _tablets_shards[shard_index];
+        auto& tablet_map = tablets_shard.tablet_map;
         std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock);
-        for (const auto& item : tablets_shard.tablet_map) {
-            TabletSharedPtr tablet = item.second;
-            if (!tablet->check_all_rowset_segment()) {
-                bad_tablets.insert(tablet->tablet_id());
-                if (repair) {
-                    tablet->set_tablet_state(TABLET_SHUTDOWN);
-                    tablet->save_meta();
-                    {
-                        std::lock_guard<std::shared_mutex> 
shutdown_tablets_wrlock(
-                                _shutdown_tablets_lock);
-                        _shutdown_tablets.push_back(tablet);
-                    }
-                    LOG(WARNING) << "There are some segments lost, set tablet 
to shutdown state."
-                                 << "tablet_id=" << tablet->tablet_id()
-                                 << ", tablet_path=" << tablet->tablet_path();
+        for (auto tablet_id : shard_tablets) {
+            auto it = tablet_map.find(tablet_id);
+            if (it == tablet_map.end()) {
+                bad_tablets.erase(tablet_id);
+                LOG(WARNING) << "Bad tablet has be removed. tablet_id=" << 
tablet_id;
+            } else {
+                const auto& tablet = it->second;
+                tablet->set_tablet_state(TABLET_SHUTDOWN);
+                tablet->save_meta();
+                {
+                    std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock(
+                            _shutdown_tablets_lock);
+                    _shutdown_tablets.push_back(tablet);
                 }
+                LOG(WARNING) << "There are some segments lost, set tablet to 
shutdown state."
+                             << "tablet_id=" << tablet->tablet_id()
+                             << ", tablet_path=" << tablet->tablet_path();
             }
         }
     }
+
     return bad_tablets;
 }
 
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 378ff499ac..b0b5914441 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -78,8 +78,15 @@ public:
     TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid,
                                bool include_deleted = false, std::string* err 
= nullptr);
 
-    std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&& 
filter =
-                                                        [](Tablet* t) { return 
t->is_used(); });
+    std::vector<TabletSharedPtr> get_all_tablet(
+            std::function<bool(Tablet*)>&& filter = filter_used_tablets);
+
+    // Handler not hold the shard lock.
+    void for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler,
+                         std::function<bool(Tablet*)>&& filter = 
filter_used_tablets);
+
+    static bool filter_all_tablets(Tablet* tablet) { return true; }
+    static bool filter_used_tablets(Tablet* tablet) { return 
tablet->is_used(); }
 
     uint64_t get_rowset_nums();
     uint64_t get_segment_nums();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to