This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 97ce93ed58 [fix](create tablet) fix backend create tablet timeout (#23885) 97ce93ed58 is described below commit 97ce93ed58d344b9e879e94cc2772bf8bc1fe0e2 Author: yujun <yu.jun.re...@gmail.com> AuthorDate: Sun Sep 10 11:13:38 2023 +0800 [fix](create tablet) fix backend create tablet timeout (#23885) --- be/src/olap/tablet.cpp | 1 + be/src/olap/tablet_manager.cpp | 308 ++++++++++++++++++++--------------------- be/src/olap/tablet_manager.h | 11 +- 3 files changed, 157 insertions(+), 163 deletions(-) diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 968b6ad9d4..98ef536d9f 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2404,6 +2404,7 @@ void Tablet::update_self_owned_remote_rowsets( } bool Tablet::check_all_rowset_segment() { + std::shared_lock rdlock(_meta_lock); for (auto& version_rowset : _rs_version_map) { RowsetSharedPtr rowset = version_rowset.second; if (!rowset->check_rowset_segment()) { diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index a523ec620d..14c2489c24 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -599,36 +599,41 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TabletUid tablet_ std::vector<TabletSharedPtr> TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) { std::vector<TabletSharedPtr> res; + for_each_tablet([&](const TabletSharedPtr& tablet) { res.emplace_back(tablet); }, + std::move(filter)); + return res; +} + +void TabletManager::for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler, + std::function<bool(Tablet*)>&& filter) { + std::vector<TabletSharedPtr> tablets; for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (auto& [id, tablet] : tablets_shard.tablet_map) { - if (filter(tablet.get())) { - res.emplace_back(tablet); + tablets.clear(); + { + std::shared_lock rdlock(tablets_shard.lock); + for (const auto& [id, tablet] : tablets_shard.tablet_map) { + if (filter(tablet.get())) { + tablets.emplace_back(tablet); + } } } + for (const auto& tablet : tablets) { + handler(tablet); + } } - return res; } uint64_t TabletManager::get_rowset_nums() { uint64_t rowset_nums = 0; - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& tablet_map : tablets_shard.tablet_map) { - rowset_nums += tablet_map.second->version_count(); - } - } + for_each_tablet([&](const TabletSharedPtr& tablet) { rowset_nums += tablet->version_count(); }, + filter_all_tablets); return rowset_nums; } uint64_t TabletManager::get_segment_nums() { uint64_t segment_nums = 0; - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& tablet_map : tablets_shard.tablet_map) { - segment_nums += tablet_map.second->segment_count(); - } - } + for_each_tablet([&](const TabletSharedPtr& tablet) { segment_nums += tablet->segment_count(); }, + filter_all_tablets); return segment_nums; } @@ -686,63 +691,60 @@ TabletSharedPtr TabletManager::find_best_tablet_to_compaction( uint32_t compaction_score = 0; double tablet_scan_frequency = 0.0; TabletSharedPtr best_tablet; - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& tablet_map : tablets_shard.tablet_map) { - const TabletSharedPtr& tablet_ptr = tablet_map.second; - if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), compaction_type)) { - continue; - } + auto handler = [&](const TabletSharedPtr& tablet_ptr) { + if (!tablet_ptr->can_do_compaction(data_dir->path_hash(), compaction_type)) { + return; + } - auto search = tablet_submitted_compaction.find(tablet_ptr->tablet_id()); - if (search != tablet_submitted_compaction.end()) { - continue; - } + auto search = tablet_submitted_compaction.find(tablet_ptr->tablet_id()); + if (search != tablet_submitted_compaction.end()) { + return; + } - int64_t last_failure_ms = tablet_ptr->last_cumu_compaction_failure_time(); - if (compaction_type == CompactionType::BASE_COMPACTION) { - last_failure_ms = tablet_ptr->last_base_compaction_failure_time(); - } - if (now_ms - last_failure_ms <= config::min_compaction_failure_interval_sec * 1000) { - continue; - } + int64_t last_failure_ms = tablet_ptr->last_cumu_compaction_failure_time(); + if (compaction_type == CompactionType::BASE_COMPACTION) { + last_failure_ms = tablet_ptr->last_base_compaction_failure_time(); + } + if (now_ms - last_failure_ms <= config::min_compaction_failure_interval_sec * 1000) { + return; + } - if (compaction_type == CompactionType::BASE_COMPACTION) { - std::unique_lock<std::mutex> lock(tablet_ptr->get_base_compaction_lock(), - std::try_to_lock); - if (!lock.owns_lock()) { - LOG(INFO) << "can not get base lock: " << tablet_ptr->tablet_id(); - continue; - } - } else { - std::unique_lock<std::mutex> lock(tablet_ptr->get_cumulative_compaction_lock(), - std::try_to_lock); - if (!lock.owns_lock()) { - LOG(INFO) << "can not get cumu lock: " << tablet_ptr->tablet_id(); - continue; - } + if (compaction_type == CompactionType::BASE_COMPACTION) { + std::unique_lock<std::mutex> lock(tablet_ptr->get_base_compaction_lock(), + std::try_to_lock); + if (!lock.owns_lock()) { + LOG(INFO) << "can not get base lock: " << tablet_ptr->tablet_id(); + return; + } + } else { + std::unique_lock<std::mutex> lock(tablet_ptr->get_cumulative_compaction_lock(), + std::try_to_lock); + if (!lock.owns_lock()) { + LOG(INFO) << "can not get cumu lock: " << tablet_ptr->tablet_id(); + return; } + } - uint32_t current_compaction_score = tablet_ptr->calc_compaction_score( - compaction_type, cumulative_compaction_policy); + uint32_t current_compaction_score = + tablet_ptr->calc_compaction_score(compaction_type, cumulative_compaction_policy); - double scan_frequency = 0.0; - if (config::compaction_tablet_scan_frequency_factor != 0) { - scan_frequency = tablet_ptr->calculate_scan_frequency(); - } + double scan_frequency = 0.0; + if (config::compaction_tablet_scan_frequency_factor != 0) { + scan_frequency = tablet_ptr->calculate_scan_frequency(); + } - double tablet_score = - config::compaction_tablet_scan_frequency_factor * scan_frequency + - config::compaction_tablet_compaction_score_factor * current_compaction_score; - if (tablet_score > highest_score) { - highest_score = tablet_score; - compaction_score = current_compaction_score; - tablet_scan_frequency = scan_frequency; - best_tablet = tablet_ptr; - } + double tablet_score = + config::compaction_tablet_scan_frequency_factor * scan_frequency + + config::compaction_tablet_compaction_score_factor * current_compaction_score; + if (tablet_score > highest_score) { + highest_score = tablet_score; + compaction_score = current_compaction_score; + tablet_scan_frequency = scan_frequency; + best_tablet = tablet_ptr; } - } + }; + for_each_tablet(handler, filter_all_tablets); if (best_tablet != nullptr) { VLOG_CRITICAL << "Found the best tablet for compaction. " << "compaction_type=" << compaction_type_str @@ -913,33 +915,31 @@ Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet> DorisMetrics::instance()->report_all_tablets_requests_total->increment(1); HistogramStat tablet_version_num_hist; auto local_cache = std::make_shared<std::vector<TTabletStat>>(); - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& item : tablets_shard.tablet_map) { - uint64_t tablet_id = item.first; - TabletSharedPtr tablet_ptr = item.second; - TTablet t_tablet; - TTabletInfo tablet_info; - tablet_ptr->build_tablet_report_info(&tablet_info, true); - // find expired transaction corresponding to this tablet - TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), tablet_ptr->tablet_uid()); - auto find = expire_txn_map.find(tinfo); - if (find != expire_txn_map.end()) { - tablet_info.__set_transaction_ids(find->second); - expire_txn_map.erase(find); - } - t_tablet.tablet_infos.push_back(tablet_info); - tablet_version_num_hist.add(tablet_ptr->version_count()); - tablets_info->emplace(tablet_id, t_tablet); - TTabletStat t_tablet_stat; - t_tablet_stat.__set_tablet_id(tablet_info.tablet_id); - t_tablet_stat.__set_data_size(tablet_info.data_size); - t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size); - t_tablet_stat.__set_row_num(tablet_info.row_count); - t_tablet_stat.__set_version_count(tablet_info.version_count); - local_cache->emplace_back(std::move(t_tablet_stat)); + auto handler = [&](const TabletSharedPtr& tablet_ptr) { + uint64_t tablet_id = tablet_ptr->tablet_id(); + TTablet t_tablet; + TTabletInfo tablet_info; + tablet_ptr->build_tablet_report_info(&tablet_info, true); + // find expired transaction corresponding to this tablet + TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), tablet_ptr->tablet_uid()); + auto find = expire_txn_map.find(tinfo); + if (find != expire_txn_map.end()) { + tablet_info.__set_transaction_ids(find->second); + expire_txn_map.erase(find); } - } + t_tablet.tablet_infos.push_back(tablet_info); + tablet_version_num_hist.add(tablet_ptr->version_count()); + tablets_info->emplace(tablet_id, t_tablet); + TTabletStat t_tablet_stat; + t_tablet_stat.__set_tablet_id(tablet_info.tablet_id); + t_tablet_stat.__set_data_size(tablet_info.data_size); + t_tablet_stat.__set_remote_data_size(tablet_info.remote_data_size); + t_tablet_stat.__set_row_num(tablet_info.row_count); + t_tablet_stat.__set_version_count(tablet_info.version_count); + local_cache->emplace_back(std::move(t_tablet_stat)); + }; + for_each_tablet(handler, filter_all_tablets); + { std::lock_guard<std::mutex> guard(_tablet_stat_cache_mutex); _tablet_stat_list_cache = local_cache; @@ -953,23 +953,9 @@ Status TabletManager::build_all_report_tablets_info(std::map<TTabletId, TTablet> Status TabletManager::start_trash_sweep() { SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); { - std::vector<TabletSharedPtr> - all_tablets; // we use this vector to save all tablet ptr for saving lock time. - for (auto& tablets_shard : _tablets_shards) { - tablet_map_t& tablet_map = tablets_shard.tablet_map; - { - std::shared_lock rdlock(tablets_shard.lock); - for (auto& item : tablet_map) { - // try to clean empty item - all_tablets.push_back(item.second); - } - } - // Avoid hold the shard lock too long, so we get tablet to a vector and clean here - for (const auto& tablet : all_tablets) { - tablet->delete_expired_stale_rowset(); - } - all_tablets.clear(); - } + for_each_tablet( + [](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); }, + filter_all_tablets); } int32_t clean_num = 0; @@ -1130,24 +1116,13 @@ void TabletManager::get_partition_related_tablets(int64_t partition_id, void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) { SCOPED_CONSUME_MEM_TRACKER(_mem_tracker); - std::vector<TabletSharedPtr> related_tablets; - { - for (auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (auto& item : tablets_shard.tablet_map) { - TabletSharedPtr& tablet_ptr = item.second; - if (tablet_ptr->tablet_state() != TABLET_RUNNING) { - continue; - } + auto filter = [data_dir](Tablet* tablet) -> bool { + return tablet->tablet_state() == TABLET_RUNNING && + tablet->data_dir()->path_hash() == data_dir->path_hash() && tablet->is_used() && + tablet->init_succeeded(); + }; - if (tablet_ptr->data_dir()->path_hash() != data_dir->path_hash() || - !tablet_ptr->is_used() || !tablet_ptr->init_succeeded()) { - continue; - } - related_tablets.push_back(tablet_ptr); - } - } - } + std::vector<TabletSharedPtr> related_tablets = get_all_tablet(filter); int counter = 0; MonotonicStopWatch watch; watch.start(); @@ -1318,17 +1293,14 @@ struct SortCtx { void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets) { std::vector<SortCtx> sort_ctx_vec; - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& item : tablets_shard.tablet_map) { - const TabletSharedPtr& tablet = item.second; - int64_t cooldown_timestamp = -1; - size_t file_size = -1; - if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) { - sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size); - } + auto handler = [&](const TabletSharedPtr& tablet) { + int64_t cooldown_timestamp = -1; + size_t file_size = -1; + if (tablet->need_cooldown(&cooldown_timestamp, &file_size)) { + sort_ctx_vec.emplace_back(tablet, cooldown_timestamp, file_size); } - } + }; + for_each_tablet(handler, filter_all_tablets); std::sort(sort_ctx_vec.begin(), sort_ctx_vec.end(), [](SortCtx a, SortCtx b) { if (a.cooldown_timestamp != -1 && b.cooldown_timestamp != -1) { @@ -1354,44 +1326,58 @@ void TabletManager::get_cooldown_tablets(std::vector<TabletSharedPtr>* tablets) void TabletManager::get_all_tablets_storage_format(TCheckStorageFormatResult* result) { DCHECK(result != nullptr); - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& item : tablets_shard.tablet_map) { - uint64_t tablet_id = item.first; - if (item.second->all_beta()) { - result->v2_tablets.push_back(tablet_id); - } else { - result->v1_tablets.push_back(tablet_id); - } + auto handler = [result](const TabletSharedPtr& tablet) { + if (tablet->all_beta()) { + result->v2_tablets.push_back(tablet->tablet_id()); + } else { + result->v1_tablets.push_back(tablet->tablet_id()); } - } + }; + + for_each_tablet(handler, filter_all_tablets); result->__isset.v1_tablets = true; result->__isset.v2_tablets = true; } std::set<int64_t> TabletManager::check_all_tablet_segment(bool repair) { std::set<int64_t> bad_tablets; - for (const auto& tablets_shard : _tablets_shards) { + std::map<int64_t, std::vector<int64_t>> repair_shard_bad_tablets; + auto handler = [&](const TabletSharedPtr& tablet) { + if (!tablet->check_all_rowset_segment()) { + int64_t tablet_id = tablet->tablet_id(); + bad_tablets.insert(tablet_id); + if (repair) { + repair_shard_bad_tablets[tablet_id & _tablets_shards_mask].push_back(tablet_id); + } + } + }; + for_each_tablet(handler, filter_all_tablets); + + for (const auto& [shard_index, shard_tablets] : repair_shard_bad_tablets) { + auto& tablets_shard = _tablets_shards[shard_index]; + auto& tablet_map = tablets_shard.tablet_map; std::lock_guard<std::shared_mutex> wrlock(tablets_shard.lock); - for (const auto& item : tablets_shard.tablet_map) { - TabletSharedPtr tablet = item.second; - if (!tablet->check_all_rowset_segment()) { - bad_tablets.insert(tablet->tablet_id()); - if (repair) { - tablet->set_tablet_state(TABLET_SHUTDOWN); - tablet->save_meta(); - { - std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock( - _shutdown_tablets_lock); - _shutdown_tablets.push_back(tablet); - } - LOG(WARNING) << "There are some segments lost, set tablet to shutdown state." - << "tablet_id=" << tablet->tablet_id() - << ", tablet_path=" << tablet->tablet_path(); + for (auto tablet_id : shard_tablets) { + auto it = tablet_map.find(tablet_id); + if (it == tablet_map.end()) { + bad_tablets.erase(tablet_id); + LOG(WARNING) << "Bad tablet has be removed. tablet_id=" << tablet_id; + } else { + const auto& tablet = it->second; + tablet->set_tablet_state(TABLET_SHUTDOWN); + tablet->save_meta(); + { + std::lock_guard<std::shared_mutex> shutdown_tablets_wrlock( + _shutdown_tablets_lock); + _shutdown_tablets.push_back(tablet); } + LOG(WARNING) << "There are some segments lost, set tablet to shutdown state." + << "tablet_id=" << tablet->tablet_id() + << ", tablet_path=" << tablet->tablet_path(); } } } + return bad_tablets; } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 378ff499ac..b0b5914441 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -78,8 +78,15 @@ public: TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid, bool include_deleted = false, std::string* err = nullptr); - std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&& filter = - [](Tablet* t) { return t->is_used(); }); + std::vector<TabletSharedPtr> get_all_tablet( + std::function<bool(Tablet*)>&& filter = filter_used_tablets); + + // Handler not hold the shard lock. + void for_each_tablet(std::function<void(const TabletSharedPtr&)>&& handler, + std::function<bool(Tablet*)>&& filter = filter_used_tablets); + + static bool filter_all_tablets(Tablet* tablet) { return true; } + static bool filter_used_tablets(Tablet* tablet) { return tablet->is_used(); } uint64_t get_rowset_nums(); uint64_t get_segment_nums(); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org