This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit cd272d40abdffa0ee2506526f589d268bcd4738b Author: yujun <yu.jun.re...@gmail.com> AuthorDate: Tue Jul 11 09:46:43 2023 +0800 [fix](function) fix create tablet timeout (#21682) To get tablet's local size and remote size both need lock the tablet's meta. Sometimes the meta lock was locked by other process for a long time (eg, publish version). It will cause function 'get root path info' hold the tablet manager's shard lock for a long time too. Then it will cause creating tablet could not get the shard lock and finally timeout. Let getting tablet's local size/size go out of the scope of tablet manager shard lock. --- be/src/http/action/pad_rowset_action.cpp | 3 ++ be/src/olap/compaction.cpp | 1 + be/src/olap/schema_change.cpp | 5 +++ be/src/olap/tablet.cpp | 6 ++++ be/src/olap/tablet.h | 2 ++ be/src/olap/tablet_manager.cpp | 38 +++++++++------------- be/src/olap/tablet_manager.h | 3 +- be/src/olap/task/engine_clone_task.cpp | 2 ++ be/src/util/trace.h | 29 +++++++++++++++++ .../apache/doris/datasource/InternalCatalog.java | 2 +- 10 files changed, 67 insertions(+), 24 deletions(-) diff --git a/be/src/http/action/pad_rowset_action.cpp b/be/src/http/action/pad_rowset_action.cpp index df2721f50b..1b16e3f98d 100644 --- a/be/src/http/action/pad_rowset_action.cpp +++ b/be/src/http/action/pad_rowset_action.cpp @@ -25,6 +25,8 @@ #include "olap/rowset/beta_rowset_writer.h" #include "olap/rowset/rowset.h" #include "olap/storage_engine.h" +#include "util/time.h" +#include "util/trace.h" namespace doris { @@ -95,6 +97,7 @@ Status PadRowsetAction::_pad_rowset(TabletSharedPtr tablet, const Version& versi std::vector<RowsetSharedPtr> to_delete; { std::unique_lock wlock(tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); tablet->modify_rowsets(to_add, to_delete); tablet->save_meta(); } diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index b9e9754030..8fd87a0e5f 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -467,6 +467,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) { { std::lock_guard<std::mutex> wrlock_(_tablet->get_rowset_update_lock()); std::lock_guard<std::shared_mutex> wrlock(_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); // Convert the delete bitmap of the input rowsets to output rowset for // incremental data. diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 2ceff6f7c6..dd81df84d3 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -32,6 +32,7 @@ #include "olap/wrapper_field.h" #include "runtime/memory/mem_tracker.h" #include "util/defer_op.h" +#include "util/trace.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_reader.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" @@ -1846,6 +1847,7 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& std::lock_guard<std::mutex> base_tablet_lock(base_tablet->get_push_lock()); std::lock_guard<std::mutex> new_tablet_lock(new_tablet->get_push_lock()); std::lock_guard<std::shared_mutex> base_tablet_wlock(base_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); std::lock_guard<std::shared_mutex> new_tablet_wlock(new_tablet->get_header_lock()); do { @@ -2064,6 +2066,7 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& // step 3 std::lock_guard<std::mutex> rwlock(new_tablet->get_rowset_update_lock()); std::lock_guard<std::shared_mutex> new_wlock(new_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); int64_t new_max_version = new_tablet->max_version().second; rowsets.clear(); if (max_version < new_max_version) { @@ -2094,6 +2097,7 @@ Status SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2& } else { // set state to ready std::lock_guard<std::shared_mutex> new_wlock(new_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); res = new_tablet->set_tablet_state(TabletState::TABLET_RUNNING); if (!res) { break; @@ -2171,6 +2175,7 @@ Status SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams { // save tablet meta here because rowset meta is not saved during add rowset std::lock_guard<std::shared_mutex> new_wlock(sc_params.new_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); sc_params.new_tablet->save_meta(); } if (res) { diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index ce1b14f8a1..968b6ad9d4 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -68,6 +68,7 @@ namespace doris { using namespace ErrorCode; +using namespace std::chrono_literals; using std::pair; using std::nothrow; @@ -75,6 +76,8 @@ using std::sort; using std::string; using std::vector; +const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD = 10s; + DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_finish_count, MetricUnit::OPERATIONS); @@ -300,6 +303,7 @@ Status Tablet::revise_tablet_meta(const std::vector<RowsetMetaSharedPtr>& rowset Status Tablet::add_rowset(RowsetSharedPtr rowset) { DCHECK(rowset != nullptr); std::lock_guard<std::shared_mutex> wrlock(_meta_lock); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); // If the rowset already exist, just return directly. The rowset_id is an unique-id, // we can use it to check this situation. if (_contains_rowset(rowset->rowset_id())) { @@ -535,6 +539,7 @@ void Tablet::_delete_stale_rowset_by_version(const Version& version) { void Tablet::delete_expired_stale_rowset() { int64_t now = UnixSeconds(); std::lock_guard<std::shared_mutex> wrlock(_meta_lock); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); // Compute the end time to delete rowsets, when a expired rowset createtime less then this time, it will be deleted. double expired_stale_sweep_endtime = ::difftime(now, config::tablet_rowset_stale_sweep_time_sec); @@ -972,6 +977,7 @@ void Tablet::_max_continuous_version_from_beginning_unlocked(Version* version, V void Tablet::calculate_cumulative_point() { std::lock_guard<std::shared_mutex> wrlock(_meta_lock); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); int64_t ret_cumulative_point; _cumulative_compaction_policy->calculate_cumulative_point( this, _tablet_meta->all_rs_metas(), _cumulative_point, &ret_cumulative_point); diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h index d34597323a..15e17223c3 100644 --- a/be/src/olap/tablet.h +++ b/be/src/olap/tablet.h @@ -62,6 +62,8 @@ using TabletSharedPtr = std::shared_ptr<Tablet>; enum TabletStorageType { STORAGE_TYPE_LOCAL, STORAGE_TYPE_REMOTE, STORAGE_TYPE_REMOTE_AND_LOCAL }; +extern const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD; + class Tablet : public BaseTablet { public: static TabletSharedPtr create_tablet_from_meta(TabletMetaSharedPtr tablet_meta, diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index fa7c25e9c4..5f2bd1b5c0 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -473,6 +473,7 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId tablet_id, TReplicaId repl if (!keep_files) { // drop tablet will update tablet meta, should lock std::lock_guard<std::shared_mutex> wrlock(to_drop_tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); LOG(INFO) << "set tablet to shutdown state and remove it from memory. " << "tablet_id=" << tablet_id << ", tablet_path=" << to_drop_tablet->tablet_path(); // NOTE: has to update tablet here, but must not update tablet meta directly. @@ -583,18 +584,14 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId tablet_id, TabletUid tablet_ return nullptr; } -std::vector<TabletSharedPtr> TabletManager::get_all_tablet() { +std::vector<TabletSharedPtr> TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) { std::vector<TabletSharedPtr> res; for (const auto& tablets_shard : _tablets_shards) { std::shared_lock rdlock(tablets_shard.lock); - for (const auto& tablet_map : tablets_shard.tablet_map) { - // these are tablets which is not deleted - TabletSharedPtr tablet = tablet_map.second; - if (!tablet->is_used()) { - LOG(WARNING) << "tablet cannot be used. tablet=" << tablet->tablet_id(); - continue; + for (auto& [id, tablet] : tablets_shard.tablet_map) { + if (filter(tablet.get())) { + res.emplace_back(tablet); } - res.emplace_back(tablet); } } return res; @@ -1096,20 +1093,17 @@ void TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_ma size_t* tablet_count) { DCHECK(tablet_count); *tablet_count = 0; - for (const auto& tablets_shard : _tablets_shards) { - std::shared_lock rdlock(tablets_shard.lock); - for (const auto& item : tablets_shard.tablet_map) { - TabletSharedPtr tablet = item.second; - ++(*tablet_count); - auto iter = path_map->find(tablet->data_dir()->path()); - if (iter == path_map->end()) { - continue; - } - if (iter->second.is_used) { - iter->second.local_used_capacity += tablet->tablet_local_size(); - iter->second.remote_used_capacity += tablet->tablet_remote_size(); - } - } + auto filter = [path_map, tablet_count](Tablet* t) -> bool { + ++(*tablet_count); + auto iter = path_map->find(t->data_dir()->path()); + return iter != path_map->end() && iter->second.is_used; + }; + + auto tablets = get_all_tablet(filter); + for (const auto& tablet : tablets) { + auto& data_dir_info = (*path_map)[tablet->data_dir()->path()]; + data_dir_info.local_used_capacity += tablet->tablet_local_size(); + data_dir_info.remote_used_capacity += tablet->tablet_remote_size(); } } diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h index 667013d268..378ff499ac 100644 --- a/be/src/olap/tablet_manager.h +++ b/be/src/olap/tablet_manager.h @@ -78,7 +78,8 @@ public: TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid, bool include_deleted = false, std::string* err = nullptr); - std::vector<TabletSharedPtr> get_all_tablet(); + std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&& filter = + [](Tablet* t) { return t->is_used(); }); uint64_t get_rowset_nums(); uint64_t get_segment_nums(); diff --git a/be/src/olap/task/engine_clone_task.cpp b/be/src/olap/task/engine_clone_task.cpp index 5c8c2f8070..9352c7a0ce 100644 --- a/be/src/olap/task/engine_clone_task.cpp +++ b/be/src/olap/task/engine_clone_task.cpp @@ -34,6 +34,7 @@ #include "runtime/thread_context.h" #include "util/defer_op.h" #include "util/thrift_rpc_helper.h" +#include "util/trace.h" using std::set; using std::stringstream; @@ -466,6 +467,7 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const std::string& clone_d std::lock_guard<std::mutex> push_lock(tablet->get_push_lock()); std::lock_guard<std::mutex> rwlock(tablet->get_rowset_update_lock()); std::lock_guard<std::shared_mutex> wrlock(tablet->get_header_lock()); + SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD); // check clone dir existed if (!FileUtils::check_exist(clone_dir)) { return Status::InternalError("clone dir not existed. clone_dir={}", clone_dir); diff --git a/be/src/util/trace.h b/be/src/util/trace.h index 40c6ba265f..7fe0c9e0c2 100644 --- a/be/src/util/trace.h +++ b/be/src/util/trace.h @@ -18,18 +18,21 @@ #include <rapidjson/writer.h> +#include <chrono> #include <iosfwd> #include <memory> #include <string> #include <utility> #include <vector> +#include "common/logging.h" #include "gutil/macros.h" #include "gutil/ref_counted.h" #include "gutil/strings/stringpiece.h" #include "gutil/strings/substitute.h" #include "gutil/threading/thread_collision_warner.h" #include "gutil/walltime.h" +#include "util/scoped_cleanup.h" #include "util/spinlock.h" #include "util/trace_metrics.h" @@ -112,6 +115,32 @@ class Trace; } \ }() +// If this scope times out, make a simple trace. +// It will log the cost time only. +// Timeout is chrono duration struct, eg: 5ms, 100 * 1s. +#define SCOPED_SIMPLE_TRACE_IF_TIMEOUT(timeout) \ + SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, LOG(WARNING)) + +// If this scope times out, then put simple trace to the stream. +// Timeout is chrono duration struct, eg: 5ms, 100 * 1s. +// For example: +// +// std::string tag = "[foo]"; +// SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(5s, LOG(INFO) << tag); +// +#define SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, stream) \ + using namespace std::chrono_literals; \ + auto VARNAME_LINENUM(scoped_simple_trace) = doris::MonotonicMicros(); \ + SCOPED_CLEANUP({ \ + auto VARNAME_LINENUM(timeout_us) = \ + std::chrono::duration_cast<std::chrono::microseconds>(timeout).count(); \ + auto VARNAME_LINENUM(cost_us) = \ + doris::MonotonicMicros() - VARNAME_LINENUM(scoped_simple_trace); \ + if (VARNAME_LINENUM(cost_us) >= VARNAME_LINENUM(timeout_us)) { \ + stream << "Simple trace cost(us): " << VARNAME_LINENUM(cost_us); \ + } \ + }) + namespace doris { struct TraceEntry; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index 8a93277bf5..ca24af08eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -1763,7 +1763,7 @@ public class InternalCatalog implements CatalogIf<Database> { } if (!ok || !countDownLatch.getStatus().ok()) { - errMsg = "Failed to create partition[" + partitionName + "]. Timeout."; + errMsg = "Failed to create partition[" + partitionName + "]. Timeout:" + (timeout / 1000) + " seconds."; // clear tasks AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org