This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 0148b39de0 [fix](metric) fix be down when enable_system_metrics is false (#16140) 0148b39de0 is described below commit 0148b39de07621cfe8b67baceebd22ccfaa4c7b2 Author: caiconghui <55968745+caicong...@users.noreply.github.com> AuthorDate: Sat Jan 28 00:10:39 2023 +0800 [fix](metric) fix be down when enable_system_metrics is false (#16140) If enable_system_metrics is set to false, the BE (backend) process goes down with the following message: "enable metric calculator failed, maybe you set enable_system_metrics to false ". This commit fixes that. Co-authored-by: caiconghui1 <caicongh...@jd.com> --- be/src/common/daemon.cpp | 52 +++++++++++++++++++++--------------------- be/src/util/doris_metrics.cpp | 7 ------ be/src/util/doris_metrics.h | 6 ----- be/src/util/system_metrics.cpp | 21 +++++++++++++++++ be/src/util/system_metrics.h | 9 ++++++++ 5 files changed, 56 insertions(+), 39 deletions(-) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 6b150cfefc..0711a345c2 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -242,9 +242,11 @@ void Daemon::calculate_metrics_thread() { if (last_ts == -1L) { last_ts = GetMonoTimeMicros() / 1000; lst_query_bytes = DorisMetrics::instance()->query_scan_bytes->value(); - DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); - DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, - &lst_net_receive_bytes); + if (config::enable_system_metrics) { + DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); + DorisMetrics::instance()->system_metrics()->get_network_traffic( + &lst_net_send_bytes, &lst_net_receive_bytes); + } } else { int64_t current_ts = GetMonoTimeMicros() / 1000; long interval = (current_ts - last_ts) / 1000; @@ -256,23 +258,27 @@ void Daemon::calculate_metrics_thread() { DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 
0 : qps); lst_query_bytes = current_query_bytes; - // 2. max disk io util - DorisMetrics::instance()->max_disk_io_util_percent->set_value( - DorisMetrics::instance()->system_metrics()->get_max_io_util(lst_disks_io_time, - 15)); - // update lst map - DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); - - // 3. max network traffic - int64_t max_send = 0; - int64_t max_receive = 0; - DorisMetrics::instance()->system_metrics()->get_max_net_traffic( - lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive); - DorisMetrics::instance()->max_network_send_bytes_rate->set_value(max_send); - DorisMetrics::instance()->max_network_receive_bytes_rate->set_value(max_receive); - // update lst map - DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes, - &lst_net_receive_bytes); + if (config::enable_system_metrics) { + // 2. max disk io util + DorisMetrics::instance()->system_metrics()->update_max_disk_io_util_percent( + lst_disks_io_time, 15); + + // update lst map + DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); + + // 3. 
max network traffic + int64_t max_send = 0; + int64_t max_receive = 0; + DorisMetrics::instance()->system_metrics()->get_max_net_traffic( + lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive); + DorisMetrics::instance()->system_metrics()->update_max_network_send_bytes_rate( + max_send); + DorisMetrics::instance()->system_metrics()->update_max_network_receive_bytes_rate( + max_receive); + // update lst map + DorisMetrics::instance()->system_metrics()->get_network_traffic( + &lst_net_send_bytes, &lst_net_receive_bytes); + } } } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(15))); } @@ -381,12 +387,6 @@ void Daemon::start() { CHECK(st.ok()) << st; if (config::enable_metric_calculator) { - CHECK(DorisMetrics::instance()->is_inited()) - << "enable metric calculator failed, maybe you set enable_system_metrics to false " - << " or there may be some hardware error which causes metric init failed, please " - "check log first;" - << " you can set enable_metric_calculator = false to quickly recover "; - st = Thread::create( "Daemon", "calculate_metrics_thread", [this]() { this->calculate_metrics_thread(); }, &_calculate_metrics_thread); diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp index e5dad525fb..fb4f16237b 100644 --- a/be/src/util/doris_metrics.cpp +++ b/be/src/util/doris_metrics.cpp @@ -142,9 +142,6 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, MetricUnit::NOUN DEFINE_HISTOGRAM_METRIC_PROTOTYPE_2ARG(tablet_version_num_distribution, MetricUnit::NOUNIT); DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_scan_bytes_per_second, MetricUnit::BYTES); -DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, MetricUnit::PERCENT); -DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, MetricUnit::BYTES); -DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, MetricUnit::BYTES); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(readable_blocks_total, 
MetricUnit::BLOCKS); DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(writable_blocks_total, MetricUnit::BLOCKS); @@ -275,9 +272,6 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) { HISTOGRAM_METRIC_REGISTER(_server_metric_entity, tablet_version_num_distribution); INT_GAUGE_METRIC_REGISTER(_server_metric_entity, query_scan_bytes_per_second); - INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_disk_io_util_percent); - INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_network_send_bytes_rate); - INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_network_receive_bytes_rate); INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_rows); INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_bytes); @@ -325,7 +319,6 @@ void DorisMetrics::initialize(bool init_system_metrics, const std::set<std::stri if (init_system_metrics) { _system_metrics.reset( new SystemMetrics(&_metric_registry, disk_devices, network_interfaces)); - _is_inited = true; } } diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h index f859c904d9..f779d44a3e 100644 --- a/be/src/util/doris_metrics.h +++ b/be/src/util/doris_metrics.h @@ -147,9 +147,6 @@ public: // The following metrics will be calculated // by metric calculator IntGauge* query_scan_bytes_per_second; - IntGauge* max_disk_io_util_percent; - IntGauge* max_network_send_bytes_rate; - IntGauge* max_network_receive_bytes_rate; // Metrics related with file reader/writer IntCounter* local_file_reader_total; @@ -234,7 +231,6 @@ public: MetricRegistry* metric_registry() { return &_metric_registry; } SystemMetrics* system_metrics() { return _system_metrics.get(); } MetricEntity* server_entity() { return _server_metric_entity.get(); } - bool is_inited() const { return _is_inited; } private: // Don't allow constructor @@ -253,8 +249,6 @@ private: std::unique_ptr<SystemMetrics> _system_metrics; std::shared_ptr<MetricEntity> _server_metric_entity; - - bool _is_inited = false; }; }; // namespace doris diff --git 
a/be/src/util/system_metrics.cpp b/be/src/util/system_metrics.cpp index d1926aedd6..11703f2187 100644 --- a/be/src/util/system_metrics.cpp +++ b/be/src/util/system_metrics.cpp @@ -301,6 +301,10 @@ struct ProcMetrics { IntAtomicCounter* proc_procs_blocked; }; +DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_disk_io_util_percent, MetricUnit::PERCENT); +DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_send_bytes_rate, MetricUnit::BYTES); +DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_receive_bytes_rate, MetricUnit::BYTES); + const char* SystemMetrics::_s_hook_name = "system_metrics"; SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::string>& disk_devices, @@ -318,6 +322,10 @@ SystemMetrics::SystemMetrics(MetricRegistry* registry, const std::set<std::strin _install_snmp_metrics(_server_entity.get()); _install_load_avg_metrics(_server_entity.get()); _install_proc_metrics(_server_entity.get()); + + INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_disk_io_util_percent); + INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_network_send_bytes_rate); + INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_network_receive_bytes_rate); } SystemMetrics::~SystemMetrics() { @@ -858,6 +866,19 @@ void SystemMetrics::get_max_net_traffic(const std::map<std::string, int64_t>& ls *rcv_rate = max_rcv / interval_sec; } +void SystemMetrics::update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value, + int64_t interval_sec) { + _max_disk_io_util_percent->set_value(get_max_io_util(lst_value, interval_sec)); +} + +void SystemMetrics::update_max_network_send_bytes_rate(int64_t max_send_bytes_rate) { + _max_network_send_bytes_rate->set_value(max_send_bytes_rate); +} + +void SystemMetrics::update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate) { + _max_network_receive_bytes_rate->set_value(max_receive_bytes_rate); +} + void SystemMetrics::_install_proc_metrics(MetricEntity* entity) { _proc_metrics.reset(new 
ProcMetrics(entity)); } diff --git a/be/src/util/system_metrics.h b/be/src/util/system_metrics.h index 903588602d..5354f494de 100644 --- a/be/src/util/system_metrics.h +++ b/be/src/util/system_metrics.h @@ -51,6 +51,11 @@ public: const std::map<std::string, int64_t>& lst_rcv_map, int64_t interval_sec, int64_t* send_rate, int64_t* rcv_rate); + void update_max_disk_io_util_percent(const std::map<std::string, int64_t>& lst_value, + int64_t interval_sec); + void update_max_network_send_bytes_rate(int64_t max_send_bytes_rate); + void update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate); + private: void _install_cpu_metrics(); // On Intel(R) Xeon(R) CPU E5-2450 0 @ 2.10GHz; @@ -99,6 +104,10 @@ private: size_t _line_buf_size = 0; MetricRegistry* _registry = nullptr; std::shared_ptr<MetricEntity> _server_entity = nullptr; + + IntGauge* _max_disk_io_util_percent; + IntGauge* _max_network_send_bytes_rate; + IntGauge* _max_network_receive_bytes_rate; }; } // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org