This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 0148b39de0 [fix](metric) fix be down when enable_system_metrics is 
false (#16140)
0148b39de0 is described below

commit 0148b39de07621cfe8b67baceebd22ccfaa4c7b2
Author: caiconghui <55968745+caicong...@users.noreply.github.com>
AuthorDate: Sat Jan 28 00:10:39 2023 +0800

    [fix](metric) fix be down when enable_system_metrics is false (#16140)
    
    If enable_system_metrics is set to false, the BE goes down with the
    following message: "enable metric calculator failed, maybe you set
    enable_system_metrics to false ". This commit fixes that.
    Co-authored-by: caiconghui1 <caicongh...@jd.com>
---
 be/src/common/daemon.cpp       | 52 +++++++++++++++++++++---------------------
 be/src/util/doris_metrics.cpp  |  7 ------
 be/src/util/doris_metrics.h    |  6 -----
 be/src/util/system_metrics.cpp | 21 +++++++++++++++++
 be/src/util/system_metrics.h   |  9 ++++++++
 5 files changed, 56 insertions(+), 39 deletions(-)

diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index 6b150cfefc..0711a345c2 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -242,9 +242,11 @@ void Daemon::calculate_metrics_thread() {
         if (last_ts == -1L) {
             last_ts = GetMonoTimeMicros() / 1000;
             lst_query_bytes = 
DorisMetrics::instance()->query_scan_bytes->value();
-            
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
-            
DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes,
-                                                                            
&lst_net_receive_bytes);
+            if (config::enable_system_metrics) {
+                
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
+                
DorisMetrics::instance()->system_metrics()->get_network_traffic(
+                        &lst_net_send_bytes, &lst_net_receive_bytes);
+            }
         } else {
             int64_t current_ts = GetMonoTimeMicros() / 1000;
             long interval = (current_ts - last_ts) / 1000;
@@ -256,23 +258,27 @@ void Daemon::calculate_metrics_thread() {
             
DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 0 : 
qps);
             lst_query_bytes = current_query_bytes;
 
-            // 2. max disk io util
-            DorisMetrics::instance()->max_disk_io_util_percent->set_value(
-                    
DorisMetrics::instance()->system_metrics()->get_max_io_util(lst_disks_io_time,
-                                                                               
 15));
-            // update lst map
-            
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
-
-            // 3. max network traffic
-            int64_t max_send = 0;
-            int64_t max_receive = 0;
-            DorisMetrics::instance()->system_metrics()->get_max_net_traffic(
-                    lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, 
&max_receive);
-            
DorisMetrics::instance()->max_network_send_bytes_rate->set_value(max_send);
-            
DorisMetrics::instance()->max_network_receive_bytes_rate->set_value(max_receive);
-            // update lst map
-            
DorisMetrics::instance()->system_metrics()->get_network_traffic(&lst_net_send_bytes,
-                                                                            
&lst_net_receive_bytes);
+            if (config::enable_system_metrics) {
+                // 2. max disk io util
+                
DorisMetrics::instance()->system_metrics()->update_max_disk_io_util_percent(
+                        lst_disks_io_time, 15);
+
+                // update lst map
+                
DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time);
+
+                // 3. max network traffic
+                int64_t max_send = 0;
+                int64_t max_receive = 0;
+                
DorisMetrics::instance()->system_metrics()->get_max_net_traffic(
+                        lst_net_send_bytes, lst_net_receive_bytes, 15, 
&max_send, &max_receive);
+                
DorisMetrics::instance()->system_metrics()->update_max_network_send_bytes_rate(
+                        max_send);
+                
DorisMetrics::instance()->system_metrics()->update_max_network_receive_bytes_rate(
+                        max_receive);
+                // update lst map
+                
DorisMetrics::instance()->system_metrics()->get_network_traffic(
+                        &lst_net_send_bytes, &lst_net_receive_bytes);
+            }
         }
     } while 
(!_stop_background_threads_latch.wait_for(std::chrono::seconds(15)));
 }
@@ -381,12 +387,6 @@ void Daemon::start() {
     CHECK(st.ok()) << st;
 
     if (config::enable_metric_calculator) {
-        CHECK(DorisMetrics::instance()->is_inited())
-                << "enable metric calculator failed, maybe you set 
enable_system_metrics to false "
-                << " or there may be some hardware error which causes metric 
init failed, please "
-                   "check log first;"
-                << " you can set enable_metric_calculator = false to quickly 
recover ";
-
         st = Thread::create(
                 "Daemon", "calculate_metrics_thread",
                 [this]() { this->calculate_metrics_thread(); }, 
&_calculate_metrics_thread);
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index e5dad525fb..fb4f16237b 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -142,9 +142,6 @@ 
DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(compaction_waitting_permits, MetricUnit::NOUN
 DEFINE_HISTOGRAM_METRIC_PROTOTYPE_2ARG(tablet_version_num_distribution, 
MetricUnit::NOUNIT);
 
 DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_scan_bytes_per_second, 
MetricUnit::BYTES);
-DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_disk_io_util_percent, 
MetricUnit::PERCENT);
-DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_send_bytes_rate, 
MetricUnit::BYTES);
-DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(max_network_receive_bytes_rate, 
MetricUnit::BYTES);
 
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(readable_blocks_total, 
MetricUnit::BLOCKS);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(writable_blocks_total, 
MetricUnit::BLOCKS);
@@ -275,9 +272,6 @@ DorisMetrics::DorisMetrics() : 
_metric_registry(_s_registry_name) {
     HISTOGRAM_METRIC_REGISTER(_server_metric_entity, 
tablet_version_num_distribution);
 
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
query_scan_bytes_per_second);
-    INT_GAUGE_METRIC_REGISTER(_server_metric_entity, max_disk_io_util_percent);
-    INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
max_network_send_bytes_rate);
-    INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
max_network_receive_bytes_rate);
 
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_rows);
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, load_bytes);
@@ -325,7 +319,6 @@ void DorisMetrics::initialize(bool init_system_metrics, 
const std::set<std::stri
     if (init_system_metrics) {
         _system_metrics.reset(
                 new SystemMetrics(&_metric_registry, disk_devices, 
network_interfaces));
-        _is_inited = true;
     }
 }
 
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index f859c904d9..f779d44a3e 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -147,9 +147,6 @@ public:
     // The following metrics will be calculated
     // by metric calculator
     IntGauge* query_scan_bytes_per_second;
-    IntGauge* max_disk_io_util_percent;
-    IntGauge* max_network_send_bytes_rate;
-    IntGauge* max_network_receive_bytes_rate;
 
     // Metrics related with file reader/writer
     IntCounter* local_file_reader_total;
@@ -234,7 +231,6 @@ public:
     MetricRegistry* metric_registry() { return &_metric_registry; }
     SystemMetrics* system_metrics() { return _system_metrics.get(); }
     MetricEntity* server_entity() { return _server_metric_entity.get(); }
-    bool is_inited() const { return _is_inited; }
 
 private:
     // Don't allow constructor
@@ -253,8 +249,6 @@ private:
     std::unique_ptr<SystemMetrics> _system_metrics;
 
     std::shared_ptr<MetricEntity> _server_metric_entity;
-
-    bool _is_inited = false;
 };
 
 }; // namespace doris
diff --git a/be/src/util/system_metrics.cpp b/be/src/util/system_metrics.cpp
index d1926aedd6..11703f2187 100644
--- a/be/src/util/system_metrics.cpp
+++ b/be/src/util/system_metrics.cpp
@@ -301,6 +301,10 @@ struct ProcMetrics {
     IntAtomicCounter* proc_procs_blocked;
 };
 
+DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_disk_io_util_percent, 
MetricUnit::PERCENT);
+DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_send_bytes_rate, 
MetricUnit::BYTES);
+DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(_max_network_receive_bytes_rate, 
MetricUnit::BYTES);
+
 const char* SystemMetrics::_s_hook_name = "system_metrics";
 
 SystemMetrics::SystemMetrics(MetricRegistry* registry, const 
std::set<std::string>& disk_devices,
@@ -318,6 +322,10 @@ SystemMetrics::SystemMetrics(MetricRegistry* registry, 
const std::set<std::strin
     _install_snmp_metrics(_server_entity.get());
     _install_load_avg_metrics(_server_entity.get());
     _install_proc_metrics(_server_entity.get());
+
+    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), _max_disk_io_util_percent);
+    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), 
_max_network_send_bytes_rate);
+    INT_GAUGE_METRIC_REGISTER(_server_entity.get(), 
_max_network_receive_bytes_rate);
 }
 
 SystemMetrics::~SystemMetrics() {
@@ -858,6 +866,19 @@ void SystemMetrics::get_max_net_traffic(const 
std::map<std::string, int64_t>& ls
     *rcv_rate = max_rcv / interval_sec;
 }
 
+void SystemMetrics::update_max_disk_io_util_percent(const 
std::map<std::string, int64_t>& lst_value,
+                                                    int64_t interval_sec) {
+    _max_disk_io_util_percent->set_value(get_max_io_util(lst_value, 
interval_sec));
+}
+
+void SystemMetrics::update_max_network_send_bytes_rate(int64_t 
max_send_bytes_rate) {
+    _max_network_send_bytes_rate->set_value(max_send_bytes_rate);
+}
+
+void SystemMetrics::update_max_network_receive_bytes_rate(int64_t 
max_receive_bytes_rate) {
+    _max_network_receive_bytes_rate->set_value(max_receive_bytes_rate);
+}
+
 void SystemMetrics::_install_proc_metrics(MetricEntity* entity) {
     _proc_metrics.reset(new ProcMetrics(entity));
 }
diff --git a/be/src/util/system_metrics.h b/be/src/util/system_metrics.h
index 903588602d..5354f494de 100644
--- a/be/src/util/system_metrics.h
+++ b/be/src/util/system_metrics.h
@@ -51,6 +51,11 @@ public:
                              const std::map<std::string, int64_t>& lst_rcv_map,
                              int64_t interval_sec, int64_t* send_rate, 
int64_t* rcv_rate);
 
+    void update_max_disk_io_util_percent(const std::map<std::string, int64_t>& 
lst_value,
+                                         int64_t interval_sec);
+    void update_max_network_send_bytes_rate(int64_t max_send_bytes_rate);
+    void update_max_network_receive_bytes_rate(int64_t max_receive_bytes_rate);
+
 private:
     void _install_cpu_metrics();
     // On Intel(R) Xeon(R) CPU E5-2450 0 @ 2.10GHz;
@@ -99,6 +104,10 @@ private:
     size_t _line_buf_size = 0;
     MetricRegistry* _registry = nullptr;
     std::shared_ptr<MetricEntity> _server_entity = nullptr;
+
+    IntGauge* _max_disk_io_util_percent;
+    IntGauge* _max_network_send_bytes_rate;
+    IntGauge* _max_network_receive_bytes_rate;
 };
 
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to