This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 28e4c6936ca [metric](cloud) add metrics for get tablets and rowsets (#51320)
28e4c6936ca is described below

commit 28e4c6936ca1f298c6ca3b1e947209b3285ea32b
Author: TengJianPing <tengjianp...@selectdb.com>
AuthorDate: Sat May 31 18:05:56 2025 +0800

    [metric](cloud) add metrics for get tablets and rowsets (#51320)
---
 be/src/cloud/config.cpp                     |  1 +
 be/src/cloud/config.h                       |  1 +
 be/src/pipeline/exec/olap_scan_operator.cpp | 34 +++++++++++++++++++++++++++++
 be/src/util/doris_metrics.cpp               |  6 +++++
 be/src/util/doris_metrics.h                 |  3 +++
 be/test/util/doris_metrics_test.cpp         | 11 ++++++++++
 6 files changed, 56 insertions(+)

diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp
index bc5c90e6e94..b0f80835598 100644
--- a/be/src/cloud/config.cpp
+++ b/be/src/cloud/config.cpp
@@ -40,6 +40,7 @@ DEFINE_Int64(tablet_cache_capacity, "100000");
 DEFINE_Int64(tablet_cache_shards, "16");
 DEFINE_mInt32(tablet_sync_interval_s, "1800");
 DEFINE_mInt32(init_scanner_sync_rowsets_parallelism, "10");
+DEFINE_mInt32(sync_rowsets_slow_threshold_ms, "1000");
 
 DEFINE_mInt64(min_compaction_failure_interval_ms, "5000");
 DEFINE_mInt64(base_compaction_freeze_interval_s, "7200");
diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h
index 9e724082c9f..f7b85231cbd 100644
--- a/be/src/cloud/config.h
+++ b/be/src/cloud/config.h
@@ -71,6 +71,7 @@ DECLARE_Int64(tablet_cache_shards);
 DECLARE_mInt32(tablet_sync_interval_s);
 // parallelism for scanner init where may issue RPCs to sync rowset meta from MS
 DECLARE_mInt32(init_scanner_sync_rowsets_parallelism);
+DECLARE_mInt32(sync_rowsets_slow_threshold_ms);
 
 // Cloud compaction config
 DECLARE_mInt64(min_compaction_failure_interval_ms);
diff --git a/be/src/pipeline/exec/olap_scan_operator.cpp b/be/src/pipeline/exec/olap_scan_operator.cpp
index 58a7730e8e5..b642c857dd1 100644
--- a/be/src/pipeline/exec/olap_scan_operator.cpp
+++ b/be/src/pipeline/exec/olap_scan_operator.cpp
@@ -513,6 +513,40 @@ Status OlapScanLocalState::hold_tablets() {
             COUNTER_UPDATE(_sync_rowset_get_remote_delete_bitmap_rpc_timer,
                            sync_stats.get_remote_delete_bitmap_rpc_ns);
         }
+        auto time_ms = duration_ns / 1000 / 1000;
+        if (time_ms >= config::sync_rowsets_slow_threshold_ms) {
+            
DorisMetrics::instance()->get_remote_tablet_slow_time_ms->increment(time_ms);
+            DorisMetrics::instance()->get_remote_tablet_slow_cnt->increment(1);
+            LOG_WARNING("get tablet takes too long")
+                    .tag("query_id", 
print_id(PipelineXLocalState<>::_state->query_id()))
+                    .tag("node_id", _parent->node_id())
+                    .tag("total_time", PrettyPrinter::print(duration_ns, 
TUnit::TIME_NS))
+                    .tag("num_tablets", _tablets.size())
+                    .tag("tablet_meta_cache_hit", 
_sync_rowset_tablet_meta_cache_hit->value())
+                    .tag("tablet_meta_cache_miss", 
_sync_rowset_tablet_meta_cache_miss->value())
+                    .tag("get_remote_tablet_meta_rpc_time",
+                         PrettyPrinter::print(
+                                 
_sync_rowset_get_remote_tablet_meta_rpc_timer->value(),
+                                 TUnit::TIME_NS))
+                    .tag("remote_rowsets_num", 
_sync_rowset_get_remote_rowsets_num->value())
+                    .tag("get_remote_rowsets_rpc_time",
+                         
PrettyPrinter::print(_sync_rowset_get_remote_rowsets_rpc_timer->value(),
+                                              TUnit::TIME_NS))
+                    .tag("local_delete_bitmap_rowsets_num",
+                         
_sync_rowset_get_local_delete_bitmap_rowsets_num->value())
+                    .tag("remote_delete_bitmap_rowsets_num",
+                         
_sync_rowset_get_remote_delete_bitmap_rowsets_num->value())
+                    .tag("remote_delete_bitmap_key_count",
+                         
_sync_rowset_get_remote_delete_bitmap_key_count->value())
+                    .tag("remote_delete_bitmap_bytes",
+                         
PrettyPrinter::print(_sync_rowset_get_remote_delete_bitmap_bytes->value(),
+                                              TUnit::BYTES))
+                    .tag("get_remote_delete_bitmap_rpc_time",
+                         PrettyPrinter::print(
+                                 
_sync_rowset_get_remote_delete_bitmap_rpc_timer->value(),
+                                 TUnit::TIME_NS));
+        }
+
     } else {
         for (size_t i = 0; i < _scan_ranges.size(); i++) {
             int64_t version = 0;
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index bed4624ba9c..653ec275ae9 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -214,6 +214,9 @@ DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(runtime_filter_consumer_wait_ready_ms,
                                      MetricUnit::MILLISECONDS);
 DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(runtime_filter_consumer_timeout_num, 
MetricUnit::NOUNIT);
 
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(get_remote_tablet_slow_time_ms, 
MetricUnit::MILLISECONDS);
+DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(get_remote_tablet_slow_cnt, 
MetricUnit::NOUNIT);
+
 const std::string DorisMetrics::_s_registry_name = "doris_be";
 const std::string DorisMetrics::_s_hook_name = "doris_metrics";
 
@@ -353,6 +356,9 @@ DorisMetrics::DorisMetrics() : _metric_registry(_s_registry_name) {
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
runtime_filter_consumer_ready_num);
     INT_COUNTER_METRIC_REGISTER(_server_metric_entity, 
runtime_filter_consumer_wait_ready_ms);
     INT_GAUGE_METRIC_REGISTER(_server_metric_entity, 
runtime_filter_consumer_timeout_num);
+
+    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, 
get_remote_tablet_slow_time_ms);
+    INT_COUNTER_METRIC_REGISTER(_server_metric_entity, 
get_remote_tablet_slow_cnt);
 }
 
 void DorisMetrics::initialize(bool init_system_metrics, const 
std::set<std::string>& disk_devices,
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 5b8515e7e5f..2a1827d06a1 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -241,6 +241,9 @@ public:
     IntCounter* runtime_filter_consumer_wait_ready_ms = nullptr;
     IntGauge* runtime_filter_consumer_timeout_num = nullptr;
 
+    IntCounter* get_remote_tablet_slow_time_ms = nullptr;
+    IntCounter* get_remote_tablet_slow_cnt = nullptr;
+
     static DorisMetrics* instance() {
         static DorisMetrics instance;
         return &instance;
diff --git a/be/test/util/doris_metrics_test.cpp b/be/test/util/doris_metrics_test.cpp
index 6e9969b1210..588805e71b1 100644
--- a/be/test/util/doris_metrics_test.cpp
+++ b/be/test/util/doris_metrics_test.cpp
@@ -178,6 +178,17 @@ TEST_F(DorisMetricsTest, Normal) {
         EXPECT_TRUE(metric != nullptr);
         EXPECT_STREQ("40", metric->to_string().c_str());
     }
+    {
+        
DorisMetrics::instance()->get_remote_tablet_slow_time_ms->increment(1000);
+        auto* metric = 
server_entity->get_metric("get_remote_tablet_slow_time_ms");
+        EXPECT_TRUE(metric != nullptr);
+        EXPECT_STREQ("1000", metric->to_string().c_str());
+
+        DorisMetrics::instance()->get_remote_tablet_slow_cnt->increment(10);
+        metric = server_entity->get_metric("get_remote_tablet_slow_cnt");
+        EXPECT_TRUE(metric != nullptr);
+        EXPECT_STREQ("10", metric->to_string().c_str());
+    }
 }
 
 } // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to