This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 15eca4c5db1 branch-4.0: [fix](cloud) Skip skewed warmup rowset latency samples (#62941) (#63132)
15eca4c5db1 is described below

commit 15eca4c5db1c8fcc40ec69c6600b75ef86fe58e1
Author: bobhan1 <[email protected]>
AuthorDate: Thu May 28 10:20:40 2026 +0800

    branch-4.0: [fix](cloud) Skip skewed warmup rowset latency samples (#62941) (#63132)
    
    pick https://github.com/apache/doris/pull/62941
---
 be/src/cloud/cloud_internal_service.cpp | 66 ++++++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 22 deletions(-)

diff --git a/be/src/cloud/cloud_internal_service.cpp b/be/src/cloud/cloud_internal_service.cpp
index 7b6514455eb..45cc2016b25 100644
--- a/be/src/cloud/cloud_internal_service.cpp
+++ b/be/src/cloud/cloud_internal_service.cpp
@@ -20,6 +20,7 @@
 #include <bthread/countdown_event.h>
 
 #include <algorithm>
+#include <optional>
 #include <thread>
 
 #include "cloud/cloud_storage_engine.h"
@@ -89,6 +90,22 @@ FileCacheType cache_type_to_pb(io::FileCacheType type) {
     return FileCacheType::NORMAL;
 }
 
+static int64_t current_unix_time_us() {
+    return std::chrono::duration_cast<std::chrono::microseconds>(
+                   std::chrono::system_clock::now().time_since_epoch())
+            .count();
+}
+
+static std::optional<int64_t> warm_up_rowset_cross_host_latency_us(int64_t 
start_unix_ts_us,
+                                                                   int64_t 
end_unix_ts_us) {
+    // The start timestamp is generated by the caller BE. Mixed-version callers may omit it, and
+    // system clocks across BEs are not guaranteed to be ordered.
+    if (start_unix_ts_us <= 0 || end_unix_ts_us < start_unix_ts_us) {
+        return std::nullopt;
+    }
+    return end_unix_ts_us - start_unix_ts_us;
+}
+
 void CloudInternalServiceImpl::get_file_cache_meta_by_tablet_id(
         google::protobuf::RpcController* controller [[maybe_unused]],
         const PGetFileCacheMetaRequest* request, PGetFileCacheMetaResponse* 
response,
@@ -436,15 +453,18 @@ void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
         auto tablet = res.value();
         auto tablet_meta = tablet->tablet_meta();
 
-        int64_t handle_ts = 
std::chrono::duration_cast<std::chrono::microseconds>(
-                                    
std::chrono::system_clock::now().time_since_epoch())
-                                    .count();
+        int64_t handle_ts = current_unix_time_us();
         g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts);
         int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() 
: 0;
-        g_file_cache_warm_up_rowset_request_to_handle_latency << (handle_ts - 
request_ts);
-        if (request_ts > 0 && handle_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+        auto request_to_handle_latency_us =
+                warm_up_rowset_cross_host_latency_us(request_ts, handle_ts);
+        if (request_to_handle_latency_us.has_value()) {
+            g_file_cache_warm_up_rowset_request_to_handle_latency << 
*request_to_handle_latency_us;
+        }
+        if (request_to_handle_latency_us.has_value() &&
+            *request_to_handle_latency_us > config::warm_up_rowset_slow_log_ms 
* 1000) {
             g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
-            LOG(INFO) << "warm up rowset (request to handle) took " << 
handle_ts - request_ts
+            LOG(INFO) << "warm up rowset (request to handle) took " << 
*request_to_handle_latency_us
                       << " us, tablet_id: " << rs_meta.tablet_id()
                       << ", rowset_id: " << rowset_id.to_string();
         }
@@ -478,18 +498,19 @@ void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
                     if (st.ok()) {
                         g_file_cache_event_driven_warm_up_finished_segment_num 
<< 1;
                         
g_file_cache_event_driven_warm_up_finished_segment_size << segment_size;
-                        int64_t now_ts =
-                                
std::chrono::duration_cast<std::chrono::microseconds>(
-                                        
std::chrono::system_clock::now().time_since_epoch())
-                                        .count();
+                        int64_t now_ts = current_unix_time_us();
                         
g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
-                        g_file_cache_warm_up_rowset_latency << (now_ts - 
request_ts);
+                        auto rowset_latency_us =
+                                
warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
+                        if (rowset_latency_us.has_value()) {
+                            g_file_cache_warm_up_rowset_latency << 
*rowset_latency_us;
+                        }
                         g_file_cache_warm_up_rowset_handle_to_finish_latency
                                 << (now_ts - handle_ts);
-                        if (request_ts > 0 &&
-                            now_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+                        if (rowset_latency_us.has_value() &&
+                            *rowset_latency_us > 
config::warm_up_rowset_slow_log_ms * 1000) {
                             g_file_cache_warm_up_rowset_slow_count << 1;
-                            LOG(INFO) << "warm up rowset took " << now_ts - 
request_ts
+                            LOG(INFO) << "warm up rowset took " << 
*rowset_latency_us
                                       << " us, tablet_id: " << tablet_id
                                       << ", rowset_id: " << 
rowset_id.to_string()
                                       << ", segment_id: " << segment_id;
@@ -559,18 +580,19 @@ void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
                     if (st.ok()) {
                         g_file_cache_event_driven_warm_up_finished_index_num 
<< 1;
                         g_file_cache_event_driven_warm_up_finished_index_size 
<< idx_size;
-                        int64_t now_ts =
-                                
std::chrono::duration_cast<std::chrono::microseconds>(
-                                        
std::chrono::system_clock::now().time_since_epoch())
-                                        .count();
+                        int64_t now_ts = current_unix_time_us();
                         
g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
-                        g_file_cache_warm_up_rowset_latency << (now_ts - 
request_ts);
+                        auto rowset_latency_us =
+                                
warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
+                        if (rowset_latency_us.has_value()) {
+                            g_file_cache_warm_up_rowset_latency << 
*rowset_latency_us;
+                        }
                         g_file_cache_warm_up_rowset_handle_to_finish_latency
                                 << (now_ts - handle_ts);
-                        if (request_ts > 0 &&
-                            now_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+                        if (rowset_latency_us.has_value() &&
+                            *rowset_latency_us > 
config::warm_up_rowset_slow_log_ms * 1000) {
                             g_file_cache_warm_up_rowset_slow_count << 1;
-                            LOG(INFO) << "warm up rowset took " << now_ts - 
request_ts
+                            LOG(INFO) << "warm up rowset took " << 
*rowset_latency_us
                                       << " us, tablet_id: " << tablet_id
                                       << ", rowset_id: " << 
rowset_id.to_string()
                                       << ", segment_id: " << segment_id;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to