This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 054ef7a1b80 [Fix]fix report query statistics to FE cores (#49711)
054ef7a1b80 is described below

commit 054ef7a1b8033a10bbde3f7c45554fcbe1a81927
Author: wangbo <wan...@selectdb.com>
AuthorDate: Thu Apr 3 13:33:41 2025 +0800

    [Fix]fix report query statistics to FE cores (#49711)
    
    ### What problem does this PR solve?
    
    When using Thrift to connect to the FE, if a TException occurs, the client
    should be reopened; otherwise a core dump may occur.
    ### Release note
    
    None
---
 be/src/runtime/runtime_query_statistics_mgr.cpp | 62 ++++++++++++++-----------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/be/src/runtime/runtime_query_statistics_mgr.cpp 
b/be/src/runtime/runtime_query_statistics_mgr.cpp
index 0d3c976fedd..eb16cbd003a 100644
--- a/be/src/runtime/runtime_query_statistics_mgr.cpp
+++ b/be/src/runtime/runtime_query_statistics_mgr.cpp
@@ -94,6 +94,14 @@ static Status _do_report_exec_stats_rpc(const 
TNetworkAddress& coor_addr,
                     PrintThriftNetworkAddress(coor_addr), e.what());
         }
         return Status::RpcError("Send stats failed");
+    } catch (apache::thrift::TException& e) {
+        LOG_WARNING("Failed to report query profile to {}, reason: {} ",
+                    PrintThriftNetworkAddress(coor_addr), e.what());
+        std::this_thread::sleep_for(
+                
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
+        // just reopen to disable this connection
+        static_cast<void>(rpc_client.reopen(config::thrift_rpc_timeout_ms));
+        return Status::RpcError("Transport exception when report query 
profile");
     } catch (std::exception& e) {
         LOG_WARNING(
                 "Failed to report query profile to {}, reason: {}, you can see 
fe log for details.",
@@ -415,36 +423,38 @@ void 
RuntimeQueryStatisticsMgr::report_runtime_query_statistics() {
         TReportExecStatusResult res;
         Status rpc_status;
         try {
-            coord->reportExecStatus(res, params);
-            rpc_result[addr] = true;
-        } catch (apache::thrift::TApplicationException& e) {
-            LOG(WARNING) << "[report_query_statistics]fe " << add_str
-                         << " throw exception when report statistics, reason:" 
<< e.what()
-                         << " , you can see fe log for details.";
-        } catch (apache::thrift::transport::TTransportException& e) {
-            LOG(WARNING) << "[report_query_statistics]report workload runtime 
statistics to "
-                         << add_str << " failed,  reason: " << e.what();
-            rpc_status = coord.reopen(config::thrift_rpc_timeout_ms);
-            if (!rpc_status.ok()) {
-                LOG(WARNING) << "[report_query_statistics]reopen thrift client 
failed when report "
-                                "workload runtime statistics to"
-                             << add_str;
-            } else {
-                try {
+            try {
+                coord->reportExecStatus(res, params);
+                rpc_result[addr] = true;
+            } catch (apache::thrift::transport::TTransportException& e) {
+                LOG_WARNING(
+                        "[report_query_statistics] report to fe {} failed, 
reason:{}, try reopen.",
+                        add_str, e.what());
+                rpc_status = coord.reopen(config::thrift_rpc_timeout_ms);
+                if (!rpc_status.ok()) {
+                    LOG_WARNING(
+                            "[report_query_statistics]reopen thrift client 
failed when report "
+                            "workload runtime statistics to {}, reason: {}",
+                            add_str, rpc_status.to_string());
+                } else {
                     coord->reportExecStatus(res, params);
                     rpc_result[addr] = true;
-                } catch (apache::thrift::transport::TTransportException& e2) {
-                    LOG(WARNING)
-                            << "[report_query_statistics]retry report workload 
runtime stats to "
-                            << add_str << " failed,  reason: " << e2.what();
-                } catch (std::exception& e) {
-                    LOG_WARNING(
-                            "[report_query_statistics]unknow exception when 
report workload "
-                            "runtime statistics to {}, "
-                            "reason:{}. ",
-                            add_str, e.what());
                 }
             }
+        } catch (apache::thrift::TApplicationException& e) {
+            LOG_WARNING(
+                    "[report_query_statistics]fe {} throw exception when 
report statistics, "
+                    "reason:{}, you can see fe log for details.",
+                    add_str, e.what());
+        } catch (apache::thrift::TException& e) {
+            LOG_WARNING(
+                    "[report_query_statistics]report workload runtime 
statistics to {} failed,  "
+                    "reason: {}",
+                    add_str, e.what());
+            std::this_thread::sleep_for(
+                    
std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2));
+            // just reopen to disable this connection
+            static_cast<void>(coord.reopen(config::thrift_rpc_timeout_ms));
         } catch (std::exception& e) {
             LOG_WARNING(
                     "[report_query_statistics]unknown exception when report 
workload runtime "


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to