This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 054ef7a1b80 [Fix]fix report query statistics to FE cores (#49711) 054ef7a1b80 is described below commit 054ef7a1b8033a10bbde3f7c45554fcbe1a81927 Author: wangbo <wan...@selectdb.com> AuthorDate: Thu Apr 3 13:33:41 2025 +0800 [Fix]fix report query statistics to FE cores (#49711) ### What problem does this PR solve? When using thrift to connect to FE, if a TException happens, the client should be reopened; otherwise a core dump may happen. ### Release note None --- be/src/runtime/runtime_query_statistics_mgr.cpp | 62 ++++++++++++++----------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/be/src/runtime/runtime_query_statistics_mgr.cpp b/be/src/runtime/runtime_query_statistics_mgr.cpp index 0d3c976fedd..eb16cbd003a 100644 --- a/be/src/runtime/runtime_query_statistics_mgr.cpp +++ b/be/src/runtime/runtime_query_statistics_mgr.cpp @@ -94,6 +94,14 @@ static Status _do_report_exec_stats_rpc(const TNetworkAddress& coor_addr, PrintThriftNetworkAddress(coor_addr), e.what()); } return Status::RpcError("Send stats failed"); + } catch (apache::thrift::TException& e) { + LOG_WARNING("Failed to report query profile to {}, reason: {} ", + PrintThriftNetworkAddress(coor_addr), e.what()); + std::this_thread::sleep_for( + std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2)); + // just reopen to disable this connection + static_cast<void>(rpc_client.reopen(config::thrift_rpc_timeout_ms)); + return Status::RpcError("Transport exception when report query profile"); } catch (std::exception& e) { LOG_WARNING( "Failed to report query profile to {}, reason: {}, you can see fe log for details.", @@ -415,36 +423,38 @@ void RuntimeQueryStatisticsMgr::report_runtime_query_statistics() { TReportExecStatusResult res; Status rpc_status; try { - coord->reportExecStatus(res, params); - rpc_result[addr] = true; - } catch (apache::thrift::TApplicationException& e) { - LOG(WARNING) << 
"[report_query_statistics]fe " << add_str - << " throw exception when report statistics, reason:" << e.what() - << " , you can see fe log for details."; - } catch (apache::thrift::transport::TTransportException& e) { - LOG(WARNING) << "[report_query_statistics]report workload runtime statistics to " - << add_str << " failed, reason: " << e.what(); - rpc_status = coord.reopen(config::thrift_rpc_timeout_ms); - if (!rpc_status.ok()) { - LOG(WARNING) << "[report_query_statistics]reopen thrift client failed when report " - "workload runtime statistics to" - << add_str; - } else { - try { + try { + coord->reportExecStatus(res, params); + rpc_result[addr] = true; + } catch (apache::thrift::transport::TTransportException& e) { + LOG_WARNING( + "[report_query_statistics] report to fe {} failed, reason:{}, try reopen.", + add_str, e.what()); + rpc_status = coord.reopen(config::thrift_rpc_timeout_ms); + if (!rpc_status.ok()) { + LOG_WARNING( + "[report_query_statistics]reopen thrift client failed when report " + "workload runtime statistics to {}, reason: {}", + add_str, rpc_status.to_string()); + } else { coord->reportExecStatus(res, params); rpc_result[addr] = true; - } catch (apache::thrift::transport::TTransportException& e2) { - LOG(WARNING) - << "[report_query_statistics]retry report workload runtime stats to " - << add_str << " failed, reason: " << e2.what(); - } catch (std::exception& e) { - LOG_WARNING( - "[report_query_statistics]unknow exception when report workload " - "runtime statistics to {}, " - "reason:{}. 
", - add_str, e.what()); } } + } catch (apache::thrift::TApplicationException& e) { + LOG_WARNING( + "[report_query_statistics]fe {} throw exception when report statistics, " + "reason:{}, you can see fe log for details.", + add_str, e.what()); + } catch (apache::thrift::TException& e) { + LOG_WARNING( + "[report_query_statistics]report workload runtime statistics to {} failed, " + "reason: {}", + add_str, e.what()); + std::this_thread::sleep_for( + std::chrono::milliseconds(config::thrift_client_retry_interval_ms * 2)); + // just reopen to disable this connection + static_cast<void>(coord.reopen(config::thrift_rpc_timeout_ms)); } catch (std::exception& e) { LOG_WARNING( "[report_query_statistics]unknown exception when report workload runtime " --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org