This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
     new f70dfc64316 [fix](cloud) fix dead cloud cluster status empty (#32471)
f70dfc64316 is described below

commit f70dfc64316514ff92102bd6b0e0d4f9de19dbaa
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Wed Mar 20 23:38:29 2024 +0800

    [fix](cloud) fix dead cloud cluster status empty (#32471)
---
 .../java/org/apache/doris/qe/ConnectContext.java   |  2 +
 .../java/org/apache/doris/qe/StmtExecutor.java     | 51 +++++++++++-----------
 2 files changed, 28 insertions(+), 25 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
index 4ef75173786..33e5ff72d91 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java
@@ -1071,6 +1071,8 @@ public class ConnectContext {
     }
 
     /**
+     * @param updateErr whether set this connect state to error when the returned cluster is null or empty.
+     *
      * @return Returns an available cluster in the following order
      * 1 Use an explicitly specified cluster
      * 2 If no cluster is specified, the user's default cluster is used
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index a4ea0cb25bc..808fea13be2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -791,38 +791,39 @@ public class StmtExecutor {
                 // cloud mode retry
                 LOG.debug("due to exception {} retry {} rpc {} user {}",
                         e.getMessage(), i, e instanceof RpcException, e instanceof UserException);
-                // errCode = 2, detailMessage = There is no scanNode Backend available.[10003: not alive]
-                List<String> bes = Env.getCurrentSystemInfo().getAllBackendIds().stream()
-                        .map(id -> Long.toString(id)).collect(Collectors.toList());
                 String msg = e.getMessage();
                 boolean isNeedRetry = true;
-                if (e instanceof UserException
-                        && msg.contains(SystemInfoService.NO_SCAN_NODE_BACKEND_AVAILABLE_MSG)) {
+                if (Config.isCloudMode()) {
                     isNeedRetry = false;
-                    Matcher matcher = beIpPattern.matcher(msg);
-                    // here retry planner not be recreated, so
-                    // in cloud mode drop node, be id invalid, so need not retry
-                    // such as be ids [11000, 11001] -> after drop node 11001
-                    // don't need to retry 11001's request
-                    if (matcher.find()) {
-                        String notAliveBe = matcher.group(1);
-                        isNeedRetry = bes.contains(notAliveBe);
-                        if (isNeedRetry) {
-                            Backend abnormalBe = Env.getCurrentSystemInfo().getBackend(Long.parseLong(notAliveBe));
-                            String deadCloudClusterStatus = abnormalBe.getCloudClusterStatus();
-                            String deadCloudClusterClusterName = abnormalBe.getCloudClusterName();
-                            LOG.info("need retry cluster {} status {}-{}", deadCloudClusterClusterName,
-                                    deadCloudClusterStatus, ClusterStatus.valueOf(deadCloudClusterStatus));
-                            if (ClusterStatus.valueOf(deadCloudClusterStatus) != ClusterStatus.NORMAL) {
-                                CloudSystemInfoService.waitForAutoStart(deadCloudClusterClusterName);
+                    // errCode = 2, detailMessage = There is no scanNode Backend available.[10003: not alive]
+                    List<String> bes = Env.getCurrentSystemInfo().getAllBackendIds().stream()
+                            .map(id -> Long.toString(id)).collect(Collectors.toList());
+                    if (e instanceof UserException
+                            && msg.contains(SystemInfoService.NO_SCAN_NODE_BACKEND_AVAILABLE_MSG)) {
+                        Matcher matcher = beIpPattern.matcher(msg);
+                        // here retry planner not be recreated, so
+                        // in cloud mode drop node, be id invalid, so need not retry
+                        // such as be ids [11000, 11001] -> after drop node 11001
+                        // don't need to retry 11001's request
+                        if (matcher.find()) {
+                            String notAliveBe = matcher.group(1);
+                            isNeedRetry = bes.contains(notAliveBe);
+                            if (isNeedRetry) {
+                                Backend abnormalBe = Env.getCurrentSystemInfo().getBackend(Long.parseLong(notAliveBe));
+                                String deadCloudClusterStatus = abnormalBe.getCloudClusterStatus();
+                                String deadCloudClusterClusterName = abnormalBe.getCloudClusterName();
+                                LOG.info("need retry cluster {} status {}", deadCloudClusterClusterName,
+                                        deadCloudClusterStatus);
+                                if (Strings.isNullOrEmpty(deadCloudClusterStatus)
+                                        || ClusterStatus.valueOf(deadCloudClusterStatus) != ClusterStatus.NORMAL) {
+                                    CloudSystemInfoService.waitForAutoStart(deadCloudClusterClusterName);
+                                }
                             }
                         }
                     }
                 }
-                if (i == retryTime - 1 || !isNeedRetry) {
-                    throw e;
-                }
-                if (context.getConnectType().equals(ConnectType.MYSQL) && !context.getMysqlChannel().isSend()) {
+                if (i != retryTime - 1 && isNeedRetry
+                        && context.getConnectType().equals(ConnectType.MYSQL) && !context.getMysqlChannel().isSend()) {
                     LOG.warn("retry {} times. stmt: {}", (i + 1), parsedStmt.getOrigStmt().originStmt);
                 } else {
                     throw e;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org