TangSiyang2001 commented on code in PR #20617: URL: https://github.com/apache/doris/pull/20617#discussion_r1229599579
########## fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java: ########## @@ -1815,11 +1816,35 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long } if (!ok || !countDownLatch.getStatus().ok()) { - errMsg = "Failed to create partition[" + partitionName + "]. Timeout:" + (timeout / 1000) + " seconds."; + SystemInfoService infoService = Env.getCurrentSystemInfo(); + List<String> allBEHost = countDownLatch.getLeftMarks().stream().map(item -> { + return infoService.getBackend(item.getKey()).getHost(); + }).distinct().collect(Collectors.toList()); + List<String> downBEList = countDownLatch.getLeftMarks().stream().map(item -> { + return infoService.getBackend(item.getKey()); + }).distinct().filter(new Predicate<Backend>() { + @Override + public boolean test(Backend backend) { + return !backend.isAlive(); + } + }).map(Backend::getHost).collect(Collectors.toList()); + + if (null != allBEHost || allBEHost.size() != 0) { + if (null != downBEList || downBEList.size() != 0) { + allBEHost.removeAll(downBEList); + } + String timeoutBE = StringUtils.join(allBEHost, ","); + errMsg += "Failed to create partition[" + partitionName + "] in " + timeoutBE + ". Timeout:" + ( + timeout / 1000) + " seconds."; + } Review Comment: This can be implemented more concisely. `!countDownLatch.getStatus().ok()` means the agent task failed, in which case the correct, detailed error message will already have been set (including the BE-down case), so it might be unnecessary to check the BE status again. `!ok` can be treated as a real timeout, so just move the timeout-related message to that branch. Something like this: ```java if (!ok || !countDownLatch.getStatus().ok()) { errMsg = "Failed to create partition[" + partitionName + "]."; // clear tasks AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); if (!countDownLatch.getStatus().ok()) { errMsg += " Error: " + countDownLatch.getStatus().getErrorMsg(); } else { errMsg += "Timeout:" + (timeout / 1000) + " seconds." 
List<Entry<Long, Long>> unfinishedMarks = countDownLatch.getLeftMarks(); // only show at most 3 results List<Entry<Long, Long>> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 3)); if (!subList.isEmpty()) { errMsg += " Unfinished mark: " + Joiner.on(", ").join(subList); } } LOG.warn(errMsg); throw new DdlException(errMsg); } ``` ########## fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java: ########## @@ -1815,11 +1816,35 @@ private Partition createPartitionWithIndices(String clusterName, long dbId, long } if (!ok || !countDownLatch.getStatus().ok()) { - errMsg = "Failed to create partition[" + partitionName + "]. Timeout:" + (timeout / 1000) + " seconds."; + SystemInfoService infoService = Env.getCurrentSystemInfo(); + List<String> allBEHost = countDownLatch.getLeftMarks().stream().map(item -> { + return infoService.getBackend(item.getKey()).getHost(); + }).distinct().collect(Collectors.toList()); + List<String> downBEList = countDownLatch.getLeftMarks().stream().map(item -> { + return infoService.getBackend(item.getKey()); + }).distinct().filter(new Predicate<Backend>() { + @Override + public boolean test(Backend backend) { + return !backend.isAlive(); + } + }).map(Backend::getHost).collect(Collectors.toList()); + + if (null != allBEHost || allBEHost.size() != 0) { + if (null != downBEList || downBEList.size() != 0) { + allBEHost.removeAll(downBEList); + } + String timeoutBE = StringUtils.join(allBEHost, ","); + errMsg += "Failed to create partition[" + partitionName + "] in " + timeoutBE + ". Timeout:" + ( + timeout / 1000) + " seconds."; + } Review Comment: This can be implemented more concisely. `!countDownLatch.getStatus().ok()` means the agent task failed, in which case the correct, detailed error message will already have been set (including the BE-down case), so it might be unnecessary to check the BE status again. `!ok` can be treated as a real timeout, so just move the timeout-related message to that branch. 
Something like this: ```java if (!ok || !countDownLatch.getStatus().ok()) { errMsg = "Failed to create partition[" + partitionName + "]."; // clear tasks AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE); if (!countDownLatch.getStatus().ok()) { errMsg += " Error: " + countDownLatch.getStatus().getErrorMsg(); } else { errMsg += "Timeout:" + (timeout / 1000) + " seconds." List<Entry<Long, Long>> unfinishedMarks = countDownLatch.getLeftMarks(); // only show at most 3 results List<Entry<Long, Long>> subList = unfinishedMarks.subList(0, Math.min(unfinishedMarks.size(), 3)); if (!subList.isEmpty()) { errMsg += " Unfinished mark: " + Joiner.on(", ").join(subList); } } LOG.warn(errMsg); throw new DdlException(errMsg); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org