This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 5f5cb635df3 [chore](tablet scheduler) update sched ctx err #27514 (#31189) 5f5cb635df3 is described below commit 5f5cb635df332ffc74c22eac38834e3c9c4e2ca7 Author: yujun <yu.jun.re...@gmail.com> AuthorDate: Tue Apr 30 14:18:04 2024 +0800 [chore](tablet scheduler) update sched ctx err #27514 (#31189) --- .../org/apache/doris/clone/BeLoadRebalancer.java | 6 ++--- .../org/apache/doris/clone/SchedException.java | 1 + .../org/apache/doris/clone/TabletSchedCtx.java | 12 +++++++++ .../org/apache/doris/clone/TabletScheduler.java | 30 ++++++++++++---------- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java index 93d884af5ce..c81271eb890 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java @@ -288,7 +288,7 @@ public class BeLoadRebalancer extends Rebalancer { } } if (!setSource) { - throw new SchedException(Status.UNRECOVERABLE, "unable to take src slot"); + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to take src backend slot"); } // Select a low load backend as destination. 
@@ -331,7 +331,7 @@ public class BeLoadRebalancer extends Rebalancer { } if (candidates.isEmpty()) { - throw new SchedException(Status.UNRECOVERABLE, "unable to find low backend"); + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "unable to find low dest backend"); } List<BePathLoadStatPair> candFitPaths = Lists.newArrayList(); @@ -368,7 +368,7 @@ public class BeLoadRebalancer extends Rebalancer { } throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "unable to find low backend"); + "beload waiting for dest backend slot"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/SchedException.java b/fe/fe-core/src/main/java/org/apache/doris/clone/SchedException.java index a343e6543c3..cb7080299ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/SchedException.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/SchedException.java @@ -31,6 +31,7 @@ public class SchedException extends Exception { NONE, WAITING_DECOMMISSION, WAITING_SLOT, + DIAGNOSE_IGNORE, } private Status status; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java index b9606f09d0a..912fc1bb316 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java @@ -424,6 +424,18 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> { this.errMsg = errMsg; } + public String getErrMsg() { + return errMsg; + } + + public SubCode getSchedFailedCode() { + return schedFailedCode; + } + + public void setSchedFailedCode(SubCode code) { + schedFailedCode = code; + } + public CloneTask getCloneTask() { return cloneTask; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index ede3e541675..4a22878424d 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -390,15 +390,6 @@ public class TabletScheduler extends MasterDaemon { AgentBatchTask batchTask = new AgentBatchTask(); for (TabletSchedCtx tabletCtx : currentBatch) { try { - if (Config.disable_tablet_scheduler) { - // do not schedule more tablet is tablet scheduler is disabled. - throw new SchedException(Status.FINISHED, "tablet scheduler is disabled"); - } - if (Config.disable_balance && tabletCtx.getType() == Type.BALANCE) { - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, - "config disable balance"); - continue; - } scheduleTablet(tabletCtx, batchTask); } catch (SchedException e) { tabletCtx.setErrMsg(e.getMessage()); @@ -422,6 +413,7 @@ public class TabletScheduler extends MasterDaemon { Preconditions.checkState(e.getStatus() == Status.UNRECOVERABLE, e.getStatus()); // discard stat.counterTabletScheduledDiscard.incrementAndGet(); + tabletCtx.setSchedFailedCode(e.getSubCode()); finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, e.getStatus(), e.getMessage()); } continue; @@ -429,6 +421,8 @@ public class TabletScheduler extends MasterDaemon { LOG.warn("got unexpected exception, discard this schedule. tablet: {}", tabletCtx.getTabletId(), e); stat.counterTabletScheduledFailed.incrementAndGet(); + tabletCtx.setSchedFailedCode(SubCode.NONE); + tabletCtx.setErrMsg(e.getMessage()); finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.UNEXPECTED, Status.UNRECOVERABLE, e.getMessage()); continue; } @@ -470,6 +464,14 @@ public class TabletScheduler extends MasterDaemon { * Try to schedule a single tablet. */ private void scheduleTablet(TabletSchedCtx tabletCtx, AgentBatchTask batchTask) throws SchedException { + if (Config.disable_tablet_scheduler) { + // do not schedule more tablet is tablet scheduler is disabled. 
+ throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "tablet scheduler is disabled"); + } + if (Config.disable_balance && tabletCtx.getType() == Type.BALANCE) { + throw new SchedException(Status.UNRECOVERABLE, SubCode.DIAGNOSE_IGNORE, "balance is disabled"); + } + long currentTime = System.currentTimeMillis(); tabletCtx.setLastSchedTime(currentTime); tabletCtx.setLastVisitedTime(currentTime); @@ -1447,7 +1449,7 @@ public class TabletScheduler extends MasterDaemon { if (hasBePath) { throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "unable to find dest path which can be fit in"); + "scheduler waiting for dest backend slot"); } else { throw new SchedException(Status.UNRECOVERABLE, "unable to find dest path which can be fit in"); @@ -1647,8 +1649,9 @@ public class TabletScheduler extends MasterDaemon { updateDestPathHash(tabletCtx); finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.FINISHED, Status.FINISHED, "finished"); } else { - finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, - request.getTaskStatus().getErrorMsgs().get(0)); + String errMsg = request.getTaskStatus().getErrorMsgs().get(0); + tabletCtx.setErrMsg(errMsg); + finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, errMsg); } return true; @@ -1776,6 +1779,7 @@ public class TabletScheduler extends MasterDaemon { // Set "resetReplicaState" to true because // the timeout task should also be considered as UNRECOVERABLE, // so need to reset replica state. 
+ t.setErrMsg("timeout"); finalizeTabletCtx(t, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE, "timeout"); stat.counterCloneTaskTimeout.incrementAndGet(); }); @@ -1946,7 +1950,7 @@ public class TabletScheduler extends MasterDaemon { LOG.debug("path hash is not set.", new Exception()); } throw new SchedException(Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, - "path hash is not set"); + "backend " + beId + " path hash is not set"); } Slot slot = pathSlots.get(pathHash); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org