This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new d1ffdc76d64 [branch-1.2](bug) fix fe schedule clone task stuck in running state #26397
d1ffdc76d64 is described below

commit d1ffdc76d641c74cb843265e6630564ff1d4be7f
Author: xy720 <22125576+xy...@users.noreply.github.com>
AuthorDate: Tue Nov 7 10:10:28 2023 +0800

    [branch-1.2](bug) fix fe schedule clone task stuck in running state #26397
---
 .../java/org/apache/doris/clone/TabletSchedCtx.java   |  8 ++++++++
 .../java/org/apache/doris/clone/TabletScheduler.java  | 19 +++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index b904654c74d..bad60d8ec43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -293,6 +293,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         return failedSchedCounter;
     }
 
+    public void resetFailedSchedCounter() {
+        failedSchedCounter = 0;
+    }
+
     public void increaseFailedRunningCounter() {
         ++failedRunningCounter;
     }
@@ -301,6 +305,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         return failedRunningCounter;
     }
 
+    public boolean isExceedFailedRunningLimit() {
+        return failedRunningCounter >= RUNNING_FAILED_COUNTER_THRESHOLD;
+    }
+
     public void setLastSchedTime(long lastSchedTime) {
         this.lastSchedTime = lastSchedTime;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 30d6c23d8a5..0116ac3404b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1530,12 +1530,23 @@ public class TabletScheduler extends MasterDaemon {
         try {
             tabletCtx.finishCloneTask(cloneTask, request);
         } catch (SchedException e) {
-            tabletCtx.increaseFailedRunningCounter();
             tabletCtx.setErrMsg(e.getMessage());
             if (e.getStatus() == Status.RUNNING_FAILED) {
-                stat.counterCloneTaskFailed.incrementAndGet();
-                addToRunningTablets(tabletCtx);
-                return false;
+                tabletCtx.increaseFailedRunningCounter();
+                if (!tabletCtx.isExceedFailedRunningLimit()) {
+                    stat.counterCloneTaskFailed.incrementAndGet();
+                    tabletCtx.releaseResource(this);
+                    tabletCtx.resetFailedSchedCounter();
+                    tabletCtx.setState(TabletSchedCtx.State.PENDING);
+                    dynamicAdjustPrioAndAddBackToPendingTablets(tabletCtx, e.getMessage());
+                    return false;
+                } else {
+                    // unrecoverable
+                    stat.counterTabletScheduledDiscard.incrementAndGet();
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE,
+                            e.getMessage());
+                    return true;
+                }
             } else if (e.getStatus() == Status.UNRECOVERABLE) {
                 // unrecoverable
                 stat.counterTabletScheduledDiscard.incrementAndGet();

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org