This is an automated email from the ASF dual-hosted git repository.

lide pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new d1ffdc76d64 [branch-1.2](bug)  fix fe schedule clone task stuck in running state #26397
d1ffdc76d64 is described below

commit d1ffdc76d641c74cb843265e6630564ff1d4be7f
Author: xy720 <22125576+xy...@users.noreply.github.com>
AuthorDate: Tue Nov 7 10:10:28 2023 +0800

    [branch-1.2](bug)  fix fe schedule clone task stuck in running state #26397
---
 .../java/org/apache/doris/clone/TabletSchedCtx.java   |  8 ++++++++
 .../java/org/apache/doris/clone/TabletScheduler.java  | 19 +++++++++++++++----
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
index b904654c74d..bad60d8ec43 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletSchedCtx.java
@@ -293,6 +293,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         return failedSchedCounter;
     }
 
+    public void resetFailedSchedCounter() {
+        failedSchedCounter = 0;
+    }
+
     public void increaseFailedRunningCounter() {
         ++failedRunningCounter;
     }
@@ -301,6 +305,10 @@ public class TabletSchedCtx implements Comparable<TabletSchedCtx> {
         return failedRunningCounter;
     }
 
+    public boolean isExceedFailedRunningLimit() {
+        return failedRunningCounter >= RUNNING_FAILED_COUNTER_THRESHOLD;
+    }
+
     public void setLastSchedTime(long lastSchedTime) {
         this.lastSchedTime = lastSchedTime;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
index 30d6c23d8a5..0116ac3404b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java
@@ -1530,12 +1530,23 @@ public class TabletScheduler extends MasterDaemon {
         try {
             tabletCtx.finishCloneTask(cloneTask, request);
         } catch (SchedException e) {
-            tabletCtx.increaseFailedRunningCounter();
             tabletCtx.setErrMsg(e.getMessage());
             if (e.getStatus() == Status.RUNNING_FAILED) {
-                stat.counterCloneTaskFailed.incrementAndGet();
-                addToRunningTablets(tabletCtx);
-                return false;
+                tabletCtx.increaseFailedRunningCounter();
+                if (!tabletCtx.isExceedFailedRunningLimit()) {
+                    stat.counterCloneTaskFailed.incrementAndGet();
+                    tabletCtx.releaseResource(this);
+                    tabletCtx.resetFailedSchedCounter();
+                    tabletCtx.setState(TabletSchedCtx.State.PENDING);
+                    dynamicAdjustPrioAndAddBackToPendingTablets(tabletCtx, e.getMessage());
+                    return false;
+                } else {
+                    // unrecoverable
+                    stat.counterTabletScheduledDiscard.incrementAndGet();
+                    finalizeTabletCtx(tabletCtx, TabletSchedCtx.State.CANCELLED, Status.UNRECOVERABLE,
+                            e.getMessage());
+                    return true;
+                }
             } else if (e.getStatus() == Status.UNRECOVERABLE) {
                 // unrecoverable
                 stat.counterTabletScheduledDiscard.incrementAndGet();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to