This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1c6db41b7ac branch-3.0: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47497)
1c6db41b7ac is described below

commit 1c6db41b7ace2f4a49bb85a1814d3c84bd1de2ee
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Feb 19 10:34:23 2025 +0800

    branch-3.0: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47497)
    
    Cherry-picked from #47278
    
    Co-authored-by: Uniqueyou <wangyix...@selectdb.com>
---
 .../src/main/java/org/apache/doris/backup/RestoreJob.java   | 13 ++++++++++++-
 .../java/org/apache/doris/common/MarkedCountDownLatch.java  |  6 ++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
index 92dab6552d6..c3f242143d0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
@@ -62,6 +62,7 @@ import org.apache.doris.common.MarkedCountDownLatch;
 import org.apache.doris.common.MetaNotFoundException;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.io.Text;
+import org.apache.doris.common.util.DbUtil;
 import org.apache.doris.common.util.DebugPointUtil;
 import org.apache.doris.common.util.DynamicPartitionUtil;
 import org.apache.doris.common.util.PropertyAnalyzer;
@@ -173,7 +174,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
 
     private boolean reserveReplica = false;
     private boolean reserveDynamicPartitionEnable = false;
-
+    private long createReplicasTimeStamp = -1;
     // this 2 members is to save all newly restored objs
     // tbl name -> part
     @SerializedName("rp")
@@ -981,6 +982,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
 
         // No log here, PENDING state restore job will redo this method
         state = RestoreJobState.CREATING;
+        createReplicasTimeStamp = System.currentTimeMillis();
     }
 
     private void waitingAllReplicasCreated() {
@@ -989,6 +991,14 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
             if (!createReplicaTasksLatch.await(0, TimeUnit.SECONDS)) {
                 LOG.info("waiting {} create replica tasks for restore to finish. {}",
                         createReplicaTasksLatch.getCount(), this);
+                long createReplicasTimeOut = DbUtil.getCreateReplicasTimeoutMs(createReplicaTasksLatch.getMarkCount());
+                long tryCreateTime = System.currentTimeMillis() - createReplicasTimeStamp;
+                if (tryCreateTime > createReplicasTimeOut) {
+                    status = new Status(ErrCode.TIMEOUT,
+                            "restore job with create replicas timeout: " + tryCreateTime + " with label: " + label);
+                    cancelInternal(false);
+                    LOG.warn("restore job {} create replicas timeout, cancel {}", jobId, this);
+                }
                 return;
             }
         } catch (InterruptedException e) {
@@ -2380,6 +2390,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
             snapshotInfos = HashBasedTable.create();
             fileMapping.clear();
             jobInfo.releaseSnapshotInfo();
+            createReplicasTimeStamp = -1;
 
             RestoreJobState curState = state;
             finishedTime = System.currentTimeMillis();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
index 5c3201e2b80..0eecbc43b1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
@@ -30,13 +30,19 @@ public class MarkedCountDownLatch<K, V> extends CountDownLatch {
     private Multimap<K, V> marks;
     private Multimap<K, V> failedMarks;
     private Status st = Status.OK;
+    private int markCount = 0;
 
     public MarkedCountDownLatch(int count) {
         super(count);
+        this.markCount = count;
         marks = HashMultimap.create();
         failedMarks = HashMultimap.create();
     }
 
+    public int getMarkCount() {
+        return markCount;
+    }
+
     public synchronized void addMark(K key, V value) {
         marks.put(key, value);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to