This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 1c6db41b7ac branch-3.0: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47497)
1c6db41b7ac is described below

commit 1c6db41b7ace2f4a49bb85a1814d3c84bd1de2ee
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Wed Feb 19 10:34:23 2025 +0800

    branch-3.0: [fix](restore) Cut down restore timeout when create replicas failed #47278 (#47497)

    Cherry-picked from #47278

    Co-authored-by: Uniqueyou <wangyix...@selectdb.com>
---
 .../src/main/java/org/apache/doris/backup/RestoreJob.java  | 13 ++++++++++++-
 .../java/org/apache/doris/common/MarkedCountDownLatch.java |  6 ++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
index 92dab6552d6..c3f242143d0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java
@@ -62,6 +62,7 @@ import org.apache.doris.common.MarkedCountDownLatch;
 import org.apache.doris.common.MetaNotFoundException;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.io.Text;
+import org.apache.doris.common.util.DbUtil;
 import org.apache.doris.common.util.DebugPointUtil;
 import org.apache.doris.common.util.DynamicPartitionUtil;
 import org.apache.doris.common.util.PropertyAnalyzer;
@@ -173,7 +174,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
 
     private boolean reserveReplica = false;
     private boolean reserveDynamicPartitionEnable = false;
-
+    private long createReplicasTimeStamp = -1;
     // this 2 members is to save all newly restored objs
     // tbl name -> part
     @SerializedName("rp")
@@ -981,6 +982,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
 
         // No log here, PENDING state restore job will redo this method
         state = RestoreJobState.CREATING;
+        createReplicasTimeStamp = System.currentTimeMillis();
     }
 
     private void waitingAllReplicasCreated() {
@@ -989,6 +991,14 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
             if (!createReplicaTasksLatch.await(0, TimeUnit.SECONDS)) {
                 LOG.info("waiting {} create replica tasks for restore to finish. {}",
                         createReplicaTasksLatch.getCount(), this);
+                long createReplicasTimeOut = DbUtil.getCreateReplicasTimeoutMs(createReplicaTasksLatch.getMarkCount());
+                long tryCreateTime = System.currentTimeMillis() - createReplicasTimeStamp;
+                if (tryCreateTime > createReplicasTimeOut) {
+                    status = new Status(ErrCode.TIMEOUT,
+                            "restore job with create replicas timeout: " + tryCreateTime + " with label: " + label);
+                    cancelInternal(false);
+                    LOG.warn("restore job {} create replicas timeout, cancel {}", jobId, this);
+                }
                 return;
             }
         } catch (InterruptedException e) {
@@ -2380,6 +2390,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable {
             snapshotInfos = HashBasedTable.create();
             fileMapping.clear();
             jobInfo.releaseSnapshotInfo();
+            createReplicasTimeStamp = -1;
 
             RestoreJobState curState = state;
             finishedTime = System.currentTimeMillis();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
index 5c3201e2b80..0eecbc43b1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/MarkedCountDownLatch.java
@@ -30,13 +30,19 @@ public class MarkedCountDownLatch<K, V> extends CountDownLatch {
     private Multimap<K, V> marks;
     private Multimap<K, V> failedMarks;
     private Status st = Status.OK;
+    private int markCount = 0;
 
     public MarkedCountDownLatch(int count) {
         super(count);
+        this.markCount = count;
         marks = HashMultimap.create();
         failedMarks = HashMultimap.create();
     }
 
+    public int getMarkCount() {
+        return markCount;
+    }
+
     public synchronized void addMark(K key, V value) {
         marks.put(key, value);
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org