This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new c2ca37dcfa0 [fix](restore) Fix atomic restore with exists replicas (#40734) c2ca37dcfa0 is described below commit c2ca37dcfa0741d485415d8a0cbb37d145262ca6 Author: walter <w41te...@gmail.com> AuthorDate: Fri Sep 13 11:49:48 2024 +0800 [fix](restore) Fix atomic restore with exists replicas (#40734) 1. create replicas with base tablet and schema hash 2. ignore storage medium when creating replicas with the base tablet The atomic restore is introduced in #40353. --- be/src/olap/tablet_manager.cpp | 13 +++++++--- .../java/org/apache/doris/backup/RestoreJob.java | 30 ++++++++++++---------- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index bc883185465..801a288fde8 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -279,6 +279,7 @@ Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector // we need use write lock on shard-1 and then use read lock on shard-2 // if there have create rollup tablet C(assume on shard-2) from tablet D(assume on shard-1) at the same time, we will meet deadlock std::unique_lock two_tablet_lock(_two_tablet_mtx, std::defer_lock); + bool in_restore_mode = request.__isset.in_restore_mode && request.in_restore_mode; bool is_schema_change_or_atomic_restore = request.__isset.base_tablet_id && request.base_tablet_id > 0; bool need_two_lock = @@ -325,14 +326,20 @@ Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector if (base_tablet == nullptr) { DorisMetrics::instance()->create_tablet_requests_failed->increment(1); return Status::Error<TABLE_CREATE_META_ERROR>( - "fail to create tablet(change schema), base tablet does not exist. " - "new_tablet_id={}, base_tablet_id={}", + "fail to create tablet(change schema/atomic restore), base tablet does not " + "exist. new_tablet_id={}, base_tablet_id={}", tablet_id, request.base_tablet_id); } // If we are doing schema-change or atomic-restore, we should use the same data dir // TODO(lingbin): A litter trick here, the directory should be determined before // entering this method - if (request.storage_medium == base_tablet->data_dir()->storage_medium()) { + // + // ATTN: Since all restored replicas will be saved to HDD, so no storage_medium check here. + if (in_restore_mode || + request.storage_medium == base_tablet->data_dir()->storage_medium()) { + LOG(INFO) << "create tablet use the base tablet data dir. tablet_id=" << tablet_id + << ", base tablet_id=" << request.base_tablet_id + << ", data dir=" << base_tablet->data_dir()->path(); stores.clear(); stores.push_back(base_tablet->data_dir()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java index b499a9b525e..3f55e1b9088 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/backup/RestoreJob.java @@ -643,7 +643,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { } // the new tablets -> { local tablet, schema hash }, used in atomic restore. - Map<Long, Pair<Long, Integer>> tabletBases = null; + Map<Long, Pair<Long, Integer>> tabletBases = new HashMap<>(); // Check and prepare meta objects. AgentBatchTask batchTask = new AgentBatchTask(); @@ -802,7 +802,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { if (isAtomicRestore && localTbl != null) { // bind the backends and base tablets from local tbl. - tabletBases = bindLocalAndRemoteOlapTableReplicas((OlapTable) localTbl, remoteOlapTbl); + status = bindLocalAndRemoteOlapTableReplicas((OlapTable) localTbl, remoteOlapTbl, tabletBases); if (!status.ok()) { return; } @@ -886,7 +886,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { if (restoreTbl.getType() == TableType.OLAP) { OlapTable restoreOlapTable = (OlapTable) restoreTbl; for (Partition restorePart : restoreOlapTable.getPartitions()) { - createReplicas(db, batchTask, restoreOlapTable, restorePart); + createReplicas(db, batchTask, restoreOlapTable, restorePart, tabletBases); BackupOlapTableInfo backupOlapTableInfo = jobInfo.getOlapTableInfo(restoreOlapTable.getName()); genFileMapping(restoreOlapTable, restorePart, backupOlapTableInfo.id, backupOlapTableInfo.getPartInfo(restorePart.getName()), @@ -1025,10 +1025,9 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { // No log here, PENDING state restore job will redo this method } - private Map<Long, Pair<Long, Integer>> bindLocalAndRemoteOlapTableReplicas( - OlapTable localOlapTbl, OlapTable remoteOlapTbl) { - Map<Long, Pair<Long, Integer>> tabletBases = new HashMap<>(); - + private Status bindLocalAndRemoteOlapTableReplicas( + OlapTable localOlapTbl, OlapTable remoteOlapTbl, + Map<Long, Pair<Long, Integer>> tabletBases) { localOlapTbl.readLock(); try { for (Partition partition : remoteOlapTbl.getPartitions()) { @@ -1045,22 +1044,20 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { } int schemaHash = localOlapTbl.getSchemaHashByIndexId(localIndexId); if (schemaHash == -1) { - status = new Status(ErrCode.COMMON_ERROR, String.format( + return new Status(ErrCode.COMMON_ERROR, String.format( "schema hash of local index %d is not found, remote table=%d, remote index=%d, " + "local table=%d, local index=%d", localIndexId, remoteOlapTbl.getId(), index.getId(), localOlapTbl.getId(), localIndexId)); - return null; } List<Tablet> localTablets = localIndex.getTablets(); List<Tablet> remoteTablets = index.getTablets(); if (localTablets.size() != remoteTablets.size()) { - status = new Status(ErrCode.COMMON_ERROR, String.format( + return new Status(ErrCode.COMMON_ERROR, String.format( "the size of local tablet %s is not equals to the remote %s, " + "is_atomic_restore=true, remote table=%d, remote index=%d, " + "local table=%d, local index=%d", localTablets.size(), remoteTablets.size(), remoteOlapTbl.getId(), index.getId(), localOlapTbl.getId(), localIndexId)); - return null; } for (int i = 0; i < remoteTablets.size(); i++) { Tablet localTablet = localTablets.get(i); @@ -1068,14 +1065,13 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { List<Replica> localReplicas = localTablet.getReplicas(); List<Replica> remoteReplicas = remoteTablet.getReplicas(); if (localReplicas.size() != remoteReplicas.size()) { - status = new Status(ErrCode.COMMON_ERROR, String.format( + return new Status(ErrCode.COMMON_ERROR, String.format( "the size of local replicas %s is not equals to the remote %s, " + "is_atomic_restore=true, remote table=%d, remote index=%d, " + "local table=%d, local index=%d, local replicas=%d, remote replicas=%d", localTablets.size(), remoteTablets.size(), remoteOlapTbl.getId(), index.getId(), localOlapTbl.getId(), localIndexId, localReplicas.size(), remoteReplicas.size())); - return null; } for (int j = 0; j < remoteReplicas.size(); j++) { long backendId = localReplicas.get(j).getBackendId(); @@ -1093,7 +1089,7 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { } finally { localOlapTbl.readUnlock(); } - return tabletBases; + return Status.OK; } private void prepareAndSendSnapshotTaskForOlapTable(Database db) { @@ -1274,6 +1270,8 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { // ensure this replica is bound to the same backend disk as the origin table's replica. Pair<Long, Integer> baseTablet = tabletBases.get(restoreTablet.getId()); task.setBaseTablet(baseTablet.first, baseTablet.second); + LOG.info("set base tablet {} for replica {} in restore job {}, tablet id={}", + baseTablet.first, restoreReplica.getId(), jobId, restoreTablet.getId()); } batchTask.addTask(task); } @@ -1379,6 +1377,10 @@ public class RestoreJob extends AbstractJob implements GsonPostProcessable { long refTabletId = -1L; if (tabletBases != null && tabletBases.containsKey(localTablet.getId())) { refTabletId = tabletBases.get(localTablet.getId()).first; + if (LOG.isDebugEnabled()) { + LOG.debug("restored tablet {} is based on exists tablet {}", + localTablet.getId(), refTabletId); + } } long noReplicaId = -1L; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org