This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 2fe135b740 [fix](colocate table) fix backend no delete colocate tablets data (#23152)
2fe135b740 is described below

commit 2fe135b740c9eafbc5202a36d6e9d30ecabcd7b9
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Tue Aug 22 17:48:32 2023 +0800

    [fix](colocate table) fix backend no delete colocate tablets data (#23152)
---
 .../org/apache/doris/master/ReportHandler.java     | 127 +++++++++++++++------
 1 file changed, 90 insertions(+), 37 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
index a38b91ff78..b166a34ab1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java
@@ -17,7 +17,8 @@
 
 package org.apache.doris.master;
 
-
+import org.apache.doris.catalog.ColocateGroupSchema;
+import org.apache.doris.catalog.ColocateTableIndex;
 import org.apache.doris.catalog.Database;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MaterializedIndex;
@@ -72,6 +73,7 @@ import org.apache.doris.thrift.TTablet;
 import org.apache.doris.thrift.TTabletInfo;
 import org.apache.doris.thrift.TTaskType;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.LinkedListMultimap;
 import com.google.common.collect.ListMultimap;
 import com.google.common.collect.Lists;
@@ -679,23 +681,17 @@ public class ReportHandler extends Daemon {
             TTablet backendTablet = backendTablets.get(tabletId);
             TTabletInfo backendTabletInfo = 
backendTablet.getTabletInfos().get(0);
             boolean needDelete = false;
-            TabletMeta tabletMeta = null;
+            TabletMeta tabletMeta = invertedIndex.getTabletMeta(tabletId);
             if (!tabletFoundInMeta.contains(tabletId)) {
                 if (isBackendReplicaHealthy(backendTabletInfo)) {
                     // if this tablet meta is still in invertedIndex. try to 
add it.
                     // if add failed. delete this tablet from backend.
-                    try {
-                        tabletMeta = invertedIndex.getTabletMeta(tabletId);
-                        if (tabletMeta != null) {
-                            addReplica(tabletId, tabletMeta, 
backendTabletInfo, backendId);
-                            // update counter
-                            ++addToMetaCounter;
-                        } else {
-                            needDelete = true;
-                        }
-                    } catch (MetaNotFoundException e) {
-                        LOG.debug("failed add to meta. tablet[{}], 
backend[{}]. {}",
-                                tabletId, backendId, e.getMessage());
+                    if (tabletMeta != null && addReplica(tabletId, tabletMeta, 
backendTabletInfo, backendId)) {
+                        // update counter
+                        ++addToMetaCounter;
+                        LOG.debug("add to meta. tablet[{}], backend[{}]", 
tabletId, backendId);
+                    } else {
+                        LOG.info("failed add to meta. tablet[{}], 
backend[{}]", tabletId, backendId);
                         needDelete = true;
                     }
                 } else {
@@ -710,7 +706,7 @@ public class ReportHandler extends Daemon {
                 DropReplicaTask task = new DropReplicaTask(backendId, 
tabletId, replicaId,
                         backendTabletInfo.getSchemaHash(), 
isDropTableOrPartition);
                 batchTask.addTask(task);
-                LOG.debug("delete tablet[{}] from backend[{}] because not 
found in meta", tabletId, backendId);
+                LOG.info("delete tablet[{}] from backend[{}] because not found 
in meta", tabletId, backendId);
                 ++deleteFromBackendCounter;
             }
         } // end for backendTabletIds
@@ -888,8 +884,8 @@ public class ReportHandler extends Daemon {
         AgentTaskExecutor.submit(batchTask);
     }
 
-    private static void addReplica(long tabletId, TabletMeta tabletMeta, 
TTabletInfo backendTabletInfo, long backendId)
-            throws MetaNotFoundException {
+    private static boolean addReplica(long tabletId, TabletMeta tabletMeta, 
TTabletInfo backendTabletInfo,
+            long backendId) {
         long dbId = tabletMeta.getDbId();
         long tableId = tabletMeta.getTableId();
         long partitionId = tabletMeta.getPartitionId();
@@ -901,44 +897,76 @@ public class ReportHandler extends Daemon {
         long remoteDataSize = backendTabletInfo.getRemoteDataSize();
         long rowCount = backendTabletInfo.getRowCount();
 
-        Database db = 
Env.getCurrentInternalCatalog().getDbOrMetaException(dbId);
-        OlapTable olapTable = (OlapTable) db.getTableOrMetaException(tableId, 
Table.TableType.OLAP);
-        olapTable.writeLockOrMetaException();
+        Database db;
+        OlapTable olapTable;
+        try {
+            db = Env.getCurrentInternalCatalog().getDbOrMetaException(dbId);
+            olapTable = (OlapTable) db.getTableOrMetaException(tableId, 
Table.TableType.OLAP);
+            olapTable.writeLockOrMetaException();
+        } catch (MetaNotFoundException e) {
+            LOG.warn(e);
+            return false;
+        }
+
         try {
             Partition partition = olapTable.getPartition(partitionId);
             if (partition == null) {
-                throw new MetaNotFoundException("partition[" + partitionId + 
"] does not exist");
+                LOG.warn("partition[{}] does not exist", partitionId);
+                return false;
             }
             ReplicaAllocation replicaAlloc = 
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId());
 
             MaterializedIndex materializedIndex = partition.getIndex(indexId);
             if (materializedIndex == null) {
-                throw new MetaNotFoundException("index[" + indexId + "] does 
not exist");
+                LOG.warn("index[{}] does not exist", indexId);
+                return false;
             }
 
             Tablet tablet = materializedIndex.getTablet(tabletId);
             if (tablet == null) {
-                throw new MetaNotFoundException("tablet[" + tabletId + "] does 
not exist");
+                LOG.warn("tablet[{}] does not exist", tabletId);
+                return false;
             }
 
             long visibleVersion = partition.getVisibleVersion();
 
             // check replica version
             if (version < visibleVersion) {
-                throw new MetaNotFoundException("version is invalid. tablet[" 
+ version + "]"
-                        + ", visible[" + visibleVersion + "]");
+                LOG.warn("version is invalid. tablet[{}], visible[{}]", 
version, visibleVersion);
+                return false;
             }
 
             // check schema hash
             if (schemaHash != olapTable.getSchemaHashByIndexId(indexId)) {
-                throw new MetaNotFoundException("schema hash is diff[" + 
schemaHash + "-"
-                        + olapTable.getSchemaHashByIndexId(indexId) + "]");
+                LOG.warn("schema hash is diff[{}-{}]", schemaHash, 
olapTable.getSchemaHashByIndexId(indexId));
+                return false;
             }
 
             // colocate table will delete Replica in meta when balance
             // but we need to rely on MetaNotFoundException to decide whether 
delete the tablet in backend
-            if 
(Env.getCurrentColocateIndex().isColocateTable(olapTable.getId())) {
-                return;
+            // if the tablet is healthy, delete it.
+            boolean isColocateBackend = false;
+            ColocateTableIndex colocateTableIndex = 
Env.getCurrentColocateIndex();
+            if (colocateTableIndex.isColocateTable(olapTable.getId())) {
+                ColocateTableIndex.GroupId groupId = 
colocateTableIndex.getGroup(tableId);
+                Preconditions.checkState(groupId != null,
+                        "can not get colocate group for %s", tableId);
+                ColocateGroupSchema groupSchema = 
colocateTableIndex.getGroupSchema(groupId);
+                if (groupSchema != null) {
+                    replicaAlloc = groupSchema.getReplicaAlloc();
+                }
+                int tabletOrderIdx = 
materializedIndex.getTabletOrderIdx(tabletId);
+                Preconditions.checkState(tabletOrderIdx != -1, "get tablet 
materializedIndex for %s fail", tabletId);
+                Set<Long> backendsSet = 
colocateTableIndex.getTabletBackendsByGroup(groupId, tabletOrderIdx);
+                TabletStatus status =
+                        tablet.getColocateHealthStatus(visibleVersion, 
replicaAlloc, backendsSet);
+                if (status == TabletStatus.HEALTHY) {
+                    return false;
+                }
+
+                if (backendsSet.contains(backendId)) {
+                    isColocateBackend = true;
+                }
             }
 
             SystemInfoService infoService = Env.getCurrentSystemInfo();
@@ -947,7 +975,22 @@ public class ReportHandler extends Daemon {
                     db.getClusterName(), visibleVersion,
                     replicaAlloc, aliveBeIdsInCluster);
 
-            if (status.first == TabletStatus.VERSION_INCOMPLETE || 
status.first == TabletStatus.REPLICA_MISSING
+            // FORCE_REDUNDANT is a specific missing case.
+            // So it can add replica when it's in FORCE_REDUNDANT.
+            // But must be careful to avoid: delete a replica then add it 
back, then repeat forever.
+            // If this replica is sched available and existing another replica 
is sched unavailable,
+            // it's safe to add this replica.
+            // Because if the tablet scheduler want to delete a replica, it 
will choose the sched
+            // unavailable replica and avoid the repeating loop as above.
+            boolean canAddForceRedundant = status.first == 
TabletStatus.FORCE_REDUNDANT
+                    && infoService.checkBackendScheduleAvailable(backendId)
+                    && tablet.getReplicas().stream().anyMatch(
+                            r -> 
!infoService.checkBackendScheduleAvailable(r.getBackendId()));
+
+            if (isColocateBackend
+                    || canAddForceRedundant
+                    || status.first == TabletStatus.VERSION_INCOMPLETE
+                    || status.first == TabletStatus.REPLICA_MISSING
                     || status.first == TabletStatus.UNRECOVERABLE) {
                 long lastFailedVersion = -1L;
 
@@ -959,13 +1002,17 @@ public class ReportHandler extends Daemon {
                 // just throw exception in this case
                 if (version > partition.getNextVersion() - 1) {
                     // this is a fatal error
-                    throw new MetaNotFoundException("version is invalid. 
tablet[" + version + "]"
-                            + ", partition's max version [" + 
(partition.getNextVersion() - 1) + "]");
+                    LOG.warn("version is invalid. tablet[{}], partition's max 
version [{}]", version,
+                            partition.getNextVersion() - 1);
+                    return false;
                 } else if (version < partition.getCommittedVersion()) {
                     lastFailedVersion = partition.getCommittedVersion();
                 }
 
-                long replicaId = Env.getCurrentEnv().getNextId();
+                long replicaId = backendTabletInfo.getReplicaId();
+                if (replicaId <= 0) {
+                    replicaId = Env.getCurrentEnv().getNextId();
+                }
                 Replica replica = new Replica(replicaId, backendId, version, 
schemaHash,
                         dataSize, remoteDataSize, rowCount, 
ReplicaState.NORMAL,
                         lastFailedVersion, version);
@@ -980,18 +1027,24 @@ public class ReportHandler extends Daemon {
 
                 Env.getCurrentEnv().getEditLog().logAddReplica(info);
 
-                LOG.info("add replica[{}-{}] to catalog. backend[{}]", 
tabletId, replicaId, backendId);
+                LOG.info("add replica[{}-{}] to catalog. backend[{}], tablet 
status {}, tablet size {}, "
+                        + "is colocate backend {}",
+                        tabletId, replicaId, backendId, status.first.name(), 
tablet.getReplicas().size(),
+                        isColocateBackend);
+                return true;
             } else {
                 // replica is enough. check if this tablet is already in meta
                 // (status changed between 'tabletReport()' and 'addReplica()')
                 for (Replica replica : tablet.getReplicas()) {
                     if (replica.getBackendId() == backendId) {
                         // tablet is already in meta. return true
-                        return;
+                        return true;
                     }
                 }
-                throw new MetaNotFoundException(
-                        "replica is enough[" + tablet.getReplicas().size() + 
"-" + replicaAlloc.toCreateStmt() + "]");
+                LOG.warn("no add replica [{}-{}] cause it is enough[{}-{}], 
tablet status {}",
+                        tabletId, backendTabletInfo.getReplicaId(), 
tablet.getReplicas().size(),
+                        replicaAlloc.toCreateStmt(), status.first.name());
+                return false;
             }
         } finally {
             olapTable.writeUnlock();


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to