This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 7afe606b980 [fix](race) fix access colocate group ids race #36444 (#36500)
7afe606b980 is described below

commit 7afe606b980e032d679dc8e44699bcf63c8bf5b8
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Wed Jun 19 12:21:47 2024 +0800

    [fix](race) fix access colocate group ids race #36444 (#36500)
---
 .../apache/doris/catalog/ColocateTableIndex.java   |  2 +-
 .../clone/ColocateTableCheckerAndBalancer.java     | 23 +++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
index 23703278fd8..64dfb7af140 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColocateTableIndex.java
@@ -391,7 +391,7 @@ public class ColocateTableIndex implements Writable {
     public Set<GroupId> getAllGroupIds() {
         readLock();
         try {
-            return group2Tables.keySet();
+            return Sets.newHashSet(group2Tables.keySet());
         } finally {
             readUnlock();
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
index e70ec445cd6..d3141a178df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/ColocateTableCheckerAndBalancer.java
@@ -378,7 +378,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
      *  A    B    C    D
      */
     private void relocateAndBalanceGroups() {
-        Set<GroupId> groupIds = 
Sets.newHashSet(Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds());
+        Set<GroupId> groupIds = 
Env.getCurrentEnv().getColocateTableIndex().getAllGroupIds();
 
         // balance only inside each group, excluded balance between all groups
         Set<GroupId> changeGroups = relocateAndBalanceGroup(groupIds, false);
@@ -410,6 +410,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
             }
 
             ColocateGroupSchema groupSchema = 
colocateIndex.getGroupSchema(groupId);
+            if (groupSchema == null) {
+                LOG.info("Not found colocate group {}, maybe delete", groupId);
+                continue;
+            }
             ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
             try {
                 
Env.getCurrentSystemInfo().checkReplicaAllocation(replicaAlloc);
@@ -475,12 +479,19 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
         // check each group
         Set<GroupId> groupIds = colocateIndex.getAllGroupIds();
         for (GroupId groupId : groupIds) {
+            ColocateGroupSchema groupSchema = 
colocateIndex.getGroupSchema(groupId);
+            if (groupSchema == null) {
+                LOG.info("Not found colocate group {}, maybe delete", groupId);
+                continue;
+            }
+
             List<Long> tableIds = colocateIndex.getAllTableIds(groupId);
             List<Set<Long>> backendBucketsSeq = 
colocateIndex.getBackendsPerBucketSeqSet(groupId);
             if (backendBucketsSeq.isEmpty()) {
                 continue;
             }
 
+            ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
             String unstableReason = null;
             OUT:
             for (Long tableId : tableIds) {
@@ -499,8 +510,6 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
                 olapTable.readLock();
                 try {
                     for (Partition partition : olapTable.getPartitions()) {
-                        ReplicaAllocation replicaAlloc
-                                = 
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId());
                         short replicationNum = 
replicaAlloc.getTotalReplicaNum();
                         long visibleVersion = partition.getVisibleVersion();
                         // Here we only get VISIBLE indexes. All other indexes 
are not queryable.
@@ -531,8 +540,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
                                     TabletSchedCtx tabletCtx = new 
TabletSchedCtx(
                                             TabletSchedCtx.Type.REPAIR,
                                             db.getId(), tableId, 
partition.getId(), index.getId(), tablet.getId(),
-                                            
olapTable.getPartitionInfo().getReplicaAllocation(partition.getId()),
-                                            System.currentTimeMillis());
+                                            replicaAlloc, 
System.currentTimeMillis());
                                     // the tablet status will be set again 
when being scheduled
                                     tabletCtx.setTabletStatus(st);
                                     tabletCtx.setPriority(Priority.NORMAL);
@@ -582,6 +590,7 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
         for (GroupId groupId : groupIds) {
             ColocateGroupSchema groupSchema = 
colocateIndex.getGroupSchema(groupId);
             if (groupSchema == null) {
+                LOG.info("Not found colocate group {}, maybe delete", groupId);
                 continue;
             }
             ReplicaAllocation replicaAlloc = groupSchema.getReplicaAlloc();
@@ -712,6 +721,10 @@ public class ColocateTableCheckerAndBalancer extends MasterDaemon {
             GlobalColocateStatistic globalColocateStatistic, List<List<Long>> 
balancedBackendsPerBucketSeq,
             boolean balanceBetweenGroups) {
         ColocateGroupSchema groupSchema = 
colocateIndex.getGroupSchema(groupId);
+        if (groupSchema == null) {
+            LOG.info("Not found colocate group {}, maybe delete", groupId);
+            return false;
+        }
         short replicaNum = 
groupSchema.getReplicaAlloc().getReplicaNumByTag(tag);
         List<List<Long>> backendsPerBucketSeq = Lists.newArrayList(
                 colocateIndex.getBackendsPerBucketSeqByTag(groupId, tag));


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to