This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new d7c873314e8 [fix](balance) Fix PartitionRebalancer generating invalid 
moves to BEs without required storage medium (#62206) (#63755)
d7c873314e8 is described below

commit d7c873314e8f387bae5f7d645c478accc0146a4c
Author: deardeng <[email protected]>
AuthorDate: Thu May 28 10:36:55 2026 +0800

    [fix](balance) Fix PartitionRebalancer generating invalid moves to BEs 
without required storage medium (#62206) (#63755)
    
    pick from https://github.com/apache/doris/pull/62206
---
 docker/runtime/doris-compose/command.py            |  15 +-
 .../apache/doris/catalog/TabletInvertedIndex.java  |  13 +-
 .../apache/doris/clone/LoadStatisticForTag.java    |  13 +-
 .../apache/doris/clone/PartitionRebalancer.java    |   2 +-
 .../java/org/apache/doris/clone/RebalanceTest.java |  81 ++++++++++
 .../org/apache/doris/clone/RebalancerTestUtil.java |  16 ++
 .../doris/regression/suite/SuiteCluster.groovy     |  31 ++--
 ...est_partition_rebalancer_medium_mismatch.groovy | 178 +++++++++++++++++++++
 8 files changed, 329 insertions(+), 20 deletions(-)

diff --git a/docker/runtime/doris-compose/command.py 
b/docker/runtime/doris-compose/command.py
index c0abb4a257e..db8b403b4a0 100644
--- a/docker/runtime/doris-compose/command.py
+++ b/docker/runtime/doris-compose/command.py
@@ -391,7 +391,7 @@ class UpCommand(Command):
         )
         group1.add_argument("--be-disks",
                             nargs="*",
-                            default=["HDD=1"],
+                            default=None,
                             type=str,
                             help="Specify each be disks, each group is 
\"disk_type=disk_num[,disk_capactity]\", "\
                                     "disk_type is HDD or SSD, disk_capactity 
is capactity limit in gb. default: HDD=1. "\
@@ -618,7 +618,8 @@ class UpCommand(Command):
                 args.NAME, args.IMAGE, args.cloud, args.root, args.fe_config,
                 args.be_config, args.ms_config, args.recycle_config,
                 args.remote_master_fe, args.local_network_ip, args.fe_follower,
-                args.be_disks, args.be_cluster, args.reg_be, args.extra_hosts,
+                args.be_disks if args.be_disks is not None else ["HDD=1"],
+                args.be_cluster, args.reg_be, args.extra_hosts,
                 args.coverage_dir, cloud_store_config, args.sql_mode_node_mgr,
                 args.be_metaservice_endpoint, args.be_cluster_id, args.tde_ak, 
args.tde_sk)
             LOG.info("Create new cluster {} succ, cluster path is {}".format(
@@ -659,9 +660,19 @@ class UpCommand(Command):
                 related_nodes.append(node)
                 add_ids.append(node.id)
 
+        # If --be-disks is explicitly provided for an existing cluster,
+        # temporarily override cluster.be_disks so newly added BEs use
+        # the specified disk config instead of the original cluster config.
+        saved_be_disks = cluster.be_disks
+        if args.be_disks is not None:
+            cluster.be_disks = args.be_disks
+
         for node_type, add_num, add_ids in add_type_nums:
             do_add_node(node_type, add_num, add_ids)
 
+        # Restore original be_disks to avoid side effects
+        cluster.be_disks = saved_be_disks
+
         if args.IMAGE:
             for node in related_nodes:
                 node.set_image(args.IMAGE)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
index d90a7861662..7a14afc051d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java
@@ -1011,7 +1011,9 @@ public class TabletInvertedIndex {
 
     // Only build from available bes, exclude colocate tables
     public Map<TStorageMedium, TreeMultimap<Long, PartitionBalanceInfo>> 
buildPartitionInfoBySkew(
-            List<Long> availableBeIds, Map<Long, Pair<TabletMove, Long>> 
movesInProgress) {
+            List<Long> availableBeIds,
+            Map<TStorageMedium, List<Long>> availableBeIdsByMedium,
+            Map<Long, Pair<TabletMove, Long>> movesInProgress) {
         Set<Long> dbIds = Sets.newHashSet();
         Set<Long> tableIds = Sets.newHashSet();
         Set<Long> partitionIds = Sets.newHashSet();
@@ -1072,11 +1074,14 @@ public class TabletInvertedIndex {
                     Map<Long, Long> countMap = partitionReplicasInfo.get(
                             tabletMeta.getPartitionId(), 
tabletMeta.getIndexId());
                     if (countMap == null) {
-                        // If one be doesn't have any replica of one 
partition, it should be counted too.
-                        countMap = 
availableBeIds.stream().collect(Collectors.toMap(i -> i, i -> 0L));
+                        // If one be doesn't have any replica of one partition,
+                        // it should be counted too.
+                        List<Long> availableBeIdsForMedium = 
availableBeIdsByMedium.getOrDefault(
+                                medium, Lists.newArrayList());
+                        countMap = 
availableBeIdsForMedium.stream().collect(Collectors.toMap(i -> i, i -> 0L));
                     }
 
-                    Long count = countMap.get(beId);
+                    Long count = countMap.getOrDefault(beId, 0L);
                     countMap.put(beId, count + 1L);
                     partitionReplicasInfo.put(tabletMeta.getPartitionId(), 
tabletMeta.getIndexId(), countMap);
                     partitionReplicasInfoMaps.put(medium, 
partitionReplicasInfo);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
index 60a0d147917..e731d0701e0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java
@@ -159,7 +159,8 @@ public class LoadStatisticForTag {
         // Only count the available be
         for (TStorageMedium medium : TStorageMedium.values()) {
             TreeMultimap<Long, Long> beByTotalReplicaCount = 
TreeMultimap.create();
-            
beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable).forEach(beStat
 ->
+            beLoadStatistics.stream().filter(BackendLoadStatistic::isAvailable)
+                    .filter(beStat -> beStat.hasMedium(medium)).forEach(beStat 
->
                     beByTotalReplicaCount.put(beStat.getReplicaNum(medium), 
beStat.getBeId()));
             beByTotalReplicaCountMaps.put(medium, beByTotalReplicaCount);
         }
@@ -173,9 +174,17 @@ public class LoadStatisticForTag {
                     .filter(BackendLoadStatistic::isAvailable)
                     .map(BackendLoadStatistic::getBeId)
                     .collect(Collectors.toList());
+            Map<TStorageMedium, List<Long>> availableBeIdsByMedium = 
Maps.newHashMap();
+            for (TStorageMedium medium : TStorageMedium.values()) {
+                availableBeIdsByMedium.put(medium, beLoadStatistics.stream()
+                        .filter(BackendLoadStatistic::isAvailable)
+                        .filter(be -> be.hasMedium(medium))
+                        .map(BackendLoadStatistic::getBeId)
+                        .collect(Collectors.toList()));
+            }
             Map<Long, Pair<TabletMove, Long>> movesInProgress = rebalancer == 
null ? Maps.newHashMap()
                     : ((PartitionRebalancer) rebalancer).getMovesInProgress();
-            skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds, 
movesInProgress);
+            skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds, 
availableBeIdsByMedium, movesInProgress);
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java 
b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
index 30a7a76b920..96db70cf76f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java
@@ -305,7 +305,7 @@ public class PartitionRebalancer extends Rebalancer {
 
             List<RootPathLoadStatistic> paths = beStat.getPathStatistics();
             List<Long> availPath = paths.stream().filter(path -> 
path.getStorageMedium() == tabletCtx.getStorageMedium()
-                            && path.isFit(tabletCtx.getTabletSize(), false) == 
BalanceStatus.OK)
+                            && path.isFit(tabletCtx.getTabletSize(), 
false).ok())
                     
.map(RootPathLoadStatistic::getPathHash).collect(Collectors.toList());
             long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath, 
tabletCtx.getTag(),
                     tabletCtx.getStorageMedium());
diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
index fc3bbb28485..e50f25d3ae4 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java
@@ -322,6 +322,87 @@ public class RebalanceTest {
         Assert.assertEquals(needCheckTablets.size(), succeeded.get());
     }
 
+    // Test for OPENSOURCE-192: PartitionRebalancer should not generate moves
+    // targeting a BE that lacks the required storage medium.
+    // Scenario: SSD tablets on BE 20001/20002, new BE 20003 has only HDD.
+    // Without the fix, the algorithm would pick BE 20003 (0 SSD replicas) as 
the
+    // "least loaded" destination for SSD tablets, causing infinite scheduling 
failures.
+    @Test
+    public void testPartitionRebalancerSkipBEWithoutMedium() {
+        Configurator.setLevel("org.apache.doris.clone.PartitionRebalancer", 
Level.DEBUG);
+
+        // Add backends: 20001, 20002 have SSD; 20003 has only HDD
+        systemInfoService.addBackend(
+                RebalancerTestUtil.createBackend(20001L, 2048, 0, 
TStorageMedium.SSD));
+        systemInfoService.addBackend(
+                RebalancerTestUtil.createBackend(20002L, 2048, 0, 
TStorageMedium.SSD));
+        systemInfoService.addBackend(
+                RebalancerTestUtil.createBackend(20003L, 2048, 0, 
TStorageMedium.HDD));
+
+        // Create a table with SSD partition
+        OlapTable ssdTable = new OlapTable(3, "ssd table", new ArrayList<>(),
+                KeysType.DUP_KEYS, new RangePartitionInfo(), new 
HashDistributionInfo());
+        db.registerTable(ssdTable);
+
+        MaterializedIndex ssdIndex = new MaterializedIndex(ssdTable.getId(), 
null);
+        long partId = 41;
+        Partition partition = new Partition(partId, "p0", ssdIndex, new 
HashDistributionInfo());
+        ssdTable.addPartition(partition);
+        ssdTable.getPartitionInfo().addPartition(partId, new 
DataProperty(TStorageMedium.SSD),
+                ReplicaAllocation.DEFAULT_ALLOCATION, false, true);
+        ssdTable.setIndexMeta(ssdIndex.getId(), "ssd index", 
Lists.newArrayList(new Column()),
+                0, 0, (short) 0, TStorageType.COLUMN, KeysType.DUP_KEYS);
+
+        // Create SSD tablets: 3 replicas on BE 20001, 1 on BE 20002
+        // This creates skew = 3 - 1 = 2 among SSD BEs (with fix),
+        // or skew = 3 - 0 = 3 counting HDD-only BEs (without fix)
+        RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", 
TStorageMedium.SSD,
+                80001, Lists.newArrayList(20001L));
+        RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", 
TStorageMedium.SSD,
+                80002, Lists.newArrayList(20001L));
+        RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", 
TStorageMedium.SSD,
+                80003, Lists.newArrayList(20001L));
+        RebalancerTestUtil.createTablet(invertedIndex, db, ssdTable, "p0", 
TStorageMedium.SSD,
+                80004, Lists.newArrayList(20002L));
+
+        // Regenerate statistics with partition rebalancer
+        Config.tablet_rebalancer_type = "partition";
+        LoadStatisticForTag loadStatistic = new LoadStatisticForTag(
+                Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex, 
null);
+        loadStatistic.init();
+        Map<Tag, LoadStatisticForTag> ssdStatMap = Maps.newHashMap();
+        ssdStatMap.put(Tag.DEFAULT_BACKEND_TAG, loadStatistic);
+
+        PartitionRebalancer rebalancer = new 
PartitionRebalancer(Env.getCurrentSystemInfo(),
+                Env.getCurrentInvertedIndex(), null);
+        rebalancer.updateLoadStatistic(ssdStatMap);
+        rebalancer.selectAlternativeTablets();
+
+        // Verify: moves were generated (test is meaningful)
+        Map<Long, Pair<PartitionRebalancer.TabletMove, Long>> moves = 
rebalancer.getMovesInProgress();
+        Assert.assertFalse("Should generate moves for skewed SSD partition", 
moves.isEmpty());
+
+        // Verify: no move targets BE 20003 (HDD-only) or any of the HDD BEs 
from setUp (10001-10004)
+        for (Map.Entry<Long, Pair<PartitionRebalancer.TabletMove, Long>> entry 
: moves.entrySet()) {
+            PartitionRebalancer.TabletMove move = entry.getValue().first;
+            Assert.assertNotEquals("Move should not target HDD-only BE for SSD 
tablet",
+                    Long.valueOf(20003L), move.toBe);
+            Assert.assertFalse("Move should not target any BE without SSD",
+                    move.toBe == 10001L || move.toBe == 10002L
+                            || move.toBe == 10003L || move.toBe == 10004L);
+        }
+
+        // Verify: all moves go from BE 20001 (most loaded) to BE 20002 (least 
loaded with SSD)
+        for (Map.Entry<Long, Pair<PartitionRebalancer.TabletMove, Long>> entry 
: moves.entrySet()) {
+            PartitionRebalancer.TabletMove move = entry.getValue().first;
+            Assert.assertEquals("Source should be the most loaded SSD BE",
+                    Long.valueOf(20001L), move.fromBe);
+            Assert.assertEquals("Dest should be the least loaded SSD BE",
+                    Long.valueOf(20002L), move.toBe);
+        }
+        LOG.info("testPartitionRebalancerSkipBEWithoutMedium success");
+    }
+
     @Test
     public void testMoveInProgressMap() {
         Configurator.setLevel("org.apache.doris.clone.MovesInProgressCache", 
Level.DEBUG);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java 
b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
index 1e6af5c7324..58c021370c0 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalancerTestUtil.java
@@ -49,6 +49,22 @@ public class RebalancerTestUtil {
         return createBackend(id, totalCap, Lists.newArrayList(usedCap), 1);
     }
 
+    // Add only one path with specified storage medium, PathHash:id
+    public static Backend createBackend(long id, long totalCap, long usedCap, 
TStorageMedium medium) {
+        Backend be = new Backend(id, "192.168.0." + id, 9051);
+        Map<String, DiskInfo> disks = Maps.newHashMap();
+        DiskInfo diskInfo = new DiskInfo("/path1");
+        diskInfo.setPathHash(id);
+        diskInfo.setTotalCapacityB(totalCap);
+        diskInfo.setDataUsedCapacityB(usedCap);
+        diskInfo.setAvailableCapacityB(totalCap - usedCap);
+        diskInfo.setStorageMedium(medium);
+        disks.put(diskInfo.getRootPath(), diskInfo);
+        be.setDisks(ImmutableMap.copyOf(disks));
+        be.setAlive(true);
+        return be;
+    }
+
     /**
      * size of usedCaps should equal to diskNum.
      */
diff --git 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
index 16b18c00cfa..513c0a5b3bd 100644
--- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
+++ 
b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy
@@ -517,10 +517,15 @@ class SuiteCluster {
     }
 
     List<Integer> addFrontend(int num, boolean followerMode=false) throws 
Exception {
-        def result = add(num, 0, null, followerMode)
+        def result = add(num, 0, null, followerMode, null) // feNum=num, beNum=0; beDisks does not apply to FEs
         return result.first
     }
 
+    List<Integer> addBackend(int num, List<String> beDisks) throws Exception {
+        def result = add(0, num, '', false, beDisks)
+        return result.second
+    }
+
     List<Integer> addBackend(int num, String ClusterName='') throws Exception {
         def result = add(0, num, ClusterName)
         return result.second
@@ -528,29 +533,33 @@ class SuiteCluster {
 
     // ATTN: clusterName just used for cloud mode, 1 cluster has n bes
     // ATTN: followerMode just used for cloud mode
-    Tuple2<List<Integer>, List<Integer>> add(int feNum, int beNum, String 
clusterName, boolean followerMode=false) throws Exception {
+    // ATTN: beDisks just used for not cloud mode
+    Tuple2<List<Integer>, List<Integer>> add(int feNum, int beNum, String 
clusterName, boolean followerMode=false, List<String> beDisks=null) throws 
Exception {
         assert feNum > 0 || beNum > 0
 
-        def sb = new StringBuilder()
-        sb.append('up ' + name + ' ')
+        def cmd = ['up', name]
         if (feNum > 0) {
-            sb.append('--add-fe-num ' + feNum + ' ')
+            cmd += ['--add-fe-num', String.valueOf(feNum)]
             if (followerMode) {
-                sb.append('--fe-follower' + ' ')
+                cmd += ['--fe-follower']
             }
             if (sqlModeNodeMgr) {
-                sb.append('--sql-mode-node-mgr' + ' ')
+                cmd += ['--sql-mode-node-mgr']
             }
         }
         if (beNum > 0) {
-            sb.append('--add-be-num ' + beNum + ' ')
+            cmd += ['--add-be-num', String.valueOf(beNum)]
             if (clusterName != null && !clusterName.isEmpty()) {
-                sb.append(' --be-cluster ' + clusterName + ' ')
+                cmd += ['--be-cluster', clusterName]
             }
         }
-        sb.append('--wait-timeout 60')
+        if (beDisks != null && !beDisks.isEmpty()) {
+            cmd += ['--be-disks']
+            cmd += beDisks
+        }
+        cmd += ['--wait-timeout', '60']
 
-        def data = (Map<String, Map<String, Object>>) runCmd(sb.toString(), 
180)
+        def data = (Map<String, Map<String, Object>>) runCmdList(cmd, 180)
         def newFrontends = (List<Integer>) data.get('fe').get('add_list')
         def newBackends = (List<Integer>) data.get('be').get('add_list')
 
diff --git 
a/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
 
b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
new file mode 100644
index 00000000000..5aceb148914
--- /dev/null
+++ 
b/regression-test/suites/storage_medium_p0/test_partition_rebalancer_medium_mismatch.groovy
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.apache.doris.regression.suite.ClusterOptions
+
+/**
+ * Reproduce OPENSOURCE-192:
+ * When tablet_rebalancer_type=Partition, adding a new BE with only HDD disks
+ * to a cluster where tables are created with storage_medium=SSD causes
+ * the PartitionRebalancer to generate invalid moves (SSD tablets -> HDD-only 
BE),
+ * resulting in infinite "paths has no available balance slot: []" errors.
+ *
+ * Root cause: In LoadStatisticForTag.init(), the beByTotalReplicaCount map
+ * for each medium includes ALL available BEs without checking hasMedium().
+ * Similarly, TabletInvertedIndex.buildPartitionInfoBySkew() includes all
+ * availableBeIds in countMap without medium filtering. This causes the
+ * greedy algorithm to generate moves targeting BEs that lack the required
+ * storage medium.
+ *
+ * Setup:
+ *   - 3 initial BEs with SSD + HDD disks
+ *   - Table created with storage_medium = SSD (explicitly specified)
+ *   - Add 1 new BE with HDD only (via addBackend with custom beDisks)
+ *   - PartitionRebalancer generates invalid moves to the HDD-only BE
+ */
+suite('test_partition_rebalancer_medium_mismatch', 'docker') {
+    if (isCloudMode()) {
+        return
+    }
+
+    def options = new ClusterOptions()
+    options.feConfigs += [
+        'tablet_rebalancer_type=Partition',
+        'schedule_slot_num_per_hdd_path=8',
+        'balance_slot_num_per_path=2',
+        'disable_balance=false',
+        'disable_disk_balance=true',
+        'tablet_checker_interval_ms=2000',
+        'schedule_batch_size=1000',
+    ]
+    options.beConfigs += [
+        'report_disk_state_interval_seconds=2',
+        'report_tablet_interval_seconds=3',
+    ]
+    // Initial 3 BEs: each has 1 SSD + 1 HDD
+    options.beDisks = ['SSD=1', 'HDD=1']
+    options.beNum = 3
+
+    docker(options) {
+        // Step 1: Create table explicitly with SSD medium
+        def table = 'tbl_ssd_balance'
+        sql "DROP TABLE IF EXISTS ${table} FORCE"
+        sql """
+            CREATE TABLE ${table} (
+                k1 INT,
+                k2 VARCHAR(100),
+                v1 INT
+            )
+            DISTRIBUTED BY HASH(k1) BUCKETS 10
+            PROPERTIES (
+                'replication_num' = '1',
+                'storage_medium' = 'SSD'
+            )
+        """
+
+        // Verify partition medium is SSD
+        def partitions = sql_return_maparray "SHOW PARTITIONS FROM ${table}"
+        assertTrue(partitions.size() > 0)
+        partitions.each {
+            assertEquals('SSD', it.StorageMedium)
+        }
+        log.info("Table created with SSD medium, partitions: 
${partitions.size()}")
+
+        // Step 2: Insert data to distribute tablets across existing BEs
+        for (int i = 0; i < 100; i++) {
+            sql "INSERT INTO ${table} VALUES (${i}, 'value_${i}', ${i * 10})"
+        }
+
+        def count = sql "SELECT COUNT(*) FROM ${table}"
+        assertEquals(100, count[0][0] as int)
+
+        // Record tablet distribution before expansion
+        def tabletsBefore = sql_return_maparray "SHOW TABLETS FROM ${table}"
+        log.info("Tablets before expansion: ${tabletsBefore.size()}")
+        def beIdsBefore = tabletsBefore.collect { it.BackendId }.unique()
+        log.info("Tablets on BEs: ${beIdsBefore}")
+
+        // Let scheduler settle
+        sleep(10000)
+
+        // Step 3: Add a new BE with HDD only (different disk config from 
initial BEs)
+        log.info("Adding new BE with HDD-only disks...")
+        def newBeIndices = cluster.addBackend(1, ['HDD=1'])
+        log.info("New BE added with indices: ${newBeIndices}")
+
+        // Wait for new BE heartbeat and disk report
+        sleep(8000)
+
+        // Verify all backends
+        def backends = sql_return_maparray "SHOW BACKENDS"
+        log.info("Total backends after expansion: ${backends.size()}")
+        assertEquals(4, backends.size())
+
+        // Find the new BE
+        def newBeId = null
+        for (def be : backends) {
+            if (!(be.BackendId in beIdsBefore.collect { it as String })) {
+                newBeId = be.BackendId
+                break
+            }
+        }
+        assertNotNull(newBeId, "Should find new BE")
+        log.info("New BE id: ${newBeId}")
+
+        // Verify new BE has only HDD
+        def newBeDisks = sql_return_maparray "SHOW PROC '/backends/${newBeId}'"
+        log.info("New BE disks: ${newBeDisks}")
+        def hasSSD = newBeDisks.any { it.StorageMedium == 'SSD' }
+        def hasHDD = newBeDisks.any { it.StorageMedium == 'HDD' }
+        assertTrue(hasHDD, "New BE should have HDD disk")
+        assertFalse(hasSSD, "New BE should NOT have SSD disk")
+
+        // Step 4: Wait for PartitionRebalancer to attempt balance scheduling
+        // The bug: algorithm generates moves targeting the HDD-only BE for 
SSD tablets
+        log.info("Waiting for PartitionRebalancer to run (60s)...")
+        sleep(60000)
+
+        // Step 5: Check balance history for the bug signature
+        def schedHistory = sql_return_maparray "SHOW PROC 
'/cluster_balance/history_tablets'"
+        def failedWithEmptySlot = schedHistory.findAll {
+            it.ErrMsg != null && it.ErrMsg.contains('paths has no available 
balance slot: []')
+        }
+
+        log.info("Total history entries: ${schedHistory.size()}")
+        log.info("Entries with 'empty slot' error: 
${failedWithEmptySlot.size()}")
+
+        if (failedWithEmptySlot.size() > 0) {
+            log.warn("BUG REPRODUCED (OPENSOURCE-192)! " +
+                     "Found ${failedWithEmptySlot.size()} balance tasks " +
+                     "failed with 'paths has no available balance slot: []'")
+            failedWithEmptySlot.take(5).each { task ->
+                log.warn("  tablet=${task.TabletId}, dest=${task.DestBe}, 
err=${task.ErrMsg}")
+            }
+            // This assertion will fail when the bug is present, and pass 
after fix
+            fail("BUG: PartitionRebalancer generated invalid moves to HDD-only 
BE for SSD tablets")
+        } else {
+            log.info("No 'empty slot' failures. Bug not triggered or already 
fixed.")
+        }
+
+        // Step 6: Check that no tablets moved to the new BE
+        // (since it has no SSD, SSD tablets should NOT be relocated there)
+        def tabletsAfter = sql_return_maparray "SHOW TABLETS FROM ${table}"
+        def tabletsOnNewBe = tabletsAfter.findAll { it.BackendId == newBeId }
+        log.info("Tablets on new HDD-only BE: ${tabletsOnNewBe.size()}")
+        assertEquals(0, tabletsOnNewBe.size())
+
+        // Step 7: Verify data integrity
+        def countAfter = sql "SELECT COUNT(*) FROM ${table}"
+        assertEquals(100, countAfter[0][0] as int)
+
+        // Cleanup
+        sql "DROP TABLE IF EXISTS ${table} FORCE"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to