This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit c6ec9697da2cfa517eb028d8937ecf10b31c9d6a Merge: 1636a87f66 12246867b3 Author: Dave Marion <dlmar...@apache.org> AuthorDate: Tue Dec 3 18:12:43 2024 +0000 Merge branch '3.1' .../java/org/apache/accumulo/manager/Manager.java | 38 ++++++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --cc server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index bf787607f0,64deeac56d..67782a73e1 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@@ -939,12 -1048,30 +939,30 @@@ public class Manager extends AbstractSe int attemptNum = 0; do { log.debug("Balancing for tables at level {}, times-in-loop: {}", dl, ++attemptNum); - params = BalanceParamsImpl.fromThrift(tserverStatusForBalancerLevel, - tServerGroupingForBalancer, tserverStatusForLevel, partitionedMigrations.get(dl)); + + SortedMap<TabletServerId,TServerStatus> statusForBalancerLevel = + tserverStatusForBalancerLevel; + if (attemptNum > 1 && (dl == DataLevel.ROOT || dl == DataLevel.METADATA)) { + // If we are still migrating then perform a re-check on the tablet + // servers to make sure none of them have failed. + Set<TServerInstance> currentServers = tserverSet.getCurrentServers(); + tserverStatus = gatherTableInformation(currentServers); + // Create a view of the tserver status such that it only contains the tables + // for this level in the tableMap. 
+ tserverStatusForLevel = createTServerStatusView(dl, tserverStatus); + final SortedMap<TabletServerId,TServerStatus> tserverStatusForBalancerLevel2 = + new TreeMap<>(); + tserverStatusForLevel.forEach((tsi, status) -> tserverStatusForBalancerLevel2 + .put(new TabletServerIdImpl(tsi), TServerStatusImpl.fromThrift(status))); + statusForBalancerLevel = tserverStatusForBalancerLevel2; + } + - params = BalanceParamsImpl.fromThrift(statusForBalancerLevel, tserverStatusForLevel, - partitionedMigrations.get(dl)); ++ params = BalanceParamsImpl.fromThrift(statusForBalancerLevel, tServerGroupingForBalancer, ++ tserverStatusForLevel, partitionedMigrations.get(dl)); wait = Math.max(tabletBalancer.balance(params), wait); - migrationsOutForLevel = params.migrationsOut().size(); - for (TabletMigration m : checkMigrationSanity(tserverStatusForBalancerLevel.keySet(), - params.migrationsOut())) { + migrationsOutForLevel = 0; + for (TabletMigration m : checkMigrationSanity(statusForBalancerLevel.keySet(), + params.migrationsOut(), dl)) { final KeyExtent ke = KeyExtent.fromTabletId(m.getTablet()); if (migrations.containsKey(ke)) { log.warn("balancer requested migration more than once, skipping {}", m);