This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new a6733243a6 makes balancing a noop when balancer setup fails. (#5530)
a6733243a6 is described below

commit a6733243a625b6cc94fff4d166ba5ce5aab05b78
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Tue May 20 13:58:12 2025 -0400

    makes balancing a noop when balancer setup fails. (#5530)
    
    * WIP makes balancing a noop when balancer setup fails.
    
    In the case where setting up a balancer fails the manager currently
    falls back to using the SimpleLoadBalancer.  This fallback behavior
    could be very unhelpful and one problem (bad balancer config) into
    multiple problems as the simple load balancer may start assigning
    tablets in a way that is detrimental.
    
    The reason this is a draft is to get feedback on the approach. New test
    are needed, but those will be a lot more work than this simple change.
    Did not want to write test if we settle on a different approach. Need to
    test the following for system balancer and per table balancers.
    
     * Configuring a balancer class name that does not exists
     * Configuring a balancer class that is not of the correct type
     * Configuring a balancer class that throws an exception in it
       constructor
     * Configuring a balancer class that throws an exception when init is
       called
    
    * Rename to DoNothingBalancer. Updates log messages
    
    Renames the Derelict Balancer to the DoNothing Balancer.
    Updates the log messages to show that the ignore message is due to a
    failed state.
    
    * fix formatting
    
    * added test and fixed bugs
    
    * format code
    
    * code review updates
    
    * code review update
    
    ---------
    
    Co-authored-by: Daniel Roberts ddanielr <ddani...@gmail.com>
---
 .../core/spi/balancer/DoNothingBalancer.java       |  69 +++++++++++
 .../core/spi/balancer/TableLoadBalancer.java       |  42 +++----
 .../java/org/apache/accumulo/manager/Manager.java  |  22 +++-
 .../java/org/apache/accumulo/test/BalanceIT.java   |   2 +-
 .../org/apache/accumulo/test/BrokenBalancerIT.java | 136 +++++++++++++++++++++
 5 files changed, 240 insertions(+), 31 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/spi/balancer/DoNothingBalancer.java
 
b/core/src/main/java/org/apache/accumulo/core/spi/balancer/DoNothingBalancer.java
new file mode 100644
index 0000000000..7b730ebf59
--- /dev/null
+++ 
b/core/src/main/java/org/apache/accumulo/core/spi/balancer/DoNothingBalancer.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.core.spi.balancer;
+
+import org.apache.accumulo.core.data.TableId;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A balancer that will do nothing and warn about doing nothing. This purpose 
of this balancer is as
+ * a fallback when attempts to create a balancer fail.
+ *
+ * @since 2.1.4
+ */
+public class DoNothingBalancer implements TabletBalancer {
+
+  private static final Logger log = 
LoggerFactory.getLogger(DoNothingBalancer.class);
+
+  private final TableId tableId;
+
+  public DoNothingBalancer() {
+    this.tableId = null;
+  }
+
+  public DoNothingBalancer(TableId tableId) {
+    this.tableId = tableId;
+  }
+
+  @Override
+  public void init(BalancerEnvironment balancerEnvironment) {}
+
+  @Override
+  public void getAssignments(AssignmentParameters params) {
+    if (tableId != null) {
+      log.warn("Balancer creation failed. Ignoring {} assignment request for 
tableId {}",
+          params.unassignedTablets().size(), tableId);
+    } else {
+      log.warn("Balancer creation failed. Ignoring {} assignment request ",
+          params.unassignedTablets().size());
+    }
+  }
+
+  @Override
+  public long balance(BalanceParameters params) {
+    if (tableId != null) {
+      log.warn("Balancer creation failed. Ignoring request to balance tablets 
for tableId:{}",
+          tableId);
+    } else {
+      log.warn("Balancer creation failed. Ignoring request to balance 
tablets");
+    }
+    return 30_000;
+  }
+}
diff --git 
a/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java
 
b/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java
index 83ad4df530..27295b8ea9 100644
--- 
a/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java
+++ 
b/core/src/main/java/org/apache/accumulo/core/spi/balancer/TableLoadBalancer.java
@@ -66,6 +66,17 @@ public class TableLoadBalancer implements TabletBalancer {
     return null;
   }
 
+  private TabletBalancer constructAndInitializeBalancer(String clazzName, 
TableId tableId) {
+    try {
+      var balancer = constructNewBalancerForTable(clazzName, tableId);
+      balancer.init(environment);
+      return balancer;
+    } catch (Exception e) {
+      log.warn("Failed to load table balancer class {} for table {}", 
clazzName, tableId, e);
+      return null;
+    }
+  }
+
   protected TabletBalancer getBalancerForTable(TableId tableId) {
     TabletBalancer balancer = perTableBalancers.get(tableId);
 
@@ -74,35 +85,16 @@ public class TableLoadBalancer implements TabletBalancer {
     if (clazzName == null) {
       clazzName = SimpleLoadBalancer.class.getName();
     }
-    if (balancer != null) {
-      if (!clazzName.equals(balancer.getClass().getName())) {
-        // the balancer class for this table does not match the class 
specified in the configuration
-        try {
-          balancer = constructNewBalancerForTable(clazzName, tableId);
-          perTableBalancers.put(tableId, balancer);
-          balancer.init(environment);
-
-          log.info("Loaded new class {} for table {}", clazzName, tableId);
-        } catch (Exception e) {
-          log.warn("Failed to load table balancer class {} for table {}", 
clazzName, tableId, e);
-        }
-      }
-    }
-    if (balancer == null) {
-      try {
-        balancer = constructNewBalancerForTable(clazzName, tableId);
-        log.info("Loaded class {} for table {}", clazzName, tableId);
-      } catch (Exception e) {
-        log.warn("Failed to load table balancer class {} for table {}", 
clazzName, tableId, e);
-      }
 
+    if (balancer == null || !clazzName.equals(balancer.getClass().getName())) {
+      balancer = constructAndInitializeBalancer(clazzName, tableId);
       if (balancer == null) {
-        log.info("Creating balancer {} limited to balancing table {}",
-            SimpleLoadBalancer.class.getName(), tableId);
-        balancer = new SimpleLoadBalancer(tableId);
+        balancer = 
constructAndInitializeBalancer(DoNothingBalancer.class.getName(), tableId);
+        log.error("Fell back to balancer {} for table {}", 
DoNothingBalancer.class.getName(),
+            tableId);
       }
+      log.info("Loaded class {} for table {}", balancer.getClass().getName(), 
tableId);
       perTableBalancers.put(tableId, balancer);
-      balancer.init(environment);
     }
     return balancer;
   }
diff --git 
a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java 
b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index ee9314d958..c69142fd80 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@ -98,7 +98,7 @@ import org.apache.accumulo.core.metrics.MetricsProducer;
 import org.apache.accumulo.core.process.thrift.ServerProcessService;
 import org.apache.accumulo.core.replication.thrift.ReplicationCoordinator;
 import org.apache.accumulo.core.spi.balancer.BalancerEnvironment;
-import org.apache.accumulo.core.spi.balancer.TableLoadBalancer;
+import org.apache.accumulo.core.spi.balancer.DoNothingBalancer;
 import org.apache.accumulo.core.spi.balancer.TabletBalancer;
 import org.apache.accumulo.core.spi.balancer.data.TServerStatus;
 import org.apache.accumulo.core.spi.balancer.data.TabletMigration;
@@ -1918,10 +1918,22 @@ public class Manager extends AbstractServer implements 
LiveTServerSet.Listener,
   }
 
   void initializeBalancer() {
-    var localTabletBalancer = 
Property.createInstanceFromPropertyName(getConfiguration(),
-        Property.MANAGER_TABLET_BALANCER, TabletBalancer.class, new 
TableLoadBalancer());
-    localTabletBalancer.init(balancerEnvironment);
-    tabletBalancer = localTabletBalancer;
+    try {
+      getContext().getPropStore().getCache().removeAll();
+      getConfiguration().invalidateCache();
+      log.debug("Attempting to reinitialize balancer using class {}",
+          getConfiguration().get(Property.MANAGER_TABLET_BALANCER));
+      var localTabletBalancer = 
Property.createInstanceFromPropertyName(getConfiguration(),
+          Property.MANAGER_TABLET_BALANCER, TabletBalancer.class, new 
DoNothingBalancer());
+      localTabletBalancer.init(balancerEnvironment);
+      tabletBalancer = localTabletBalancer;
+    } catch (Exception e) {
+      log.warn("Failed to create balancer {} using {} instead",
+          getConfiguration().get(Property.MANAGER_TABLET_BALANCER), 
DoNothingBalancer.class, e);
+      var localTabletBalancer = new DoNothingBalancer();
+      localTabletBalancer.init(balancerEnvironment);
+      tabletBalancer = localTabletBalancer;
+    }
   }
 
   Class<?> getBalancerClass() {
diff --git a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java 
b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
index a282388ce7..e7505bebba 100644
--- a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
@@ -144,7 +144,7 @@ public class BalanceIT extends ConfigurableMacBase {
     }
   }
 
-  private Map<String,Integer> countLocations(AccumuloClient client, String 
tableName)
+  static Map<String,Integer> countLocations(AccumuloClient client, String 
tableName)
       throws Exception {
     var ctx = ((ClientContext) client);
     var ample = ctx.getAmple();
diff --git a/test/src/main/java/org/apache/accumulo/test/BrokenBalancerIT.java 
b/test/src/main/java/org/apache/accumulo/test/BrokenBalancerIT.java
new file mode 100644
index 0000000000..fafdaf9c5b
--- /dev/null
+++ b/test/src/main/java/org/apache/accumulo/test/BrokenBalancerIT.java
@@ -0,0 +1,136 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.accumulo.core.client.Accumulo;
+import org.apache.accumulo.core.client.AccumuloClient;
+import org.apache.accumulo.core.client.admin.NewTableConfiguration;
+import org.apache.accumulo.core.conf.Property;
+import org.apache.accumulo.core.data.TableId;
+import org.apache.accumulo.core.spi.balancer.BalancerEnvironment;
+import org.apache.accumulo.core.spi.balancer.SimpleLoadBalancer;
+import org.apache.accumulo.core.spi.balancer.TableLoadBalancer;
+import org.apache.accumulo.core.util.UtilWaitThread;
+import org.apache.accumulo.minicluster.ServerType;
+import org.apache.accumulo.miniclusterImpl.MiniAccumuloConfigImpl;
+import org.apache.accumulo.test.functional.ConfigurableMacBase;
+import org.apache.accumulo.test.util.Wait;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class BrokenBalancerIT extends ConfigurableMacBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(BrokenBalancerIT.class);
+
+  public static class BrokenBalancer extends SimpleLoadBalancer {
+    public BrokenBalancer() {
+      super();
+    }
+
+    public BrokenBalancer(TableId tableId) {
+      super(tableId);
+    }
+
+    @Override
+    public void init(BalancerEnvironment balancerEnvironment) {
+      throw new IllegalStateException();
+    }
+  }
+
+  @Override
+  public void configure(MiniAccumuloConfigImpl cfg, Configuration 
hadoopCoreSite) {
+    Map<String,String> siteConfig = cfg.getSiteConfig();
+    siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
+    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+    siteConfig.put(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL.getKey(), 
"3s");
+    cfg.setSiteConfig(siteConfig);
+    // ensure we have two tservers
+    if (cfg.getNumTservers() != 2) {
+      cfg.setNumTservers(2);
+    }
+  }
+
+  @Test
+  public void testBalancerException() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer(BrokenBalancer.class.getName(), tableName);
+  }
+
+  @Test
+  public void testBalancerNotFound() throws Exception {
+    String tableName = getUniqueNames(1)[0];
+    testBadBalancer("org.apache.accumulo.abc.NonExistentBalancer", tableName);
+  }
+
+  private void testBadBalancer(String balancerClass, String tableName) throws 
Exception {
+    try (AccumuloClient c = 
Accumulo.newClient().from(getClientProperties()).build()) {
+      SortedSet<Text> splits = new TreeSet<>();
+      for (int i = 0; i < 10; i++) {
+        splits.add(new Text("" + i));
+      }
+      var props = Map.of(Property.TABLE_LOAD_BALANCER.getKey(), balancerClass);
+      NewTableConfiguration ntc =
+          new NewTableConfiguration().withSplits(splits).setProperties(props);
+      c.tableOperations().create(tableName, ntc);
+
+      assertEquals(Map.of(" none", 11), BalanceIT.countLocations(c, 
tableName));
+      UtilWaitThread.sleep(5000);
+      // scan should not be able to complete because the tablet should not be 
assigned
+      assertEquals(Map.of(" none", 11), BalanceIT.countLocations(c, 
tableName));
+
+      // fix the balancer config
+      log.info("fixing per tablet balancer");
+      c.tableOperations().setProperty(tableName, 
Property.TABLE_LOAD_BALANCER.getKey(),
+          SimpleLoadBalancer.class.getName());
+
+      Wait.waitFor(() -> 2 == BalanceIT.countLocations(c, tableName).size());
+
+      // break the balancer at the system level
+      log.info("breaking manager balancer");
+      
c.instanceOperations().setProperty(Property.MANAGER_TABLET_BALANCER.getKey(), 
balancerClass);
+
+      // add some tablet servers
+      assertEquals(2, getCluster().getConfig().getNumTservers());
+      getCluster().getConfig().setNumTservers(5);
+      getCluster().getClusterControl().start(ServerType.TABLET_SERVER);
+
+      UtilWaitThread.sleep(5000);
+
+      // should not have balanced across the two new tservers
+      assertEquals(2, BalanceIT.countLocations(c, tableName).size());
+
+      // fix the system level balancer
+      log.info("fixing manager balancer");
+      
c.instanceOperations().setProperty(Property.MANAGER_TABLET_BALANCER.getKey(),
+          TableLoadBalancer.class.getName());
+
+      // should eventually balance across all 5 tabletsevers
+      Wait.waitFor(() -> 5 == BalanceIT.countLocations(c, tableName).size());
+    }
+  }
+}

Reply via email to