This is an automated email from the ASF dual-hosted git repository.

ddanielr pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit d4249cfb18fe03a93667603b4d5eafd3d44c6164
Merge: 24ead4b4bd 2cab146529
Author: Daniel Roberts <ddani...@gmail.com>
AuthorDate: Sun Jul 28 19:48:58 2024 +0000

    Merge branch 'main' into elasticity

 .../accumulo/core/metrics/MetricsProducer.java     | 10 +++++
 .../java/org/apache/accumulo/manager/Manager.java  |  9 ++++
 .../accumulo/manager/metrics/BalancerMetrics.java  | 51 ++++++++++++++++++++++
 .../java/org/apache/accumulo/test/BalanceIT.java   | 14 ++++++
 .../BalanceInPresenceOfOfflineTableIT.java         |  2 +
 .../apache/accumulo/test/metrics/MetricsIT.java    |  1 +
 6 files changed, 87 insertions(+)

diff --cc core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
index 08d8bc6514,f742ac460b..7383ee9e3e
--- a/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
+++ b/core/src/main/java/org/apache/accumulo/core/metrics/MetricsProducer.java
@@@ -617,27 -581,14 +617,35 @@@ import io.micrometer.core.instrument.Me
   * <td>Distribution Summary</td>
   * <td></td>
   * </tr>
 + * <tr>
 + * <td>N/A</td>
 + * <td>N/A</td>
 + * <td>{@link #METRICS_MANAGER_ROOT_TGW_ERRORS}</td>
 + * <td>Gauge</td>
 + * <td></td>
 + * </tr>
 + * <tr>
 + * <td>N/A</td>
 + * <td>N/A</td>
 + * <td>{@link #METRICS_MANAGER_META_TGW_ERRORS}</td>
 + * <td>Gauge</td>
 + * <td></td>
 + * </tr>
 + * <tr>
 + * <td>N/A</td>
 + * <td>N/A</td>
 + * <td>{@link #METRICS_MANAGER_USER_TGW_ERRORS}</td>
 + * <td>Gauge</td>
 + * <td></td>
 + * </tr>
+  * <!-- Balancing -->
+  * <tr>
+  * <td>N/A</td>
+  * <td>N/A</td>
+  * <td>{@value METRICS_MANAGER_BALANCER_MIGRATIONS_NEEDED}</td>
+  * <td>Gauge</td>
+  * <td>The number of migrations that need to complete before the system is balanced</td>
+  * </tr>
   * </table>
   *
   * @since 2.1.0
diff --cc server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
index 21e4d271d4,a7fb4a3a9d..260f1823f5
--- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java
@@@ -120,10 -113,9 +120,11 @@@ import org.apache.accumulo.core.util.th
  import org.apache.accumulo.core.util.threads.Threads;
  import org.apache.accumulo.core.util.time.NanoTime;
  import org.apache.accumulo.core.util.time.SteadyTime;
 +import org.apache.accumulo.manager.compaction.coordinator.CompactionCoordinator;
+ import org.apache.accumulo.manager.metrics.BalancerMetrics;
  import org.apache.accumulo.manager.metrics.ManagerMetrics;
  import org.apache.accumulo.manager.recovery.RecoveryManager;
 +import org.apache.accumulo.manager.split.Splitter;
  import org.apache.accumulo.manager.state.TableCounts;
  import org.apache.accumulo.manager.tableOps.TraceRepo;
  import org.apache.accumulo.manager.upgrade.PreUpgradeValidation;
@@@ -215,8 -208,9 +216,9 @@@ public class Manager extends AbstractSe
  
    ServiceLock managerLock = null;
    private TServer clientService = null;
 -  private volatile TabletBalancer tabletBalancer;
 +  protected volatile TabletBalancer tabletBalancer;
    private final BalancerEnvironment balancerEnvironment;
+   private final BalancerMetrics balancerMetrics = new BalancerMetrics();
  
    private ManagerState state = ManagerState.INITIAL;
  
@@@ -536,28 -570,122 +538,32 @@@
      }
    }
  
 -  public MetricsProducer getBalancerMetrics() {
 -    return balancerMetrics;
 -  }
 -
 -  enum TabletGoalState {
 -    HOSTED(TUnloadTabletGoal.UNKNOWN),
 -    UNASSIGNED(TUnloadTabletGoal.UNASSIGNED),
 -    DELETED(TUnloadTabletGoal.DELETED),
 -    SUSPENDED(TUnloadTabletGoal.SUSPENDED);
 -
 -    private final TUnloadTabletGoal unloadGoal;
 +  private Splitter splitter;
  
 -    TabletGoalState(TUnloadTabletGoal unloadGoal) {
 -      this.unloadGoal = unloadGoal;
 -    }
 -
 -    /** The purpose of unloading this tablet. */
 -    public TUnloadTabletGoal howUnload() {
 -      return unloadGoal;
 -    }
 +  public Splitter getSplitter() {
 +    return splitter;
    }
  
 -  TabletGoalState getSystemGoalState(TabletLocationState tls) {
 -    switch (getManagerState()) {
 -      case NORMAL:
 -        return TabletGoalState.HOSTED;
 -      case HAVE_LOCK: // fall-through intended
 -      case INITIAL: // fall-through intended
 -      case SAFE_MODE:
 -        if (tls.extent.isMeta()) {
 -          return TabletGoalState.HOSTED;
 -        }
 -        return TabletGoalState.UNASSIGNED;
 -      case UNLOAD_METADATA_TABLETS:
 -        if (tls.extent.isRootTablet()) {
 -          return TabletGoalState.HOSTED;
 -        }
 -        return TabletGoalState.UNASSIGNED;
 -      case UNLOAD_ROOT_TABLET:
 -      case STOP:
 -        return TabletGoalState.UNASSIGNED;
 -      default:
 -        throw new IllegalStateException("Unknown Manager State");
 -    }
++  public MetricsProducer getBalancerMetrics() {
++    return balancerMetrics;
+   }
+ 
 -  TabletGoalState getTableGoalState(KeyExtent extent) {
 -    TableState tableState = getContext().getTableManager().getTableState(extent.tableId());
 -    if (tableState == null) {
 -      return TabletGoalState.DELETED;
 -    }
 -    switch (tableState) {
 -      case DELETING:
 -        return TabletGoalState.DELETED;
 -      case OFFLINE:
 -      case NEW:
 -        return TabletGoalState.UNASSIGNED;
 -      default:
 -        return TabletGoalState.HOSTED;
 -    }
 +  public UpgradeCoordinator.UpgradeStatus getUpgradeStatus() {
 +    return upgradeCoordinator.getStatus();
    }
  
 -  TabletGoalState getGoalState(TabletLocationState tls, MergeInfo mergeInfo) {
 -    KeyExtent extent = tls.extent;
 -    // Shutting down?
 -    TabletGoalState state = getSystemGoalState(tls);
 -    if (state == TabletGoalState.HOSTED) {
 -      if (!upgradeCoordinator.getStatus().isParentLevelUpgraded(extent)) {
 -        // The place where this tablet stores its metadata was not upgraded, so do not assign this
 -        // tablet yet.
 -        return TabletGoalState.UNASSIGNED;
 -      }
 -
 -      if (tls.current != null && serversToShutdown.contains(tls.current.getServerInstance())) {
 -        return TabletGoalState.SUSPENDED;
 -      }
 -      // Handle merge transitions
 -      if (mergeInfo.getExtent() != null) {
 +  public CompactionCoordinator getCompactionCoordinator() {
 +    return compactionCoordinator;
 +  }
  
 -        final boolean overlaps = mergeInfo.overlaps(extent);
 +  public void hostOndemand(List<KeyExtent> extents) {
 +    extents.forEach(e -> Preconditions.checkArgument(DataLevel.of(e.tableId()) == DataLevel.USER));
  
 -        if (overlaps) {
 -          log.debug("mergeInfo overlaps: {} true", extent);
 -          switch (mergeInfo.getState()) {
 -            case NONE:
 -            case COMPLETE:
 -              break;
 -            case STARTED:
 -              return TabletGoalState.HOSTED;
 -            case WAITING_FOR_OFFLINE:
 -              // If we have walogs we need to be HOSTED to recover
 -              if (!tls.walogs.isEmpty()) {
 -                return TabletGoalState.HOSTED;
 -              } else {
 -                return TabletGoalState.UNASSIGNED;
 -              }
 -            case MERGING:
 -            case MERGED:
 -              return TabletGoalState.UNASSIGNED;
 -          }
 -        } else {
 -          log.trace("mergeInfo overlaps: {} false", extent);
 -        }
 -      }
 -
 -      // taking table offline?
 -      state = getTableGoalState(extent);
 -      if (state == TabletGoalState.HOSTED) {
 -        // Maybe this tablet needs to be migrated
 -        TServerInstance dest = migrations.get(extent);
 -        if (dest != null && tls.current != null && !dest.equals(tls.current.getServerInstance())) {
 -          return TabletGoalState.UNASSIGNED;
 -        }
 +    for (var watcher : watchers) {
 +      if (watcher.getLevel() == DataLevel.USER) {
 +        watcher.hostOndemand(extents);
        }
      }
 -    return state;
    }
  
    private class MigrationCleanupThread implements Runnable {
@@@ -1140,9 -1257,10 +1147,11 @@@
      }
  
      MetricsInfo metricsInfo = getContext().getMetricsInfo();
 -    metricsInfo.addServiceTags(getApplicationName(), sa.getAddress());
 -
 -    var producers = ManagerMetrics.getProducers(getConfiguration(), this);
 +    metricsInfo.addServiceTags(getApplicationName(), sa.getAddress(), getResourceGroup());
 +    ManagerMetrics managerMetrics = new ManagerMetrics(getConfiguration(), this);
 +    var producers = managerMetrics.getProducers(getConfiguration(), this);
+     producers.add(balancerMetrics);
++
      metricsInfo.addMetricsProducers(producers.toArray(new MetricsProducer[0]));
      metricsInfo.init();
  
diff --cc test/src/main/java/org/apache/accumulo/test/BalanceIT.java
index 27ae709699,0164463903..231d50fb4a
--- a/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/BalanceIT.java
@@@ -33,6 -37,20 +37,16 @@@ import org.slf4j.LoggerFactory
  public class BalanceIT extends AccumuloClusterHarness {
    private static final Logger log = LoggerFactory.getLogger(BalanceIT.class);
  
+   @Override
+   public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
+     Map<String,String> siteConfig = cfg.getSiteConfig();
+     siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
 -    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+     siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+     siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
+     cfg.setSiteConfig(siteConfig);
 -    // ensure we have two tservers
 -    if (cfg.getNumTservers() < 2) {
 -      cfg.setNumTservers(2);
 -    }
++    cfg.getClusterServerConfiguration().setNumDefaultTabletServers(2);
+   }
+ 
    @Override
    protected Duration defaultTimeout() {
      return Duration.ofMinutes(1);
diff --cc test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
index 46cb89ba7b,c03ad5c03d..f38a6d8bff
--- a/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
+++ b/test/src/main/java/org/apache/accumulo/test/functional/BalanceInPresenceOfOfflineTableIT.java
@@@ -71,11 -67,13 +71,13 @@@ public class BalanceInPresenceOfOffline
    public void configureMiniCluster(MiniAccumuloConfigImpl cfg, Configuration hadoopCoreSite) {
      Map<String,String> siteConfig = cfg.getSiteConfig();
      siteConfig.put(Property.TSERV_MAXMEM.getKey(), "10K");
 -    siteConfig.put(Property.TSERV_MAJC_DELAY.getKey(), "50ms");
+     siteConfig.put(Property.GENERAL_MICROMETER_ENABLED.getKey(), "true");
+     siteConfig.put("general.custom.metrics.opts.logging.step", "0.5s");
      cfg.setSiteConfig(siteConfig);
      // ensure we have two tservers
 -    if (cfg.getNumTservers() < 2) {
 -      cfg.setNumTservers(2);
 +    if (cfg.getClusterServerConfiguration().getTabletServerConfiguration()
 +        .get(Constants.DEFAULT_RESOURCE_GROUP_NAME) < 2) {
 +      cfg.getClusterServerConfiguration().setNumDefaultTabletServers(2);
      }
    }
  

Reply via email to