This is an automated email from the ASF dual-hosted git repository.

dlmarion pushed a commit to branch elasticity
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 7facf2f35556247dbf847d854fe995d5c20ad105
Merge: d7264bc1d0 aada55ef50
Author: Dave Marion <dlmar...@apache.org>
AuthorDate: Fri May 24 17:43:12 2024 +0000

    Merge branch 'main' into elasticity

 core/pom.xml                                       |  16 ++
 .../accumulo/core/logging/ConditionalLogger.java   | 194 +++++++++++++++++++++
 .../core/logging/DeduplicatingLoggerTest.java      |  69 ++++++++
 .../core/logging/EscalatingLoggerTest.java         |  77 ++++++++
 .../accumulo/manager/TabletGroupWatcher.java       |  11 +-
 .../accumulo/tserver/UnloadTabletHandler.java      |   1 -
 .../org/apache/accumulo/tserver/tablet/Tablet.java |  22 ++-
 7 files changed, 386 insertions(+), 4 deletions(-)

diff --cc 
server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 1b41145fa8,443df6c8f3..9299aab1be
--- 
a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ 
b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@@ -20,13 -20,10 +20,14 @@@ package org.apache.accumulo.manager
  
  import static 
com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
  import static java.lang.Math.min;
 +import static java.util.Objects.requireNonNull;
 +import static 
org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.FILES;
 +import static 
org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType.LOGS;
  
  import java.io.IOException;
+ import java.time.Duration;
  import java.util.ArrayList;
 +import java.util.Collection;
  import java.util.Collections;
  import java.util.HashMap;
  import java.util.HashSet;
@@@ -56,11 -56,10 +57,12 @@@ import org.apache.accumulo.core.data.Ra
  import org.apache.accumulo.core.data.TableId;
  import org.apache.accumulo.core.data.Value;
  import org.apache.accumulo.core.dataImpl.KeyExtent;
 -import org.apache.accumulo.core.gc.ReferenceFile;
+ import org.apache.accumulo.core.logging.ConditionalLogger.EscalatingLogger;
  import org.apache.accumulo.core.logging.TabletLogger;
 +import org.apache.accumulo.core.manager.state.TabletManagement;
 +import 
org.apache.accumulo.core.manager.state.TabletManagement.ManagementAction;
  import org.apache.accumulo.core.manager.state.tables.TableState;
 +import org.apache.accumulo.core.manager.thrift.ManagerGoalState;
  import org.apache.accumulo.core.manager.thrift.ManagerState;
  import org.apache.accumulo.core.manager.thrift.TabletServerStatus;
  import org.apache.accumulo.core.metadata.AccumuloTable;
@@@ -100,31 -110,18 +102,36 @@@ import org.apache.hadoop.fs.Path
  import org.apache.hadoop.io.Text;
  import org.apache.thrift.TException;
  import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
+ import org.slf4j.event.Level;
  
 -import com.google.common.annotations.VisibleForTesting;
  import com.google.common.base.Preconditions;
  import com.google.common.collect.ImmutableSortedSet;
  import com.google.common.collect.Iterators;
  
  abstract class TabletGroupWatcher extends AccumuloDaemonThread {
  
 +  public static class BadLocationStateException extends Exception {
 +    private static final long serialVersionUID = 2L;
 +
 +    // store as byte array because Text isn't Serializable
 +    private final byte[] metadataTableEntry;
 +
 +    public BadLocationStateException(String msg, Text row) {
 +      super(msg);
 +      this.metadataTableEntry = TextUtil.getBytes(requireNonNull(row));
 +    }
 +
 +    public Text getEncodedEndRow() {
 +      return new Text(metadataTableEntry);
 +    }
 +  }
 +
 +  private static final Logger LOG = 
LoggerFactory.getLogger(TabletGroupWatcher.class);
++
+   private static final Logger TABLET_UNLOAD_LOGGER =
+       new EscalatingLogger(Manager.log, Duration.ofMinutes(5), 1000, 
Level.INFO);
++
    private final Manager manager;
    private final TabletStateStore store;
    private final TabletGroupWatcher dependentWatcher;
@@@ -222,536 -182,203 +229,536 @@@
      }
    }
  
 -  @Override
 -  public void run() {
 -    int[] oldCounts = new int[TabletState.values().length];
 -    EventCoordinator.Listener eventListener = 
this.manager.nextEvent.getListener();
 +  class EventHandler implements EventCoordinator.Listener {
  
 -    WalStateManager wals = new WalStateManager(manager.getContext());
 +    // Setting this to true to start with because it's not known what happened 
before this object was
 +    // created, so just start off with full scan.
 +    private boolean needsFullScan = true;
  
 -    while (manager.stillManager()) {
 -      // slow things down a little, otherwise we spam the logs when there are 
many wake-up events
 -      sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
 +    private final BlockingQueue<Range> rangesToProcess;
  
 -      final long waitTimeBetweenScans = manager.getConfiguration()
 -          .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL);
 +    class RangeProccessor implements Runnable {
 +      @Override
 +      public void run() {
 +        try {
 +          while (manager.stillManager()) {
 +            var range = rangesToProcess.poll(100, TimeUnit.MILLISECONDS);
 +            if (range == null) {
 +              // check to see if still the manager
 +              continue;
 +            }
  
 -      int totalUnloaded = 0;
 -      int unloaded = 0;
 -      ClosableIterator<TabletLocationState> iter = null;
 -      try {
 -        Map<TableId,MergeStats> mergeStatsCache = new HashMap<>();
 -        Map<TableId,MergeStats> currentMerges = new HashMap<>();
 -        for (MergeInfo merge : manager.merges()) {
 -          if (merge.getExtent() != null) {
 -            currentMerges.put(merge.getExtent().tableId(), new 
MergeStats(merge));
 +            ArrayList<Range> ranges = new ArrayList<>();
 +            ranges.add(range);
 +
 +            rangesToProcess.drainTo(ranges);
 +
 +            if (!processRanges(ranges)) {
 +              setNeedsFullScan();
 +            }
            }
 +        } catch (InterruptedException e) {
 +          throw new RuntimeException(e);
          }
 +      }
 +    }
  
 -        // Get the current status for the current list of tservers
 -        SortedMap<TServerInstance,TabletServerStatus> currentTServers = new 
TreeMap<>();
 -        for (TServerInstance entry : manager.tserverSet.getCurrentServers()) {
 -          currentTServers.put(entry, manager.tserverStatus.get(entry));
 -        }
 +    EventHandler() {
 +      rangesToProcess = new ArrayBlockingQueue<>(3000);
  
 -        if (currentTServers.isEmpty()) {
 -          eventListener.waitForEvents(waitTimeBetweenScans);
 -          synchronized (this) {
 -            lastScanServers = Collections.emptySortedSet();
 +      Threads
 +          .createThread("TGW [" + store.name() + "] event range processor", 
new RangeProccessor())
 +          .start();
 +    }
 +
 +    private synchronized void setNeedsFullScan() {
 +      needsFullScan = true;
 +      notifyAll();
 +    }
 +
 +    public synchronized void clearNeedsFullScan() {
 +      needsFullScan = false;
 +    }
 +
 +    public synchronized boolean isNeedsFullScan() {
 +      return needsFullScan;
 +    }
 +
 +    @Override
 +    public void process(EventCoordinator.Event event) {
 +
 +      switch (event.getScope()) {
 +        case ALL:
 +        case DATA_LEVEL:
 +          setNeedsFullScan();
 +          break;
 +        case TABLE:
 +        case TABLE_RANGE:
 +          if (!rangesToProcess.offer(event.getExtent().toMetaRange())) {
 +            Manager.log.debug("[{}] unable to process event range {} because 
queue is full",
 +                store.name(), event.getExtent());
 +            setNeedsFullScan();
            }
 -          continue;
 +          break;
 +        default:
 +          throw new IllegalArgumentException("Unhandled scope " + 
event.getScope());
 +      }
 +    }
 +
 +    synchronized void waitForFullScan(long millis) {
 +      if (!needsFullScan) {
 +        try {
 +          wait(millis);
 +        } catch (InterruptedException e) {
 +          throw new RuntimeException(e);
          }
 +      }
 +    }
 +  }
  
 -        TabletLists tLists = new TabletLists(manager, currentTServers);
 +  private boolean processRanges(List<Range> ranges) {
 +    if (manager.getManagerGoalState() == ManagerGoalState.CLEAN_STOP) {
 +      return false;
 +    }
  
 -        ManagerState managerState = manager.getManagerState();
 -        int[] counts = new int[TabletState.values().length];
 -        stats.begin();
 -        // Walk through the tablets in our store, and work tablets
 -        // towards their goal
 -        iter = store.iterator();
 -        while (iter.hasNext()) {
 -          TabletLocationState tls = iter.next();
 -          if (tls == null) {
 -            continue;
 -          }
 +    TabletManagementParameters tabletMgmtParams = 
createTabletManagementParameters(false);
  
 -          // ignore entries for tables that do not exist in zookeeper
 -          if (manager.getTableManager().getTableState(tls.extent.tableId()) 
== null) {
 -            continue;
 -          }
 +    var currentTservers = 
getCurrentTservers(tabletMgmtParams.getOnlineTsevers());
 +    if (currentTservers.isEmpty()) {
 +      return false;
 +    }
  
 -          // Don't overwhelm the tablet servers with work
 -          if (tLists.unassigned.size() + unloaded
 -              > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size()) {
 -            flushChanges(tLists, wals);
 -            tLists.reset();
 -            unloaded = 0;
 -            eventListener.waitForEvents(waitTimeBetweenScans);
 -          }
 -          TableId tableId = tls.extent.tableId();
 -          TableConfiguration tableConf = 
manager.getContext().getTableConfiguration(tableId);
 -
 -          MergeStats mergeStats = mergeStatsCache.computeIfAbsent(tableId, k 
-> {
 -            var mStats = currentMerges.get(k);
 -            return mStats != null ? mStats : new MergeStats(new MergeInfo());
 -          });
 -          TabletGoalState goal = manager.getGoalState(tls, 
mergeStats.getMergeInfo());
 -          Location location = tls.getLocation();
 -          TabletState state = tls.getState(currentTServers.keySet());
 -
 -          TabletLogger.missassigned(tls.extent, goal.toString(), 
state.toString(),
 -              tls.getFutureServer(), tls.getCurrentServer(), 
tls.walogs.size());
 -
 -          stats.update(tableId, state);
 -          mergeStats.update(tls.extent, state);
 -
 -          // Always follow through with assignments
 -          if (state == TabletState.ASSIGNED) {
 -            goal = TabletGoalState.HOSTED;
 +    try (var iter = store.iterator(ranges, tabletMgmtParams)) {
 +      long t1 = System.currentTimeMillis();
 +      manageTablets(iter, tabletMgmtParams, currentTservers, false);
 +      long t2 = System.currentTimeMillis();
 +      Manager.log.debug(String.format("[%s]: partial scan time %.2f seconds 
for %,d ranges",
 +          store.name(), (t2 - t1) / 1000., ranges.size()));
 +    } catch (Exception e) {
 +      Manager.log.error("Error processing {} ranges for store {} ", 
ranges.size(), store.name(), e);
 +    }
 +
 +    return true;
 +  }
 +
 +  private final Set<KeyExtent> hostingRequestInProgress = new 
ConcurrentSkipListSet<>();
 +
 +  public void hostOndemand(Collection<KeyExtent> extents) {
 +    // This is only expected to be called for the user level
 +    Preconditions.checkState(getLevel() == Ample.DataLevel.USER);
 +
 +    final List<KeyExtent> inProgress = new ArrayList<>();
 +    extents.forEach(ke -> {
 +      if (hostingRequestInProgress.add(ke)) {
 +        LOG.info("Tablet hosting requested for: {} ", ke);
 +        inProgress.add(ke);
 +      } else {
 +        LOG.trace("Ignoring hosting request because another thread is 
currently processing it {}",
 +            ke);
 +      }
 +    });
 +    // Do not add any code here, it may interfere with the finally block 
removing extents from
 +    // hostingRequestInProgress
 +    try (var mutator = 
manager.getContext().getAmple().conditionallyMutateTablets()) {
 +      inProgress.forEach(ke -> {
 +        mutator.mutateTablet(ke).requireAbsentOperation()
 +            
.requireTabletAvailability(TabletAvailability.ONDEMAND).requireAbsentLocation()
 +            
.setHostingRequested().submit(TabletMetadata::getHostingRequested);
 +
 +      });
 +
 +      List<Range> ranges = new ArrayList<>();
 +
 +      mutator.process().forEach((extent, result) -> {
 +        if (result.getStatus() == Ample.ConditionalResult.Status.ACCEPTED) {
 +          // cache this success for a bit
 +          ranges.add(extent.toMetaRange());
 +        } else {
 +          if (LOG.isTraceEnabled()) {
 +            // only read the metadata if the logging is enabled
 +            LOG.trace("Failed to set hosting request {}", 
result.readMetadata());
            }
 -          if (Manager.log.isTraceEnabled()) {
 -            Manager.log.trace(
 -                "[{}] Shutting down all Tservers: {}, dependentCount: {} 
Extent: {}, state: {}, goal: {}",
 -                store.name(), 
manager.serversToShutdown.equals(currentTServers.keySet()),
 -                dependentWatcher == null ? "null" : 
dependentWatcher.assignedOrHosted(), tls.extent,
 -                state, goal);
 +        }
 +      });
 +
 +      processRanges(ranges);
 +    } finally {
 +      inProgress.forEach(hostingRequestInProgress::remove);
 +    }
 +  }
 +
 +  private TabletManagementParameters
 +      createTabletManagementParameters(boolean 
lookForTabletsNeedingVolReplacement) {
 +
 +    HashMap<Ample.DataLevel,Boolean> parentLevelUpgrade = new HashMap<>();
 +    UpgradeCoordinator.UpgradeStatus upgradeStatus = 
manager.getUpgradeStatus();
 +    for (var level : Ample.DataLevel.values()) {
 +      parentLevelUpgrade.put(level, 
upgradeStatus.isParentLevelUpgraded(level));
 +    }
 +
 +    Set<TServerInstance> shutdownServers;
 +    if (store.getLevel() == Ample.DataLevel.USER) {
 +      shutdownServers = manager.shutdownServers();
 +    } else {
 +      // Use the servers to shutdown filtered by the dependent watcher. These 
are servers to
 +      // shutdown that the dependent watcher has determined it has no tablets 
hosted on or assigned
 +      // to.
 +      shutdownServers = dependentWatcher.getFilteredServersToShutdown();
 +    }
 +
 +    var tServersSnapshot = manager.tserversSnapshot();
 +
 +    return new TabletManagementParameters(manager.getManagerState(), 
parentLevelUpgrade,
 +        manager.onlineTables(), tServersSnapshot, shutdownServers, 
manager.migrationsSnapshot(),
 +        store.getLevel(), manager.getCompactionHints(store.getLevel()), 
canSuspendTablets(),
 +        lookForTabletsNeedingVolReplacement ? 
manager.getContext().getVolumeReplacements()
 +            : Map.of(),
 +        manager.getSteadyTime());
 +  }
 +
 +  private Set<TServerInstance> getFilteredServersToShutdown() {
 +    return filteredServersToShutdown;
 +  }
 +
 +  private static class TableMgmtStats {
 +    int[] counts = new int[TabletState.values().length];
 +    private int totalUnloaded;
 +    private long totalVolumeReplacements;
 +    private int tabletsWithErrors;
 +  }
 +
 +  private TableMgmtStats manageTablets(Iterator<TabletManagement> iter,
 +      TabletManagementParameters tableMgmtParams,
 +      SortedMap<TServerInstance,TabletServerStatus> currentTServers, boolean 
isFullScan)
 +      throws BadLocationStateException, TException, 
DistributedStoreException, WalMarkerException,
 +      IOException {
 +
 +    final TableMgmtStats tableMgmtStats = new TableMgmtStats();
 +    final boolean shuttingDownAllTabletServers =
 +        
tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet());
 +    if (shuttingDownAllTabletServers && !isFullScan) {
 +      // If we are shutting down all of the TabletServers, then don't process 
any events
 +      // from the EventCoordinator.
 +      LOG.debug("Partial scan requested, but aborted due to shutdown of all 
TabletServers");
 +      return tableMgmtStats;
 +    }
 +
 +    int unloaded = 0;
 +
 +    TabletLists tLists = new TabletLists(currentTServers, 
tableMgmtParams.getGroupedTServers(),
 +        tableMgmtParams.getServersToShutdown());
 +
 +    CompactionJobGenerator compactionGenerator =
 +        new CompactionJobGenerator(new 
ServiceEnvironmentImpl(manager.getContext()),
 +            tableMgmtParams.getCompactionHints(), 
tableMgmtParams.getSteadyTime());
 +
 +    Set<TServerInstance> filteredServersToShutdown =
 +        new HashSet<>(tableMgmtParams.getServersToShutdown());
 +
 +    while (iter.hasNext()) {
 +      final TabletManagement mti = iter.next();
 +      if (mti == null) {
 +        throw new IllegalStateException("State store returned a null 
ManagerTabletInfo object");
 +      }
 +
 +      final TabletMetadata tm = mti.getTabletMetadata();
 +
 +      final String mtiError = mti.getErrorMessage();
 +      if (mtiError != null) {
 +        // An error happened on the TabletServer in the 
TabletManagementIterator
 +        // when trying to process this extent.
 +        LOG.warn(
 +            "Error on TabletServer trying to get Tablet management 
information for extent: {}. Error message: {}",
 +            tm.getExtent(), mtiError);
 +        this.metrics.incrementTabletGroupWatcherError(this.store.getLevel());
 +        tableMgmtStats.tabletsWithErrors++;
 +        continue;
 +      }
 +
 +      final TableId tableId = tm.getTableId();
 +      // ignore entries for tables that do not exist in zookeeper
 +      if (manager.getTableManager().getTableState(tableId) == null) {
 +        continue;
 +      }
 +
 +      // Don't overwhelm the tablet servers with work
 +      if (tLists.unassigned.size() + unloaded
 +          > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size()
 +          || tLists.volumeReplacements.size() > 1000) {
 +        flushChanges(tLists);
 +        tLists.reset();
 +        unloaded = 0;
 +      }
 +
 +      final TableConfiguration tableConf = 
manager.getContext().getTableConfiguration(tableId);
 +
 +      TabletState state = TabletState.compute(tm, currentTServers.keySet());
 +      if (state == TabletState.ASSIGNED_TO_DEAD_SERVER) {
 +        /*
 +         * This code exists to deal with a race condition caused by two 
threads running in this
 +         * class that compute tablets actions. One thread does full scans and 
the other reacts to
 +         * events and does partial scans. Below is an example of the race 
condition this is
 +         * handling.
 +         *
 +         * - TGW Thread 1 : reads the set of tablet servers and it's empty
 +         *
 +         * - TGW Thread 2 : reads the set of tablet servers and it's [TS1]
 +         *
 +         * - TGW Thread 2 : Sees tabletX without a location and assigns it to 
TS1
 +         *
 +         * - TGW Thread 1 : Sees tabletX assigned to TS1 and assumes it's 
assigned to a dead tablet
 +         * server because its set of live servers is the empty set.
 +         *
 +         * To deal with this race condition, this code recomputes the tablet 
state using the latest
 +         * tservers when a tablet is seen assigned to a dead tserver.
 +         */
 +
 +        TabletState newState = TabletState.compute(tm, 
manager.tserversSnapshot().getTservers());
 +        if (newState != state) {
 +          LOG.debug("Tablet state changed when using latest set of tservers 
{} {} {}",
 +              tm.getExtent(), state, newState);
 +          state = newState;
 +        }
 +      }
 +      tableMgmtStats.counts[state.ordinal()]++;
 +
 +      // This is final because nothing in this method should change the goal. 
All computation of the
 +      // goal should be done in TabletGoalState.compute() so that all parts 
of the Accumulo code
 +      // will compute a consistent goal.
 +      final TabletGoalState goal =
 +          TabletGoalState.compute(tm, state, manager.tabletBalancer, 
tableMgmtParams);
 +
 +      final Set<ManagementAction> actions = mti.getActions();
 +
 +      if (actions.contains(ManagementAction.NEEDS_RECOVERY) && goal != 
TabletGoalState.HOSTED) {
 +        LOG.warn("Tablet has wals, but goal is not hosted. Tablet: {}, 
goal:{}", tm.getExtent(),
 +            goal);
 +      }
 +
 +      if (actions.contains(ManagementAction.NEEDS_VOLUME_REPLACEMENT)) {
 +        tableMgmtStats.totalVolumeReplacements++;
 +        if (state == TabletState.UNASSIGNED || state == 
TabletState.SUSPENDED) {
 +          var volRep =
 +              
VolumeUtil.computeVolumeReplacements(tableMgmtParams.getVolumeReplacements(), 
tm);
 +          if (volRep.logsToRemove.size() + volRep.filesToRemove.size() > 0) {
 +            if (tm.getLocation() != null) {
 +              // since the totalVolumeReplacements counter was incremented, 
should try this again
 +              // later after it's unassigned
 +              LOG.debug("Volume replacement needed for {} but it has a 
location {}.",
 +                  tm.getExtent(), tm.getLocation());
 +            } else if (tm.getOperationId() != null) {
 +              LOG.debug("Volume replacement needed for {} but it has an 
active operation {}.",
 +                  tm.getExtent(), tm.getOperationId());
 +            } else {
 +              LOG.debug("Volume replacement needed for {}.", tm.getExtent());
 +              // buffer replacements so that multiple mutations can be done 
at once
 +              tLists.volumeReplacements.add(volRep);
 +            }
 +          } else {
 +            LOG.debug("Volume replacement evaluation for {} returned no 
changes.", tm.getExtent());
            }
 +        } else {
 +          LOG.debug("Volume replacement needed for {} but its tablet state is 
{}.", tm.getExtent(),
 +              state);
 +        }
 +      }
 +
 +      if (actions.contains(ManagementAction.BAD_STATE) && 
tm.isFutureAndCurrentLocationSet()) {
 +        throw new BadLocationStateException(
 +            tm.getExtent() + " is both assigned and hosted, which should 
never happen: " + this,
 +            tm.getExtent().toMetaRow());
 +      }
 +
 +      final Location location = tm.getLocation();
 +      Location current = null;
 +      Location future = null;
 +      if (tm.hasCurrent()) {
 +        current = tm.getLocation();
 +      } else {
 +        future = tm.getLocation();
 +      }
 +      TabletLogger.missassigned(tm.getExtent(), goal.toString(), 
state.toString(),
 +          future != null ? future.getServerInstance() : null,
 +          current != null ? current.getServerInstance() : null, 
tm.getLogs().size());
 +
 +      if (isFullScan) {
 +        stats.update(tableId, state);
 +      }
 +
 +      if (Manager.log.isTraceEnabled()) {
 +        Manager.log.trace(
 +            "[{}] Shutting down all Tservers: {}, dependentCount: {} Extent: 
{}, state: {}, goal: {} actions:{} #wals:{}",
 +            store.name(), 
tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet()),
 +            dependentWatcher == null ? "null" : 
dependentWatcher.assignedOrHosted(), tm.getExtent(),
 +            state, goal, actions, tm.getLogs().size());
 +      }
 +
 +      if (actions.contains(ManagementAction.NEEDS_SPLITTING)) {
 +        LOG.debug("{} may need splitting.", tm.getExtent());
 +        manager.getSplitter().initiateSplit(new SeedSplitTask(manager, 
tm.getExtent()));
 +      }
 +
 +      if (actions.contains(ManagementAction.NEEDS_COMPACTING)) {
 +        var jobs = compactionGenerator.generateJobs(tm,
 +            TabletManagementIterator.determineCompactionKinds(actions));
 +        LOG.debug("{} may need compacting adding {} jobs", tm.getExtent(), 
jobs.size());
 +        manager.getCompactionCoordinator().addJobs(tm, jobs);
 +      }
  
 -          // if we are shutting down all the tabletservers, we have to do it 
in order
 -          if ((goal == TabletGoalState.SUSPENDED && state == 
TabletState.HOSTED)
 -              && manager.serversToShutdown.equals(currentTServers.keySet())) {
 -            if (dependentWatcher != null) {
 -              // If the dependentWatcher is for the user tables, check to see
 -              // that user tables exist.
 -              DataLevel dependentLevel = dependentWatcher.store.getLevel();
 -              boolean userTablesExist = true;
 -              switch (dependentLevel) {
 -                case USER:
 -                  Set<TableId> onlineTables = manager.onlineTables();
 -                  onlineTables.remove(AccumuloTable.ROOT.tableId());
 -                  onlineTables.remove(AccumuloTable.METADATA.tableId());
 -                  userTablesExist = !onlineTables.isEmpty();
 -                  break;
 -                case METADATA:
 -                case ROOT:
 -                default:
 -                  break;
 +      // ELASITICITY_TODO the case where a planner generates compactions at 
time T1 for tablet
 +      // and later at time T2 generates nothing for the same tablet is not 
being handled. At
 +      // time T1 something could have been queued. However at time T2 we will 
not clear those
 +      // entries from the queue because we see nothing here for that case. 
After a full
 +      // metadata scan could remove any tablets that were not updated during 
the scan.
 +
 +      if (actions.contains(ManagementAction.NEEDS_LOCATION_UPDATE)
 +          || actions.contains(ManagementAction.NEEDS_RECOVERY)) {
 +
 +        if (tm.getLocation() != null) {
 +          
filteredServersToShutdown.remove(tm.getLocation().getServerInstance());
 +        }
 +
 +        if (goal == TabletGoalState.HOSTED) {
 +
 +          // RecoveryManager.recoverLogs will return false when all of the 
logs
 +          // have been sorted so that recovery can occur. Delay the hosting of
 +          // the Tablet until the sorting is finished.
 +          if ((state != TabletState.HOSTED && 
actions.contains(ManagementAction.NEEDS_RECOVERY))
 +              && manager.recoveryManager.recoverLogs(tm.getExtent(), 
tm.getLogs())) {
 +            LOG.debug("Not hosting {} as it needs recovery, logs: {}", 
tm.getExtent(),
 +                tm.getLogs().size());
 +            continue;
 +          }
 +          switch (state) {
 +            case HOSTED:
 +              if 
(location.getServerInstance().equals(manager.migrations.get(tm.getExtent()))) {
 +                manager.migrations.remove(tm.getExtent());
                }
 -              // If the stats object in the dependentWatcher is empty, then it
 -              // currently does not have data about what is hosted or not. In
 -              // that case host these tablets until the dependent watcher can
 -              // gather some data.
 -              final Map<TableId,TableCounts> stats = 
dependentWatcher.getStats();
 -              if (dependentLevel == DataLevel.USER) {
 -                if (userTablesExist
 -                    && (stats == null || stats.isEmpty() || 
assignedOrHosted(stats) > 0)) {
 -                  goal = TabletGoalState.HOSTED;
 -                }
 -              } else if (stats == null || stats.isEmpty() || 
assignedOrHosted(stats) > 0) {
 -                goal = TabletGoalState.HOSTED;
 +              break;
 +            case ASSIGNED_TO_DEAD_SERVER:
 +              hostDeadTablet(tLists, tm, location);
 +              break;
 +            case SUSPENDED:
 +              hostSuspendedTablet(tLists, tm, location, tableConf);
 +              break;
 +            case UNASSIGNED:
 +              hostUnassignedTablet(tLists, tm.getExtent(),
 +                  new UnassignedTablet(location, tm.getLast()));
 +              break;
 +            case ASSIGNED:
 +              // Send another reminder
 +              tLists.assigned.add(new Assignment(tm.getExtent(),
 +                  future != null ? future.getServerInstance() : null, 
tm.getLast()));
 +              break;
 +            default:
 +              break;
 +          }
 +        } else {
 +          switch (state) {
 +            case SUSPENDED:
 +              // Request a move to UNASSIGNED, so as to allow balancing to 
continue.
 +              tLists.suspendedToGoneServers.add(tm);
 +              cancelOfflineTableMigrations(tm.getExtent());
 +              break;
 +            case UNASSIGNED:
 +              cancelOfflineTableMigrations(tm.getExtent());
 +              break;
 +            case ASSIGNED_TO_DEAD_SERVER:
 +              unassignDeadTablet(tLists, tm);
 +              break;
 +            case HOSTED:
 +              TServerConnection client =
 +                  
manager.tserverSet.getConnection(location.getServerInstance());
 +              if (client != null) {
-                 LOG.debug("Requesting tserver {} unload tablet {}", 
location.getServerInstance(),
-                     tm.getExtent());
++                TABLET_UNLOAD_LOGGER.trace("[{}] Requesting TabletServer {} 
unload {} {}",
++                    store.name(), location.getServerInstance(), 
tm.getExtent(), goal.howUnload());
 +                client.unloadTablet(manager.managerLock, tm.getExtent(), 
goal.howUnload(),
 +                    manager.getSteadyTime().getMillis());
 +                tableMgmtStats.totalUnloaded++;
 +                unloaded++;
 +              } else {
 +                Manager.log.warn("Could not connect to server {}", location);
                }
 -            }
 +              break;
 +            case ASSIGNED:
 +              break;
            }
 +        }
 +      }
 +    }
  
 -          if (goal == TabletGoalState.HOSTED) {
 -            if ((state != TabletState.HOSTED && !tls.walogs.isEmpty())
 -                && manager.recoveryManager.recoverLogs(tls.extent, 
tls.walogs)) {
 -              continue;
 -            }
 -            switch (state) {
 -              case HOSTED:
 -                if 
(location.getServerInstance().equals(manager.migrations.get(tls.extent))) {
 -                  manager.migrations.remove(tls.extent);
 -                }
 -                break;
 -              case ASSIGNED_TO_DEAD_SERVER:
 -                hostDeadTablet(tLists, tls, location, wals);
 -                break;
 -              case SUSPENDED:
 -                hostSuspendedTablet(tLists, tls, location, tableConf);
 -                break;
 -              case UNASSIGNED:
 -                hostUnassignedTablet(tLists, tls.extent, new 
UnassignedTablet(location, tls.last));
 -                break;
 -              case ASSIGNED:
 -                // Send another reminder
 -                tLists.assigned.add(new Assignment(tls.extent, 
tls.getFutureServer(), tls.last));
 -                break;
 -            }
 -          } else {
 -            switch (state) {
 -              case SUSPENDED:
 -                // Request a move to UNASSIGNED, so as to allow balancing to 
continue.
 -                tLists.suspendedToGoneServers.add(tls);
 -                cancelOfflineTableMigrations(tls.extent);
 -                break;
 -              case UNASSIGNED:
 -                cancelOfflineTableMigrations(tls.extent);
 -                break;
 -              case ASSIGNED_TO_DEAD_SERVER:
 -                unassignDeadTablet(tLists, tls, wals);
 -                break;
 -              case HOSTED:
 -                TServerConnection client =
 -                    
manager.tserverSet.getConnection(location.getServerInstance());
 -                if (client != null) {
 -                  try {
 -                    TABLET_UNLOAD_LOGGER.trace("[{}] Requesting TabletServer 
{} unload {} {}",
 -                        store.name(), location.getServerInstance(), 
tls.extent, goal.howUnload());
 -                    client.unloadTablet(manager.managerLock, tls.extent, 
goal.howUnload(),
 -                        manager.getSteadyTime().getMillis());
 -                    unloaded++;
 -                    totalUnloaded++;
 -                  } catch (TException tException) {
 -                    Manager.log.warn("[{}] Failed to request tablet unload {} 
{} {}", store.name(),
 -                        location.getServerInstance(), tls.extent, 
goal.howUnload(), tException);
 -                  }
 -                } else {
 -                  Manager.log.warn("Could not connect to server {}", 
location);
 -                }
 -                break;
 -              case ASSIGNED:
 -                break;
 -            }
 +    flushChanges(tLists);
 +
 +    if (isFullScan) {
 +      this.filteredServersToShutdown = Set.copyOf(filteredServersToShutdown);
 +    }
 +
 +    return tableMgmtStats;
 +  }
 +
 +  private SortedMap<TServerInstance,TabletServerStatus>
 +      getCurrentTservers(Set<TServerInstance> onlineTservers) {
 +    // Get the current status for the current list of tservers
 +    final SortedMap<TServerInstance,TabletServerStatus> currentTServers = new 
TreeMap<>();
 +    for (TServerInstance entry : onlineTservers) {
 +      currentTServers.put(entry, manager.tserverStatus.get(entry));
 +    }
 +    return currentTServers;
 +  }
 +
 +  @Override
 +  public void run() {
 +    int[] oldCounts = new int[TabletState.values().length];
 +    boolean lookForTabletsNeedingVolReplacement = true;
 +
 +    while (manager.stillManager()) {
 +      if (!eventHandler.isNeedsFullScan()) {
 +        // If an event handled by the EventHandler.RangeProcessor indicated
 +        // that we need to do a full scan, then do it. Otherwise wait a bit
 +        // before re-checking the tablets.
 +        sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
 +      }
 +
 +      final long waitTimeBetweenScans = manager.getConfiguration()
 +          .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL);
 +
 +      TabletManagementParameters tableMgmtParams =
 +          
createTabletManagementParameters(lookForTabletsNeedingVolReplacement);
 +      var currentTServers = 
getCurrentTservers(tableMgmtParams.getOnlineTsevers());
 +
 +      ClosableIterator<TabletManagement> iter = null;
 +      try {
 +        if (currentTServers.isEmpty()) {
 +          eventHandler.waitForFullScan(waitTimeBetweenScans);
 +          synchronized (this) {
 +            lastScanServers = Collections.emptySortedSet();
            }
 -          counts[state.ordinal()]++;
 +          continue;
          }
  
 -        flushChanges(tLists, wals);
 +        stats.begin();
 +
 +        ManagerState managerState = tableMgmtParams.getManagerState();
 +
 +        // Clear the need for a full scan before starting a full scan in order 
to detect events that
 +        // happen during the full scan.
 +        eventHandler.clearNeedsFullScan();
 +
 +        iter = store.iterator(tableMgmtParams);
 +        
manager.getCompactionCoordinator().getJobQueues().beginFullScan(store.getLevel());
 +        var tabletMgmtStats = manageTablets(iter, tableMgmtParams, 
currentTServers, true);
 +        
manager.getCompactionCoordinator().getJobQueues().endFullScan(store.getLevel());
 +
 +        // If currently looking for volume replacements, determine if the 
next round needs to look.
 +        if (lookForTabletsNeedingVolReplacement) {
 +          // Continue to look for tablets needing volume replacement if there 
was an error
 +          // processing tablets in the call to manageTablets() or if we are 
still performing volume
 +          // replacement. We only want to stop looking for tablets that need 
volume replacement when
 +          // we have successfully processed all tablet metadata and no more 
volume replacements are
 +          // being performed.
 +          lookForTabletsNeedingVolReplacement = 
tabletMgmtStats.totalVolumeReplacements != 0
 +              || tabletMgmtStats.tabletsWithErrors != 0;
 +        }
  
          // provide stats after flushing changes to avoid race conditions w/ 
delete table
          stats.end(managerState);
diff --cc 
server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
index 573e3c49f0,4ea148046b..0747a10867
--- 
a/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
+++ 
b/server/tserver/src/main/java/org/apache/accumulo/tserver/tablet/Tablet.java
@@@ -21,11 -21,13 +21,12 @@@ package org.apache.accumulo.tserver.tab
  import static 
com.google.common.util.concurrent.Uninterruptibles.sleepUninterruptibly;
  import static java.nio.charset.StandardCharsets.UTF_8;
  import static java.util.stream.Collectors.toList;
 +import static org.apache.accumulo.core.util.LazySingletons.RANDOM;
  
 -import java.io.ByteArrayInputStream;
 -import java.io.DataInputStream;
  import java.io.FileNotFoundException;
  import java.io.IOException;
 -import java.lang.ref.SoftReference;
 +import java.io.UncheckedIOException;
+ import java.time.Duration;
  import java.util.ArrayList;
  import java.util.Collection;
  import java.util.Collections;
@@@ -60,8 -67,11 +61,9 @@@ import org.apache.accumulo.core.dataImp
  import org.apache.accumulo.core.file.FilePrefix;
  import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
  import org.apache.accumulo.core.iteratorsImpl.system.SourceSwitchingIterator;
+ import org.apache.accumulo.core.logging.ConditionalLogger.DeduplicatingLogger;
  import org.apache.accumulo.core.logging.TabletLogger;
  import org.apache.accumulo.core.manager.state.tables.TableState;
 -import org.apache.accumulo.core.manager.thrift.BulkImportState;
  import org.apache.accumulo.core.metadata.AccumuloTable;
  import org.apache.accumulo.core.metadata.ReferencedTabletFile;
  import org.apache.accumulo.core.metadata.StoredTabletFile;


Reply via email to