This is an automated email from the ASF dual-hosted git repository. cshannon pushed a commit to branch elasticity in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 5220f009ccf2816f195b103d8545707cd2b54793 Merge: edda158a54 6dcf84ed00 Author: Christopher L. Shannon <cshan...@apache.org> AuthorDate: Fri May 10 09:57:21 2024 -0400 Merge branch 'main' into elasticity .../apache/accumulo/core/util/time/SteadyTime.java | 84 +++++++++++++ .../accumulo/core/util/time/SteadyTimeTest.java | 56 +++++++++ server/manager/pom.xml | 5 + .../java/org/apache/accumulo/manager/Manager.java | 9 +- .../org/apache/accumulo/manager/ManagerTime.java | 120 ++++++++++++++++--- .../accumulo/manager/TabletGroupWatcher.java | 6 +- .../apache/accumulo/manager/ManagerTimeTest.java | 130 +++++++++++++++++++++ 7 files changed, 389 insertions(+), 21 deletions(-) diff --cc server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index a7bf212243,ded6d62f83..631daaddb1 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@@ -120,10 -114,9 +120,11 @@@ import org.apache.accumulo.core.util.Re import org.apache.accumulo.core.util.threads.ThreadPools; import org.apache.accumulo.core.util.threads.Threads; import org.apache.accumulo.core.util.time.NanoTime; + import org.apache.accumulo.core.util.time.SteadyTime; +import org.apache.accumulo.manager.compaction.coordinator.CompactionCoordinator; import org.apache.accumulo.manager.metrics.ManagerMetrics; import org.apache.accumulo.manager.recovery.RecoveryManager; +import org.apache.accumulo.manager.split.Splitter; import org.apache.accumulo.manager.state.TableCounts; import org.apache.accumulo.manager.tableOps.TraceRepo; import org.apache.accumulo.manager.upgrade.PreUpgradeValidation; diff --cc server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 060411fcbf,530bd950b1..ca69231394 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@@ -214,484 -177,203 +214,484 @@@ abstract class TabletGroupWatcher exten } } - @Override - public void run() { - int[] oldCounts = new int[TabletState.values().length]; - EventCoordinator.Listener eventListener = this.manager.nextEvent.getListener(); + class EventHandler implements EventCoordinator.Listener { - WalStateManager wals = new WalStateManager(manager.getContext()); + // Setting this to true to start with because its not know what happended before this object was + // created, so just start off with full scan. + private boolean needsFullScan = true; - while (manager.stillManager()) { - // slow things down a little, otherwise we spam the logs when there are many wake-up events - sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + private final BlockingQueue<Range> rangesToProcess; - final long waitTimeBetweenScans = manager.getConfiguration() - .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL); + class RangeProccessor implements Runnable { + @Override + public void run() { + try { + while (manager.stillManager()) { + var range = rangesToProcess.poll(100, TimeUnit.MILLISECONDS); + if (range == null) { + // check to see if still the manager + continue; + } - int totalUnloaded = 0; - int unloaded = 0; - ClosableIterator<TabletLocationState> iter = null; - try { - Map<TableId,MergeStats> mergeStatsCache = new HashMap<>(); - Map<TableId,MergeStats> currentMerges = new HashMap<>(); - for (MergeInfo merge : manager.merges()) { - if (merge.getExtent() != null) { - currentMerges.put(merge.getExtent().tableId(), new MergeStats(merge)); + ArrayList<Range> ranges = new ArrayList<>(); + ranges.add(range); + + rangesToProcess.drainTo(ranges); + + if (manager.getManagerGoalState() == ManagerGoalState.CLEAN_STOP) { + // only do full scans when trying to shutdown + setNeedsFullScan(); + continue; + } + + TabletManagementParameters tabletMgmtParams = createTabletManagementParameters(false); + + var currentTservers = getCurrentTservers(tabletMgmtParams.getOnlineTsevers()); + if (currentTservers.isEmpty()) { + setNeedsFullScan(); + continue; + } + + try (var iter = store.iterator(ranges, tabletMgmtParams)) { + long t1 = System.currentTimeMillis(); + manageTablets(iter, tabletMgmtParams, currentTservers, false); + long t2 = System.currentTimeMillis(); + Manager.log.debug(String.format("[%s]: partial scan time %.2f seconds for %,d ranges", + store.name(), (t2 - t1) / 1000., ranges.size())); + } catch (Exception e) { + Manager.log.error("Error processing {} ranges for store {} ", ranges.size(), + store.name(), e); + } } + } catch (InterruptedException e) { + throw new RuntimeException(e); } + } + } - // Get the current status for the current list of tservers - SortedMap<TServerInstance,TabletServerStatus> currentTServers = new TreeMap<>(); - for (TServerInstance entry : manager.tserverSet.getCurrentServers()) { - currentTServers.put(entry, manager.tserverStatus.get(entry)); - } + EventHandler() { + rangesToProcess = new ArrayBlockingQueue<>(3000); - if (currentTServers.isEmpty()) { - eventListener.waitForEvents(waitTimeBetweenScans); - synchronized (this) { - lastScanServers = Collections.emptySortedSet(); + Threads + .createThread("TGW [" + store.name() + "] event range processor", new RangeProccessor()) + .start(); + } + + private synchronized void setNeedsFullScan() { + needsFullScan = true; + notifyAll(); + } + + public synchronized void clearNeedsFullScan() { + needsFullScan = false; + } + + public synchronized boolean isNeedsFullScan() { + return needsFullScan; + } + + @Override + public void process(EventCoordinator.Event event) { + + switch (event.getScope()) { + case ALL: + case DATA_LEVEL: + setNeedsFullScan(); + break; + case TABLE: + case TABLE_RANGE: + if (!rangesToProcess.offer(event.getExtent().toMetaRange())) { + Manager.log.debug("[{}] unable to process event range {} because queue is full", + store.name(), event.getExtent()); + setNeedsFullScan(); } - continue; + break; + default: + throw new IllegalArgumentException("Unhandled scope " + event.getScope()); + } + } + + synchronized void waitForFullScan(long millis) { + if (!needsFullScan) { + try { + wait(millis); + } catch (InterruptedException e) { + throw new RuntimeException(e); } + } + } + } - TabletLists tLists = new TabletLists(manager, currentTServers); + private TabletManagementParameters + createTabletManagementParameters(boolean lookForTabletsNeedingVolReplacement) { - ManagerState managerState = manager.getManagerState(); - int[] counts = new int[TabletState.values().length]; - stats.begin(); - // Walk through the tablets in our store, and work tablets - // towards their goal - iter = store.iterator(); - while (iter.hasNext()) { - TabletLocationState tls = iter.next(); - if (tls == null) { - continue; - } + HashMap<Ample.DataLevel,Boolean> parentLevelUpgrade = new HashMap<>(); + UpgradeCoordinator.UpgradeStatus upgradeStatus = manager.getUpgradeStatus(); + for (var level : Ample.DataLevel.values()) { + parentLevelUpgrade.put(level, upgradeStatus.isParentLevelUpgraded(level)); + } - // ignore entries for tables that do not exist in zookeeper - if (manager.getTableManager().getTableState(tls.extent.tableId()) == null) { - continue; - } + Set<TServerInstance> shutdownServers; + if (store.getLevel() == Ample.DataLevel.USER) { + shutdownServers = manager.shutdownServers(); + } else { + // Use the servers to shutdown filtered by the dependent watcher. These are servers to + // shutdown that the dependent watcher has determined it has no tablets hosted on or assigned + // to. + shutdownServers = dependentWatcher.getFilteredServersToShutdown(); + } - // Don't overwhelm the tablet servers with work - if (tLists.unassigned.size() + unloaded - > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size()) { - flushChanges(tLists, wals); - tLists.reset(); - unloaded = 0; - eventListener.waitForEvents(waitTimeBetweenScans); - } - TableId tableId = tls.extent.tableId(); - TableConfiguration tableConf = manager.getContext().getTableConfiguration(tableId); - - MergeStats mergeStats = mergeStatsCache.computeIfAbsent(tableId, k -> { - var mStats = currentMerges.get(k); - return mStats != null ? mStats : new MergeStats(new MergeInfo()); - }); - TabletGoalState goal = manager.getGoalState(tls, mergeStats.getMergeInfo()); - Location location = tls.getLocation(); - TabletState state = tls.getState(currentTServers.keySet()); - - TabletLogger.missassigned(tls.extent, goal.toString(), state.toString(), - tls.getFutureServer(), tls.getCurrentServer(), tls.walogs.size()); - - stats.update(tableId, state); - mergeStats.update(tls.extent, state); - - // Always follow through with assignments - if (state == TabletState.ASSIGNED) { - goal = TabletGoalState.HOSTED; - } - if (Manager.log.isTraceEnabled()) { - Manager.log.trace( - "[{}] Shutting down all Tservers: {}, dependentCount: {} Extent: {}, state: {}, goal: {}", - store.name(), manager.serversToShutdown.equals(currentTServers.keySet()), - dependentWatcher == null ? "null" : dependentWatcher.assignedOrHosted(), tls.extent, - state, goal); + var tServersSnapshot = manager.tserversSnapshot(); + + return new TabletManagementParameters(manager.getManagerState(), parentLevelUpgrade, + manager.onlineTables(), tServersSnapshot, shutdownServers, manager.migrationsSnapshot(), + store.getLevel(), manager.getCompactionHints(store.getLevel()), canSuspendTablets(), + lookForTabletsNeedingVolReplacement ? manager.getContext().getVolumeReplacements() + : Map.of()); + } + + private Set<TServerInstance> getFilteredServersToShutdown() { + return filteredServersToShutdown; + } + + private static class TableMgmtStats { + int[] counts = new int[TabletState.values().length]; + private int totalUnloaded; + private long totalVolumeReplacements; + private int tabletsWithErrors; + } + + private TableMgmtStats manageTablets(Iterator<TabletManagement> iter, + TabletManagementParameters tableMgmtParams, + SortedMap<TServerInstance,TabletServerStatus> currentTServers, boolean isFullScan) + throws BadLocationStateException, TException, DistributedStoreException, WalMarkerException, + IOException { + + final TableMgmtStats tableMgmtStats = new TableMgmtStats(); + final boolean shuttingDownAllTabletServers = + tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet()); + if (shuttingDownAllTabletServers && !isFullScan) { + // If we are shutting down all of the TabletServers, then don't process any events + // from the EventCoordinator. + LOG.debug("Partial scan requested, but aborted due to shutdown of all TabletServers"); + return tableMgmtStats; + } + + int unloaded = 0; + + TabletLists tLists = new TabletLists(currentTServers, tableMgmtParams.getGroupedTServers(), + tableMgmtParams.getServersToShutdown()); + + CompactionJobGenerator compactionGenerator = new CompactionJobGenerator( + new ServiceEnvironmentImpl(manager.getContext()), tableMgmtParams.getCompactionHints()); + + Set<TServerInstance> filteredServersToShutdown = + new HashSet<>(tableMgmtParams.getServersToShutdown()); + + while (iter.hasNext()) { + final TabletManagement mti = iter.next(); + if (mti == null) { + throw new IllegalStateException("State store returned a null ManagerTabletInfo object"); + } + + final TabletMetadata tm = mti.getTabletMetadata(); + + final String mtiError = mti.getErrorMessage(); + if (mtiError != null) { + // An error happened on the TabletServer in the TabletManagementIterator + // when trying to process this extent. + LOG.warn( + "Error on TabletServer trying to get Tablet management information for extent: {}. Error message: {}", + tm.getExtent(), mtiError); + this.metrics.incrementTabletGroupWatcherError(this.store.getLevel()); + tableMgmtStats.tabletsWithErrors++; + continue; + } + + final TableId tableId = tm.getTableId(); + // ignore entries for tables that do not exist in zookeeper + if (manager.getTableManager().getTableState(tableId) == null) { + continue; + } + + // Don't overwhelm the tablet servers with work + if (tLists.unassigned.size() + unloaded + > Manager.MAX_TSERVER_WORK_CHUNK * currentTServers.size() + || tLists.volumeReplacements.size() > 1000) { + flushChanges(tLists); + tLists.reset(); + unloaded = 0; + } + + final TableConfiguration tableConf = manager.getContext().getTableConfiguration(tableId); + + TabletState state = TabletState.compute(tm, currentTServers.keySet()); + if (state == TabletState.ASSIGNED_TO_DEAD_SERVER) { + /* + * This code exists to deal with a race condition caused by two threads running in this + * class that compute tablets actions. One thread does full scans and the other reacts to + * events and does partial scans. Below is an example of the race condition this is + * handling. + * + * - TGW Thread 1 : reads the set of tablets servers and its empty + * + * - TGW Thread 2 : reads the set of tablet servers and its [TS1] + * + * - TGW Thread 2 : Sees tabletX without a location and assigns it to TS1 + * + * - TGW Thread 1 : Sees tabletX assigned to TS1 and assumes it's assigned to a dead tablet + * server because its set of live servers is the empty set. + * + * To deal with this race condition, this code recomputes the tablet state using the latest + * tservers when a tablet is seen assigned to a dead tserver. + */ + + TabletState newState = TabletState.compute(tm, manager.tserversSnapshot().getTservers()); + if (newState != state) { + LOG.debug("Tablet state changed when using latest set of tservers {} {} {}", + tm.getExtent(), state, newState); + state = newState; + } + } + tableMgmtStats.counts[state.ordinal()]++; + + // This is final because nothing in this method should change the goal. All computation of the + // goal should be done in TabletGoalState.compute() so that all parts of the Accumulo code + // will compute a consistent goal. + final TabletGoalState goal = + TabletGoalState.compute(tm, state, manager.tabletBalancer, tableMgmtParams); + + final Set<ManagementAction> actions = mti.getActions(); + + if (actions.contains(ManagementAction.NEEDS_RECOVERY) && goal != TabletGoalState.HOSTED) { + LOG.warn("Tablet has wals, but goal is not hosted. Tablet: {}, goal:{}", tm.getExtent(), + goal); + } + + if (actions.contains(ManagementAction.NEEDS_VOLUME_REPLACEMENT)) { + tableMgmtStats.totalVolumeReplacements++; + if (state == TabletState.UNASSIGNED || state == TabletState.SUSPENDED) { + var volRep = + VolumeUtil.computeVolumeReplacements(tableMgmtParams.getVolumeReplacements(), tm); + if (volRep.logsToRemove.size() + volRep.filesToRemove.size() > 0) { + if (tm.getLocation() != null) { + // since the totalVolumeReplacements counter was incremented, should try this again + // later after its unassigned + LOG.debug("Volume replacement needed for {} but it has a location {}.", + tm.getExtent(), tm.getLocation()); + } else if (tm.getOperationId() != null) { + LOG.debug("Volume replacement needed for {} but it has an active operation {}.", + tm.getExtent(), tm.getOperationId()); + } else { + LOG.debug("Volume replacement needed for {}.", tm.getExtent()); + // buffer replacements so that multiple mutations can be done at once + tLists.volumeReplacements.add(volRep); + } + } else { + LOG.debug("Volume replacement evaluation for {} returned no changes.", tm.getExtent()); } + } else { + LOG.debug("Volume replacement needed for {} but its tablet state is {}.", tm.getExtent(), + state); + } + } - // if we are shutting down all the tabletservers, we have to do it in order - if ((goal == TabletGoalState.SUSPENDED && state == TabletState.HOSTED) - && manager.serversToShutdown.equals(currentTServers.keySet())) { - if (dependentWatcher != null) { - // If the dependentWatcher is for the user tables, check to see - // that user tables exist. - DataLevel dependentLevel = dependentWatcher.store.getLevel(); - boolean userTablesExist = true; - switch (dependentLevel) { - case USER: - Set<TableId> onlineTables = manager.onlineTables(); - onlineTables.remove(AccumuloTable.ROOT.tableId()); - onlineTables.remove(AccumuloTable.METADATA.tableId()); - userTablesExist = !onlineTables.isEmpty(); - break; - case METADATA: - case ROOT: - default: - break; + if (actions.contains(ManagementAction.BAD_STATE) && tm.isFutureAndCurrentLocationSet()) { + throw new BadLocationStateException( + tm.getExtent() + " is both assigned and hosted, which should never happen: " + this, + tm.getExtent().toMetaRow()); + } + + final Location location = tm.getLocation(); + Location current = null; + Location future = null; + if (tm.hasCurrent()) { + current = tm.getLocation(); + } else { + future = tm.getLocation(); + } + TabletLogger.missassigned(tm.getExtent(), goal.toString(), state.toString(), + future != null ? future.getServerInstance() : null, + current != null ? current.getServerInstance() : null, tm.getLogs().size()); + + if (isFullScan) { + stats.update(tableId, state); + } + + if (Manager.log.isTraceEnabled()) { + Manager.log.trace( + "[{}] Shutting down all Tservers: {}, dependentCount: {} Extent: {}, state: {}, goal: {} actions:{} #wals:{}", + store.name(), tableMgmtParams.getServersToShutdown().equals(currentTServers.keySet()), + dependentWatcher == null ? "null" : dependentWatcher.assignedOrHosted(), tm.getExtent(), + state, goal, actions, tm.getLogs().size()); + } + + if (actions.contains(ManagementAction.NEEDS_SPLITTING)) { + LOG.debug("{} may need splitting.", tm.getExtent()); + manager.getSplitter().initiateSplit(new SeedSplitTask(manager, tm.getExtent())); + } + + if (actions.contains(ManagementAction.NEEDS_COMPACTING)) { + var jobs = compactionGenerator.generateJobs(tm, + TabletManagementIterator.determineCompactionKinds(actions)); + LOG.debug("{} may need compacting adding {} jobs", tm.getExtent(), jobs.size()); + manager.getCompactionCoordinator().addJobs(tm, jobs); + } + + // ELASITICITY_TODO the case where a planner generates compactions at time T1 for tablet + // and later at time T2 generates nothing for the same tablet is not being handled. At + // time T1 something could have been queued. However at time T2 we will not clear those + // entries from the queue because we see nothing here for that case. After a full + // metadata scan could remove any tablets that were not updated during the scan. + + if (actions.contains(ManagementAction.NEEDS_LOCATION_UPDATE) + || actions.contains(ManagementAction.NEEDS_RECOVERY)) { + + if (tm.getLocation() != null) { + filteredServersToShutdown.remove(tm.getLocation().getServerInstance()); + } + + if (goal == TabletGoalState.HOSTED) { + + // RecoveryManager.recoverLogs will return false when all of the logs + // have been sorted so that recovery can occur. Delay the hosting of + // the Tablet until the sorting is finished. + if ((state != TabletState.HOSTED && actions.contains(ManagementAction.NEEDS_RECOVERY)) + && manager.recoveryManager.recoverLogs(tm.getExtent(), tm.getLogs())) { + LOG.debug("Not hosting {} as it needs recovery, logs: {}", tm.getExtent(), + tm.getLogs().size()); + continue; + } + switch (state) { + case HOSTED: + if (location.getServerInstance().equals(manager.migrations.get(tm.getExtent()))) { + manager.migrations.remove(tm.getExtent()); } - // If the stats object in the dependentWatcher is empty, then it - // currently does not have data about what is hosted or not. In - // that case host these tablets until the dependent watcher can - // gather some data. - final Map<TableId,TableCounts> stats = dependentWatcher.getStats(); - if (dependentLevel == DataLevel.USER) { - if (userTablesExist - && (stats == null || stats.isEmpty() || assignedOrHosted(stats) > 0)) { - goal = TabletGoalState.HOSTED; - } - } else if (stats == null || stats.isEmpty() || assignedOrHosted(stats) > 0) { - goal = TabletGoalState.HOSTED; + break; + case ASSIGNED_TO_DEAD_SERVER: + hostDeadTablet(tLists, tm, location); + break; + case SUSPENDED: + hostSuspendedTablet(tLists, tm, location, tableConf); + break; + case UNASSIGNED: + hostUnassignedTablet(tLists, tm.getExtent(), + new UnassignedTablet(location, tm.getLast())); + break; + case ASSIGNED: + // Send another reminder + tLists.assigned.add(new Assignment(tm.getExtent(), + future != null ? future.getServerInstance() : null, tm.getLast())); + break; + default: + break; + } + } else { + switch (state) { + case SUSPENDED: + // Request a move to UNASSIGNED, so as to allow balancing to continue. + tLists.suspendedToGoneServers.add(tm); + cancelOfflineTableMigrations(tm.getExtent()); + break; + case UNASSIGNED: + cancelOfflineTableMigrations(tm.getExtent()); + break; + case ASSIGNED_TO_DEAD_SERVER: + unassignDeadTablet(tLists, tm); + break; + case HOSTED: + TServerConnection client = + manager.tserverSet.getConnection(location.getServerInstance()); + if (client != null) { + LOG.debug("Requesting tserver {} unload tablet {}", location.getServerInstance(), + tm.getExtent()); + client.unloadTablet(manager.managerLock, tm.getExtent(), goal.howUnload(), - manager.getSteadyTime()); ++ manager.getSteadyTime().getMillis()); + tableMgmtStats.totalUnloaded++; + unloaded++; + } else { + Manager.log.warn("Could not connect to server {}", location); } - } + break; + case ASSIGNED: + break; } + } + } + } - if (goal == TabletGoalState.HOSTED) { - if ((state != TabletState.HOSTED && !tls.walogs.isEmpty()) - && manager.recoveryManager.recoverLogs(tls.extent, tls.walogs)) { - continue; - } - switch (state) { - case HOSTED: - if (location.getServerInstance().equals(manager.migrations.get(tls.extent))) { - manager.migrations.remove(tls.extent); - } - break; - case ASSIGNED_TO_DEAD_SERVER: - hostDeadTablet(tLists, tls, location, wals); - break; - case SUSPENDED: - hostSuspendedTablet(tLists, tls, location, tableConf); - break; - case UNASSIGNED: - hostUnassignedTablet(tLists, tls.extent, new UnassignedTablet(location, tls.last)); - break; - case ASSIGNED: - // Send another reminder - tLists.assigned.add(new Assignment(tls.extent, tls.getFutureServer(), tls.last)); - break; - } - } else { - switch (state) { - case SUSPENDED: - // Request a move to UNASSIGNED, so as to allow balancing to continue. - tLists.suspendedToGoneServers.add(tls); - cancelOfflineTableMigrations(tls.extent); - break; - case UNASSIGNED: - cancelOfflineTableMigrations(tls.extent); - break; - case ASSIGNED_TO_DEAD_SERVER: - unassignDeadTablet(tLists, tls, wals); - break; - case HOSTED: - TServerConnection client = - manager.tserverSet.getConnection(location.getServerInstance()); - if (client != null) { - try { - Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(), - location.getServerInstance(), tls.extent, goal.howUnload()); - client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(), - manager.getSteadyTime().getMillis()); - unloaded++; - totalUnloaded++; - } catch (TException tException) { - Manager.log.warn("[{}] Failed to request tablet unload {} {} {}", store.name(), - location.getServerInstance(), tls.extent, goal.howUnload(), tException); - } - } else { - Manager.log.warn("Could not connect to server {}", location); - } - break; - case ASSIGNED: - break; - } + flushChanges(tLists); + + if (isFullScan) { + this.filteredServersToShutdown = Set.copyOf(filteredServersToShutdown); + } + + return tableMgmtStats; + } + + private SortedMap<TServerInstance,TabletServerStatus> + getCurrentTservers(Set<TServerInstance> onlineTservers) { + // Get the current status for the current list of tservers + final SortedMap<TServerInstance,TabletServerStatus> currentTServers = new TreeMap<>(); + for (TServerInstance entry : onlineTservers) { + currentTServers.put(entry, manager.tserverStatus.get(entry)); + } + return currentTServers; + } + + @Override + public void run() { + int[] oldCounts = new int[TabletState.values().length]; + boolean lookForTabletsNeedingVolReplacement = true; + + while (manager.stillManager()) { + if (!eventHandler.isNeedsFullScan()) { + // If an event handled by the EventHandler.RangeProcessor indicated + // that we need to do a full scan, then do it. Otherwise wait a bit + // before re-checking the tablets. + sleepUninterruptibly(100, TimeUnit.MILLISECONDS); + } + + final long waitTimeBetweenScans = manager.getConfiguration() + .getTimeInMillis(Property.MANAGER_TABLET_GROUP_WATCHER_INTERVAL); + + TabletManagementParameters tableMgmtParams = + createTabletManagementParameters(lookForTabletsNeedingVolReplacement); + var currentTServers = getCurrentTservers(tableMgmtParams.getOnlineTsevers()); + + ClosableIterator<TabletManagement> iter = null; + try { + if (currentTServers.isEmpty()) { + eventHandler.waitForFullScan(waitTimeBetweenScans); + synchronized (this) { + lastScanServers = Collections.emptySortedSet(); } - counts[state.ordinal()]++; + continue; } - flushChanges(tLists, wals); + stats.begin(); + + ManagerState managerState = tableMgmtParams.getManagerState(); + + // Clear the need for a full scan before starting a full scan inorder to detect events that + // happen during the full scan. + eventHandler.clearNeedsFullScan(); + + iter = store.iterator(tableMgmtParams); + manager.getCompactionCoordinator().getJobQueues().beginFullScan(store.getLevel()); + var tabletMgmtStats = manageTablets(iter, tableMgmtParams, currentTServers, true); + manager.getCompactionCoordinator().getJobQueues().endFullScan(store.getLevel()); + + // If currently looking for volume replacements, determine if the next round needs to look. + if (lookForTabletsNeedingVolReplacement) { + // Continue to look for tablets needing volume replacement if there was an error + // processing tablets in the call to manageTablets() or if we are still performing volume + // replacement. We only want to stop looking for tablets that need volume replacement when + // we have successfully processed all tablet metadata and no more volume replacements are + // being performed. + lookForTabletsNeedingVolReplacement = tabletMgmtStats.totalVolumeReplacements != 0 + || tabletMgmtStats.tabletsWithErrors != 0; + } // provide stats after flushing changes to avoid race conditions w/ delete table stats.end(managerState); @@@ -768,9 -452,9 +768,9 @@@ } } - private void hostSuspendedTablet(TabletLists tLists, TabletLocationState tls, Location location, + private void hostSuspendedTablet(TabletLists tLists, TabletMetadata tm, Location location, TableConfiguration tableConf) { - if (manager.getSteadyTime() - tm.getSuspend().suspensionTime - if (manager.getSteadyTime().getMillis() - tls.suspend.suspensionTime ++ if (manager.getSteadyTime().getMillis() - tm.getSuspend().suspensionTime < tableConf.getTimeInMillis(Property.TABLE_SUSPEND_DURATION)) { // Tablet is suspended. See if its tablet server is back. TServerInstance returnInstance = null;