This is an automated email from the ASF dual-hosted git repository. krathbun pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 4e6aa7d7bb06741846c1b192232ea9833077ec6e Merge: 6407c31147 c0979fd660 Author: Kevin Rathbun <kevinrr...@gmail.com> AuthorDate: Tue Dec 10 12:54:59 2024 -0500 Merge branch '3.1' .../org/apache/accumulo/core/fate/AdminUtil.java | 13 -- .../core/iterators/user/WholeRowIterator.java | 10 +- .../org/apache/accumulo/server/util/Admin.java | 68 ++---- .../server/util/CheckForMetadataProblems.java | 94 +++++---- .../accumulo/server/util/FindOfflineTablets.java | 41 ++-- .../server/util/RemoveEntriesForMissingFiles.java | 52 +++-- .../server/util/checkCommand/CheckRunner.java | 25 ++- .../util/checkCommand/MetadataCheckRunner.java | 184 +++++++++++++++++ .../checkCommand/MetadataTableCheckRunner.java | 68 +++++- .../util/checkCommand/RootMetadataCheckRunner.java | 111 +++++++++- .../util/checkCommand/RootTableCheckRunner.java | 70 ++++++- .../util/checkCommand/SystemConfigCheckRunner.java | 11 +- .../util/checkCommand/SystemFilesCheckRunner.java | 18 +- .../util/checkCommand/TableLocksCheckRunner.java | 121 +++++++++++ .../util/checkCommand/UserFilesCheckRunner.java | 24 ++- .../accumulo/server/util/AdminCommandsTest.java | 7 - .../org/apache/accumulo/test/AdminCheckIT.java | 227 ++++++++++++++++++--- test/src/main/resources/log4j2-test.properties | 3 + 18 files changed, 953 insertions(+), 194 deletions(-) diff --cc core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java index 059044b754,80b21fc04d..0fe448d7d5 --- a/core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java +++ b/core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java @@@ -31,14 -31,10 +31,13 @@@ import java.util.Formatter import java.util.HashMap; import java.util.List; import java.util.Map; --import java.util.Map.Entry; import java.util.Set; +import java.util.stream.Stream; -import org.apache.accumulo.core.fate.ReadOnlyTStore.TStatus; +import org.apache.accumulo.core.fate.FateStore.FateTxStore; +import org.apache.accumulo.core.fate.ReadOnlyFateStore.FateIdStatus; +import org.apache.accumulo.core.fate.ReadOnlyFateStore.ReadOnlyFateTxStore; +import org.apache.accumulo.core.fate.ReadOnlyFateStore.TStatus; import org.apache.accumulo.core.fate.zookeeper.FateLock; import org.apache.accumulo.core.fate.zookeeper.FateLock.FateLockPath; import org.apache.accumulo.core.fate.zookeeper.ZooReader; @@@ -429,22 -424,10 +428,10 @@@ public class AdminUtil<T> txStatus.getWaitingLocks(), txStatus.getTop(), txStatus.getTimeCreatedFormatted()); } fmt.format(" %s transactions", fateStatus.getTransactions().size()); - - if (!fateStatus.getDanglingHeldLocks().isEmpty() - || !fateStatus.getDanglingWaitingLocks().isEmpty()) { - fmt.format("%nThe following locks did not have an associated FATE operation%n"); - for (Entry<FateId,List<String>> entry : fateStatus.getDanglingHeldLocks().entrySet()) { - fmt.format("fateId: %s locked: %s%n", entry.getKey(), entry.getValue()); - } - - for (Entry<FateId,List<String>> entry : fateStatus.getDanglingWaitingLocks().entrySet()) { - fmt.format("fateId: %s locking: %s%n", entry.getKey(), entry.getValue()); - } - } } - public boolean prepDelete(TStore<T> zs, ZooReaderWriter zk, ServiceLockPath path, - String txidStr) { + public boolean prepDelete(Map<FateInstanceType,FateStore<T>> stores, ZooReaderWriter zk, + ServiceLockPath path, String fateIdStr) { if (!checkGlobalLock(zk, path)) { return false; } diff --cc server/base/src/main/java/org/apache/accumulo/server/util/Admin.java index 56f2fe7c09,01c7cc7625..4fadd83358 --- a/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java @@@ -1059,171 -988,9 +1032,172 @@@ public class Admin implements KeywordEx return statusFilter; } + /** + * If provided on the command line, get the FateInstanceType values provided. + * + * @return a set of fate instance types filters, or null if none provided + */ + private EnumSet<FateInstanceType> getCmdLineInstanceTypeFilters(List<String> instanceTypes) { + EnumSet<FateInstanceType> typesFilter = null; + if (!instanceTypes.isEmpty()) { + typesFilter = EnumSet.noneOf(FateInstanceType.class); + for (String instanceType : instanceTypes) { + typesFilter.add(FateInstanceType.valueOf(instanceType)); + } + } + return typesFilter; + } + + private static long printDanglingFateOperations(ServerContext context, String tableName) + throws Exception { + long totalDanglingSeen = 0; + if (tableName == null) { + for (var dataLevel : Ample.DataLevel.values()) { + try (var tablets = context.getAmple().readTablets().forLevel(dataLevel).build()) { + totalDanglingSeen += printDanglingFateOperations(context, tablets); + } + } + } else { + var tableId = context.getTableId(tableName); + try (var tablets = context.getAmple().readTablets().forTable(tableId).build()) { + totalDanglingSeen += printDanglingFateOperations(context, tablets); + } + } + + System.out.printf("\nFound %,d dangling references to fate operations\n", totalDanglingSeen); + + return totalDanglingSeen; + } + + private static long printDanglingFateOperations(ServerContext context, + Iterable<TabletMetadata> tablets) throws Exception { + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup = extents -> { + try (var lookedupTablets = + context.getAmple().readTablets().forTablets(extents, Optional.empty()).build()) { + Map<KeyExtent,TabletMetadata> tabletMap = new HashMap<>(); + lookedupTablets + .forEach(tabletMetadata -> tabletMap.put(tabletMetadata.getExtent(), tabletMetadata)); + return tabletMap; + } + }; + + UserFateStore<?> ufs = new UserFateStore<>(context, createDummyLockID(), null); + MetaFateStore<?> mfs = new MetaFateStore<>(context.getZooKeeperRoot() + Constants.ZFATE, + context.getZooReaderWriter(), createDummyLockID(), null); + LoadingCache<FateId,ReadOnlyFateStore.TStatus> fateStatusCache = Caffeine.newBuilder() + .maximumSize(100_000).expireAfterWrite(10, TimeUnit.SECONDS).build(fateId -> { + if (fateId.getType() == FateInstanceType.META) { + return mfs.read(fateId).getStatus(); + } else { + return ufs.read(fateId).getStatus(); + } + }); + + Predicate<FateId> activePredicate = fateId -> { + var status = fateStatusCache.get(fateId); + switch (status) { + case NEW: + case IN_PROGRESS: + case SUBMITTED: + case FAILED_IN_PROGRESS: + return true; + case FAILED: + case SUCCESSFUL: + case UNKNOWN: + return false; + default: + throw new IllegalStateException("Unexpected status: " + status); + } + }; + + AtomicLong danglingSeen = new AtomicLong(); + BiConsumer<KeyExtent,Set<FateId>> danglingConsumer = (extent, fateIds) -> { + danglingSeen.addAndGet(fateIds.size()); + fateIds.forEach(fateId -> System.out.println(fateId + " " + extent)); + }; + + findDanglingFateOperations(tablets, tabletLookup, activePredicate, danglingConsumer, 10_000); + return danglingSeen.get(); + } + + /** + * Finds tablets that point to fate operations that do not exists or are complete. + * + * @param tablets the tablets to inspect + * @param tabletLookup a function that can lookup a tablets latest metadata + * @param activePredicate a predicate that can determine if a fate id is currently active + * @param danglingConsumer a consumer that tablets with inactive fate ids will be sent to + */ + static void findDanglingFateOperations(Iterable<TabletMetadata> tablets, + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup, + Predicate<FateId> activePredicate, BiConsumer<KeyExtent,Set<FateId>> danglingConsumer, + int bufferSize) { + + ArrayList<FateId> fateIds = new ArrayList<>(); + Map<KeyExtent,Set<FateId>> candidates = new HashMap<>(); + for (TabletMetadata tablet : tablets) { + fateIds.clear(); + getAllFateIds(tablet, fateIds::add); + fateIds.removeIf(activePredicate); + if (!fateIds.isEmpty()) { + candidates.put(tablet.getExtent(), new HashSet<>(fateIds)); + if (candidates.size() > bufferSize) { + processCandidates(candidates, tabletLookup, danglingConsumer); + candidates.clear(); + } + } + } + + processCandidates(candidates, tabletLookup, danglingConsumer); + } + + private static void processCandidates(Map<KeyExtent,Set<FateId>> candidates, + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup, + BiConsumer<KeyExtent,Set<FateId>> danglingConsumer) { + // Perform a 2nd check of the tablet to avoid race conditions like the following. + // 1. THREAD 1 : TabletMetadata is read and points to active fate operation + // 2. THREAD 2 : The fate operation is deleted from the tablet + // 3. THREAD 2 : The fate operation completes + // 4. THREAD 1 : Checks if the fate operation read in step 1 is active and finds it is not + + Map<KeyExtent,TabletMetadata> currentTablets = tabletLookup.apply(candidates.keySet()); + HashSet<FateId> currentFateIds = new HashSet<>(); + candidates.forEach((extent, fateIds) -> { + var currentTablet = currentTablets.get(extent); + if (currentTablet != null) { + currentFateIds.clear(); + getAllFateIds(currentTablet, currentFateIds::add); + // Only keep fate ids that are still present in the tablet. Any new fate ids in + // currentFateIds that were not seen on the first pass are not considered here. To check + // those new ones, the entire two-step process would need to be rerun. + fateIds.retainAll(currentFateIds); + + if (!fateIds.isEmpty()) { + // the fateIds in this set were found to be inactive and still exist in the tablet + // metadata after being found inactive + danglingConsumer.accept(extent, fateIds); + } + } // else the tablet no longer exist so nothing to report + }); + } + + /** + * Extracts all fate ids that a tablet points to from any field. + */ + private static void getAllFateIds(TabletMetadata tabletMetadata, + Consumer<FateId> fateIdConsumer) { + tabletMetadata.getLoaded().values().forEach(fateIdConsumer); + if (tabletMetadata.getSelectedFiles() != null) { + fateIdConsumer.accept(tabletMetadata.getSelectedFiles().getFateId()); + } + if (tabletMetadata.getOperationId() != null) { + fateIdConsumer.accept(tabletMetadata.getOperationId().getFateId()); + } + } + @VisibleForTesting - public static void executeCheckCommand(ServerContext context, CheckCommand cmd) { + public static void executeCheckCommand(ServerContext context, CheckCommand cmd, + ServerUtilOpts opts) throws Exception { validateAndTransformCheckCommand(cmd); if (cmd.list) { diff --cc server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java index d09436e5bd,0cfa61bf0d..d17c2cc691 --- a/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java @@@ -21,9 -21,12 +21,10 @@@ package org.apache.accumulo.server.util import java.util.Iterator; import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; + import java.util.function.Consumer; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.data.Range; -import org.apache.accumulo.core.data.TableId; -import org.apache.accumulo.core.dataImpl.KeyExtent; +import org.apache.accumulo.core.client.admin.TabletAvailability; import org.apache.accumulo.core.manager.state.tables.TableState; import org.apache.accumulo.core.metadata.AccumuloTable; import org.apache.accumulo.core.metadata.TServerInstance; @@@ -76,13 -83,15 +79,16 @@@ public class FindOfflineTablets tservers.startListeningForTabletServerChanges(); scanning.set(true); - Iterator<TabletLocationState> zooScanner = - TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator(); - int offline = 0; - try (TabletsMetadata tabletsMetadata = - context.getAmple().readTablets().forLevel(DataLevel.ROOT).build()) { - System.out.println("Scanning zookeeper"); - if ((offline = checkTablets(context, tabletsMetadata.iterator(), tservers)) > 0) { - return offline; + if (!skipZkScan) { - printInfoMethod.accept("Scanning zookeeper"); - if ((offline = checkTablets(context, zooScanner, tservers, printProblemMethod)) > 0) { - return offline; ++ try (TabletsMetadata tabletsMetadata = ++ context.getAmple().readTablets().forLevel(DataLevel.ROOT).build()) { ++ printInfoMethod.accept("Scanning zookeeper"); ++ if ((offline = ++ checkTablets(context, tabletsMetadata.iterator(), tservers, printProblemMethod)) > 0) { ++ return offline; ++ } } } @@@ -90,11 -99,12 +96,14 @@@ return 0; } - System.out.println("Scanning " + AccumuloTable.ROOT.tableName()); - try (TabletsMetadata tabletsMetadata = - context.getAmple().readTablets().forLevel(DataLevel.METADATA).build()) { - if ((offline = checkTablets(context, tabletsMetadata.iterator(), tservers)) > 0) { - return offline; + if (!skipRootScan) { + printInfoMethod.accept("Scanning " + AccumuloTable.ROOT.tableName()); - Iterator<TabletLocationState> rootScanner = new MetaDataTableScanner(context, - TabletsSection.getRange(), AccumuloTable.ROOT.tableName()); - if ((offline = checkTablets(context, rootScanner, tservers, printProblemMethod)) > 0) { - return offline; ++ try (TabletsMetadata tabletsMetadata = ++ context.getAmple().readTablets().forLevel(DataLevel.METADATA).build()) { ++ if ((offline = ++ checkTablets(context, tabletsMetadata.iterator(), tservers, printProblemMethod)) > 0) { ++ return offline; ++ } } } @@@ -102,27 -112,32 +111,27 @@@ return 0; } - System.out.println("Scanning " + AccumuloTable.METADATA.tableName()); + printInfoMethod.accept("Scanning " + AccumuloTable.METADATA.tableName()); - Range range = TabletsSection.getRange(); - if (tableName != null) { - TableId tableId = context.getTableId(tableName); - range = new KeyExtent(tableId, null, null).toMetaRange(); - } - - try (MetaDataTableScanner metaScanner = - new MetaDataTableScanner(context, range, AccumuloTable.METADATA.tableName())) { - return checkTablets(context, metaScanner, tservers, printProblemMethod); + try (var metaScanner = context.getAmple().readTablets().forLevel(DataLevel.USER).build()) { - return checkTablets(context, metaScanner.iterator(), tservers); ++ return checkTablets(context, metaScanner.iterator(), tservers, printProblemMethod); } } - private static int checkTablets(ServerContext context, Iterator<TabletLocationState> scanner, + private static int checkTablets(ServerContext context, Iterator<TabletMetadata> scanner, - LiveTServerSet tservers) { + LiveTServerSet tservers, Consumer<String> printProblemMethod) { int offline = 0; while (scanner.hasNext() && !System.out.checkError()) { - TabletLocationState locationState = scanner.next(); - TabletState state = locationState.getState(tservers.getCurrentServers()); + TabletMetadata tabletMetadata = scanner.next(); + Set<TServerInstance> liveTServers = tservers.getCurrentServers(); + TabletState state = TabletState.compute(tabletMetadata, liveTServers); if (state != null && state != TabletState.HOSTED - && context.getTableManager().getTableState(locationState.extent.tableId()) - != TableState.OFFLINE) { - printProblemMethod - .accept(locationState + " is " + state + " #walogs:" + locationState.walogs.size()); + && tabletMetadata.getTabletAvailability() == TabletAvailability.HOSTED + && context.getTableManager().getTableState(tabletMetadata.getTableId()) + == TableState.ONLINE) { - System.out.println(tabletMetadata.getExtent() + " is " + state + " #walogs:" ++ printProblemMethod.accept(tabletMetadata.getExtent() + " is " + state + " #walogs:" + + tabletMetadata.getLogs().size()); offline++; } } diff --cc server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java index bd4282246d,3b4cd1112b..99dee426ca --- a/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java @@@ -24,12 -41,96 +41,96 @@@ public class RootMetadataCheckRunner im private static final Admin.CheckCommand.Check check = Admin.CheckCommand.Check.ROOT_METADATA; @Override - public Admin.CheckCommand.CheckStatus runCheck() { + public String tableName() { + throw new UnsupportedOperationException(); + } + + @Override + public TableId tableId() { + throw new UnsupportedOperationException(); + } + + @Override + public Set<ColumnFQ> requiredColFQs() { + return Set.of(MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN, + MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN, + MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN, + MetadataSchema.TabletsSection.ServerColumnFamily.LOCK_COLUMN); + } + + @Override + public Set<Text> requiredColFams() { + return Set.of(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME); + } + + @Override + public String scanning() { + return "root tablet metadata in ZooKeeper"; + } + + @Override + public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, ServerUtilOpts opts, + boolean fixFiles) throws TableNotFoundException, InterruptedException, KeeperException { Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK; + printRunning(); + + log.trace("********** Looking for offline tablets **********"); + if (FindOfflineTablets.findOffline(context, AccumuloTable.ROOT.tableName(), false, true, + log::trace, log::warn) != 0) { + status = Admin.CheckCommand.CheckStatus.FAILED; + } else { + log.trace("All good... No offline tablets found"); + } + + log.trace("********** Looking for missing columns **********"); + status = checkRequiredColumns(context, status); + + log.trace("********** Looking for invalid columns **********"); + final String path = context.getZooKeeperRoot() + RootTable.ZROOT_TABLET; + final String json = new String(context.getZooReader().getData(path), UTF_8); + final var rtm = new RootTabletMetadata(json); - status = checkColumns(context, rtm.toKeyValues().iterator(), status); ++ status = checkColumns(context, rtm.getKeyValues().iterator(), status); + + printCompleted(status); + return status; + } + + @Override + public Admin.CheckCommand.CheckStatus checkRequiredColumns(ServerContext context, + Admin.CheckCommand.CheckStatus status) + throws TableNotFoundException, InterruptedException, KeeperException { + final String path = context.getZooKeeperRoot() + RootTable.ZROOT_TABLET; + final String json = new String(context.getZooReader().getData(path), UTF_8); + final var rtm = new RootTabletMetadata(json); + final Set<Text> rowsSeen = new HashSet<>(); + final Set<ColumnFQ> requiredColFQs = new HashSet<>(requiredColFQs()); + final Set<Text> requiredColFams = new HashSet<>(requiredColFams()); + + log.trace("Scanning the {} for missing required columns...\n", scanning()); - rtm.toKeyValues().forEach(e -> { ++ rtm.getKeyValues().forEach(e -> { + var key = e.getKey(); + rowsSeen.add(key.getRow()); + boolean removed = + requiredColFQs.remove(new ColumnFQ(key.getColumnFamily(), key.getColumnQualifier())); + if (!removed) { + requiredColFams.remove(key.getColumnFamily()); + } + }); + + if (rowsSeen.size() != 1) { + status = Admin.CheckCommand.CheckStatus.FAILED; + log.warn("Did not see one tablet for the root table!"); + } else { + if (!requiredColFQs.isEmpty() || !requiredColFams.isEmpty()) { + log.warn("Tablet {} is missing required columns: col FQs: {}, col fams: {} in the {}\n", + rowsSeen.stream().findFirst().orElseThrow(), requiredColFQs, requiredColFams, + scanning()); + status = Admin.CheckCommand.CheckStatus.FAILED; + } else { + log.trace("...The {} contains all required columns for the root tablet\n", scanning()); + } + } - System.out.println("Running check " + check); - // work - System.out.println("Check " + check + " completed with status " + status); return status; } diff --cc server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java index 0000000000,9161c9867d..c7c04079c5 mode 000000,100644..100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java @@@ -1,0 -1,112 +1,121 @@@ + /* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.apache.accumulo.server.util.checkCommand; + + import java.util.List; + import java.util.Map; + + import org.apache.accumulo.core.Constants; + import org.apache.accumulo.core.client.AccumuloException; + import org.apache.accumulo.core.client.AccumuloSecurityException; ++import org.apache.accumulo.core.fate.AbstractFateStore; + import org.apache.accumulo.core.fate.AdminUtil; -import org.apache.accumulo.core.fate.ZooStore; ++import org.apache.accumulo.core.fate.FateId; ++import org.apache.accumulo.core.fate.FateInstanceType; ++import org.apache.accumulo.core.fate.user.UserFateStore; ++import org.apache.accumulo.core.fate.zookeeper.MetaFateStore; + import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter; -import org.apache.accumulo.core.lock.ServiceLock; ++import org.apache.accumulo.core.metadata.AccumuloTable; + import org.apache.accumulo.server.ServerContext; + import org.apache.accumulo.server.cli.ServerUtilOpts; + import org.apache.accumulo.server.util.Admin; + import org.apache.zookeeper.KeeperException; + + public class TableLocksCheckRunner implements CheckRunner { + private static final Admin.CheckCommand.Check check = Admin.CheckCommand.Check.TABLE_LOCKS; + + @Override + public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, ServerUtilOpts opts, + boolean fixFiles) throws Exception { + Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK; + printRunning(); + + log.trace("********** Checking some references **********"); + status = checkTableLocks(context, status); + + printCompleted(status); + return status; + } + + @Override + public Admin.CheckCommand.Check getCheck() { + return check; + } + + private static Admin.CheckCommand.CheckStatus checkTableLocks(ServerContext context, + Admin.CheckCommand.CheckStatus status) + throws InterruptedException, KeeperException, AccumuloException, AccumuloSecurityException { + final AdminUtil<Admin> admin = new AdminUtil<>(true); + final String zkRoot = context.getZooKeeperRoot(); - final var zTableLocksPath = ServiceLock.path(zkRoot + Constants.ZTABLE_LOCKS); ++ final var zTableLocksPath = context.getServerPaths().createTableLocksPath(); + final String fateZkPath = zkRoot + Constants.ZFATE; + final ZooReaderWriter zk = context.getZooReaderWriter(); - final ZooStore<Admin> zs = new ZooStore<>(fateZkPath, zk); ++ final MetaFateStore<Admin> mfs = ++ new MetaFateStore<>(fateZkPath, zk, AbstractFateStore.createDummyLockID(), null); ++ final UserFateStore<Admin> ufs = new UserFateStore<>(context, AccumuloTable.FATE.tableName(), ++ AbstractFateStore.createDummyLockID(), null); + + log.trace("Ensuring table and namespace locks are valid..."); + + var tableIds = context.tableOperations().tableIdMap().values(); + var namespaceIds = context.namespaceOperations().namespaceIdMap().values(); + List<String> lockedIds = zk.getChildren(zTableLocksPath.toString()); + boolean locksExist = !lockedIds.isEmpty(); + + if (locksExist) { + lockedIds.removeAll(tableIds); + lockedIds.removeAll(namespaceIds); + if (!lockedIds.isEmpty()) { + status = Admin.CheckCommand.CheckStatus.FAILED; + log.warn("...Some table and namespace locks are INVALID (the table/namespace DNE): " + + lockedIds); + } else { + log.trace("...locks are valid"); + } + } else { + log.trace("...no locks present"); + } + + log.trace("Ensuring table and namespace locks are associated with a FATE op..."); + + if (locksExist) { - final var fateStatus = admin.getStatus(zs, zk, zTableLocksPath, null, null); ++ final var fateStatus = ++ admin.getStatus(Map.of(FateInstanceType.META, mfs, FateInstanceType.USER, ufs), zk, ++ zTableLocksPath, null, null, null); + if (!fateStatus.getDanglingHeldLocks().isEmpty() + || !fateStatus.getDanglingWaitingLocks().isEmpty()) { + status = Admin.CheckCommand.CheckStatus.FAILED; + log.warn("The following locks did not have an associated FATE operation\n"); - for (Map.Entry<String,List<String>> entry : fateStatus.getDanglingHeldLocks().entrySet()) { - log.warn("txid: " + entry.getKey() + " locked: " + entry.getValue()); ++ for (Map.Entry<FateId,List<String>> entry : fateStatus.getDanglingHeldLocks().entrySet()) { ++ log.warn("fateId: " + entry.getKey() + " locked: " + entry.getValue()); + } - for (Map.Entry<String,List<String>> entry : fateStatus.getDanglingWaitingLocks() ++ for (Map.Entry<FateId,List<String>> entry : fateStatus.getDanglingWaitingLocks() + .entrySet()) { - log.warn("txid: " + entry.getKey() + " locking: " + entry.getValue()); ++ log.warn("fateId: " + entry.getKey() + " locking: " + entry.getValue()); + } + } else { + log.trace("...locks are valid"); + } + } else { + log.trace("...no locks present"); + } + + return status; + } + } diff --cc test/src/main/resources/log4j2-test.properties index ac62675d85,fa8c7864ba..e6fde6790a --- a/test/src/main/resources/log4j2-test.properties +++ b/test/src/main/resources/log4j2-test.properties @@@ -142,11 -142,8 +142,14 @@@ logger.38.level = debu logger.39.name = org.apache.accumulo.manager.Manager logger.39.level = trace -logger.40.name = org.apache.accumulo.server.util.checkCommand.CheckRunner +logger.40.name = org.apache.accumulo.tablet logger.40.level = trace +logger.41.name = org.apache.accumulo.server.metadata +logger.41.level = trace + ++logger.42.name = org.apache.accumulo.server.util.checkCommand.CheckRunner ++logger.42.level = trace ++ rootLogger.level = debug rootLogger.appenderRef.console.ref = STDOUT