This is an automated email from the ASF dual-hosted git repository.

krathbun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git

commit 4e6aa7d7bb06741846c1b192232ea9833077ec6e
Merge: 6407c31147 c0979fd660
Author: Kevin Rathbun <kevinrr...@gmail.com>
AuthorDate: Tue Dec 10 12:54:59 2024 -0500

    Merge branch '3.1'

 .../org/apache/accumulo/core/fate/AdminUtil.java   |  13 --
 .../core/iterators/user/WholeRowIterator.java      |  10 +-
 .../org/apache/accumulo/server/util/Admin.java     |  68 ++----
 .../server/util/CheckForMetadataProblems.java      |  94 +++++----
 .../accumulo/server/util/FindOfflineTablets.java   |  41 ++--
 .../server/util/RemoveEntriesForMissingFiles.java  |  52 +++--
 .../server/util/checkCommand/CheckRunner.java      |  25 ++-
 .../util/checkCommand/MetadataCheckRunner.java     | 184 +++++++++++++++++
 .../checkCommand/MetadataTableCheckRunner.java     |  68 +++++-
 .../util/checkCommand/RootMetadataCheckRunner.java | 111 +++++++++-
 .../util/checkCommand/RootTableCheckRunner.java    |  70 ++++++-
 .../util/checkCommand/SystemConfigCheckRunner.java |  11 +-
 .../util/checkCommand/SystemFilesCheckRunner.java  |  18 +-
 .../util/checkCommand/TableLocksCheckRunner.java   | 121 +++++++++++
 .../util/checkCommand/UserFilesCheckRunner.java    |  24 ++-
 .../accumulo/server/util/AdminCommandsTest.java    |   7 -
 .../org/apache/accumulo/test/AdminCheckIT.java     | 227 ++++++++++++++++++---
 test/src/main/resources/log4j2-test.properties     |   3 +
 18 files changed, 953 insertions(+), 194 deletions(-)

diff --cc core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java
index 059044b754,80b21fc04d..0fe448d7d5
--- a/core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java
+++ b/core/src/main/java/org/apache/accumulo/core/fate/AdminUtil.java
@@@ -31,14 -31,10 +31,13 @@@ import java.util.Formatter
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
--import java.util.Map.Entry;
  import java.util.Set;
 +import java.util.stream.Stream;
  
 -import org.apache.accumulo.core.fate.ReadOnlyTStore.TStatus;
 +import org.apache.accumulo.core.fate.FateStore.FateTxStore;
 +import org.apache.accumulo.core.fate.ReadOnlyFateStore.FateIdStatus;
 +import org.apache.accumulo.core.fate.ReadOnlyFateStore.ReadOnlyFateTxStore;
 +import org.apache.accumulo.core.fate.ReadOnlyFateStore.TStatus;
  import org.apache.accumulo.core.fate.zookeeper.FateLock;
  import org.apache.accumulo.core.fate.zookeeper.FateLock.FateLockPath;
  import org.apache.accumulo.core.fate.zookeeper.ZooReader;
@@@ -429,22 -424,10 +428,10 @@@ public class AdminUtil<T> 
            txStatus.getWaitingLocks(), txStatus.getTop(), 
txStatus.getTimeCreatedFormatted());
      }
      fmt.format(" %s transactions", fateStatus.getTransactions().size());
- 
-     if (!fateStatus.getDanglingHeldLocks().isEmpty()
-         || !fateStatus.getDanglingWaitingLocks().isEmpty()) {
-       fmt.format("%nThe following locks did not have an associated FATE 
operation%n");
-       for (Entry<FateId,List<String>> entry : 
fateStatus.getDanglingHeldLocks().entrySet()) {
-         fmt.format("fateId: %s  locked: %s%n", entry.getKey(), 
entry.getValue());
-       }
- 
-       for (Entry<FateId,List<String>> entry : 
fateStatus.getDanglingWaitingLocks().entrySet()) {
-         fmt.format("fateId: %s  locking: %s%n", entry.getKey(), 
entry.getValue());
-       }
-     }
    }
  
 -  public boolean prepDelete(TStore<T> zs, ZooReaderWriter zk, ServiceLockPath 
path,
 -      String txidStr) {
 +  public boolean prepDelete(Map<FateInstanceType,FateStore<T>> stores, 
ZooReaderWriter zk,
 +      ServiceLockPath path, String fateIdStr) {
      if (!checkGlobalLock(zk, path)) {
        return false;
      }
diff --cc server/base/src/main/java/org/apache/accumulo/server/util/Admin.java
index 56f2fe7c09,01c7cc7625..4fadd83358
--- a/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java
@@@ -1059,171 -988,9 +1032,172 @@@ public class Admin implements KeywordEx
      return statusFilter;
    }
  
 +  /**
 +   * If provided on the command line, get the FateInstanceType values 
provided.
 +   *
 +   * @return a set of fate instance types filters, or null if none provided
 +   */
 +  private EnumSet<FateInstanceType> 
getCmdLineInstanceTypeFilters(List<String> instanceTypes) {
 +    EnumSet<FateInstanceType> typesFilter = null;
 +    if (!instanceTypes.isEmpty()) {
 +      typesFilter = EnumSet.noneOf(FateInstanceType.class);
 +      for (String instanceType : instanceTypes) {
 +        typesFilter.add(FateInstanceType.valueOf(instanceType));
 +      }
 +    }
 +    return typesFilter;
 +  }
 +
 +  private static long printDanglingFateOperations(ServerContext context, 
String tableName)
 +      throws Exception {
 +    long totalDanglingSeen = 0;
 +    if (tableName == null) {
 +      for (var dataLevel : Ample.DataLevel.values()) {
 +        try (var tablets = 
context.getAmple().readTablets().forLevel(dataLevel).build()) {
 +          totalDanglingSeen += printDanglingFateOperations(context, tablets);
 +        }
 +      }
 +    } else {
 +      var tableId = context.getTableId(tableName);
 +      try (var tablets = 
context.getAmple().readTablets().forTable(tableId).build()) {
 +        totalDanglingSeen += printDanglingFateOperations(context, tablets);
 +      }
 +    }
 +
 +    System.out.printf("\nFound %,d dangling references to fate operations\n", 
totalDanglingSeen);
 +
 +    return totalDanglingSeen;
 +  }
 +
 +  private static long printDanglingFateOperations(ServerContext context,
 +      Iterable<TabletMetadata> tablets) throws Exception {
 +    Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> 
tabletLookup = extents -> {
 +      try (var lookedupTablets =
 +          context.getAmple().readTablets().forTablets(extents, 
Optional.empty()).build()) {
 +        Map<KeyExtent,TabletMetadata> tabletMap = new HashMap<>();
 +        lookedupTablets
 +            .forEach(tabletMetadata -> 
tabletMap.put(tabletMetadata.getExtent(), tabletMetadata));
 +        return tabletMap;
 +      }
 +    };
 +
 +    UserFateStore<?> ufs = new UserFateStore<>(context, createDummyLockID(), 
null);
 +    MetaFateStore<?> mfs = new MetaFateStore<>(context.getZooKeeperRoot() + 
Constants.ZFATE,
 +        context.getZooReaderWriter(), createDummyLockID(), null);
 +    LoadingCache<FateId,ReadOnlyFateStore.TStatus> fateStatusCache = 
Caffeine.newBuilder()
 +        .maximumSize(100_000).expireAfterWrite(10, 
TimeUnit.SECONDS).build(fateId -> {
 +          if (fateId.getType() == FateInstanceType.META) {
 +            return mfs.read(fateId).getStatus();
 +          } else {
 +            return ufs.read(fateId).getStatus();
 +          }
 +        });
 +
 +    Predicate<FateId> activePredicate = fateId -> {
 +      var status = fateStatusCache.get(fateId);
 +      switch (status) {
 +        case NEW:
 +        case IN_PROGRESS:
 +        case SUBMITTED:
 +        case FAILED_IN_PROGRESS:
 +          return true;
 +        case FAILED:
 +        case SUCCESSFUL:
 +        case UNKNOWN:
 +          return false;
 +        default:
 +          throw new IllegalStateException("Unexpected status: " + status);
 +      }
 +    };
 +
 +    AtomicLong danglingSeen = new AtomicLong();
 +    BiConsumer<KeyExtent,Set<FateId>> danglingConsumer = (extent, fateIds) -> 
{
 +      danglingSeen.addAndGet(fateIds.size());
 +      fateIds.forEach(fateId -> System.out.println(fateId + " " + extent));
 +    };
 +
 +    findDanglingFateOperations(tablets, tabletLookup, activePredicate, 
danglingConsumer, 10_000);
 +    return danglingSeen.get();
 +  }
 +
 +  /**
 +   * Finds tablets that point to fate operations that do not exist or are 
complete.
 +   *
 +   * @param tablets the tablets to inspect
 +   * @param tabletLookup a function that can look up a tablet's latest metadata
 +   * @param activePredicate a predicate that can determine if a fate id is 
currently active
 +   * @param danglingConsumer a consumer that tablets with inactive fate ids 
will be sent to
 +   */
 +  static void findDanglingFateOperations(Iterable<TabletMetadata> tablets,
 +      Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> 
tabletLookup,
 +      Predicate<FateId> activePredicate, BiConsumer<KeyExtent,Set<FateId>> 
danglingConsumer,
 +      int bufferSize) {
 +
 +    ArrayList<FateId> fateIds = new ArrayList<>();
 +    Map<KeyExtent,Set<FateId>> candidates = new HashMap<>();
 +    for (TabletMetadata tablet : tablets) {
 +      fateIds.clear();
 +      getAllFateIds(tablet, fateIds::add);
 +      fateIds.removeIf(activePredicate);
 +      if (!fateIds.isEmpty()) {
 +        candidates.put(tablet.getExtent(), new HashSet<>(fateIds));
 +        if (candidates.size() > bufferSize) {
 +          processCandidates(candidates, tabletLookup, danglingConsumer);
 +          candidates.clear();
 +        }
 +      }
 +    }
 +
 +    processCandidates(candidates, tabletLookup, danglingConsumer);
 +  }
 +
 +  private static void processCandidates(Map<KeyExtent,Set<FateId>> candidates,
 +      Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> 
tabletLookup,
 +      BiConsumer<KeyExtent,Set<FateId>> danglingConsumer) {
 +    // Perform a 2nd check of the tablet to avoid race conditions like the 
following.
 +    // 1. THREAD 1 : TabletMetadata is read and points to active fate 
operation
 +    // 2. THREAD 2 : The fate operation is deleted from the tablet
 +    // 3. THREAD 2 : The fate operation completes
 +    // 4. THREAD 1 : Checks if the fate operation read in step 1 is active 
and finds it is not
 +
 +    Map<KeyExtent,TabletMetadata> currentTablets = 
tabletLookup.apply(candidates.keySet());
 +    HashSet<FateId> currentFateIds = new HashSet<>();
 +    candidates.forEach((extent, fateIds) -> {
 +      var currentTablet = currentTablets.get(extent);
 +      if (currentTablet != null) {
 +        currentFateIds.clear();
 +        getAllFateIds(currentTablet, currentFateIds::add);
 +        // Only keep fate ids that are still present in the tablet. Any new 
fate ids in
 +        // currentFateIds that were not seen on the first pass are not 
considered here. To check
 +        // those new ones, the entire two-step process would need to be rerun.
 +        fateIds.retainAll(currentFateIds);
 +
 +        if (!fateIds.isEmpty()) {
 +          // the fateIds in this set were found to be inactive and still 
exist in the tablet
 +          // metadata after being found inactive
 +          danglingConsumer.accept(extent, fateIds);
 +        }
 +      } // else the tablet no longer exists so nothing to report
 +    });
 +  }
 +
 +  /**
 +   * Extracts all fate ids that a tablet points to from any field.
 +   */
 +  private static void getAllFateIds(TabletMetadata tabletMetadata,
 +      Consumer<FateId> fateIdConsumer) {
 +    tabletMetadata.getLoaded().values().forEach(fateIdConsumer);
 +    if (tabletMetadata.getSelectedFiles() != null) {
 +      fateIdConsumer.accept(tabletMetadata.getSelectedFiles().getFateId());
 +    }
 +    if (tabletMetadata.getOperationId() != null) {
 +      fateIdConsumer.accept(tabletMetadata.getOperationId().getFateId());
 +    }
 +  }
 +
    @VisibleForTesting
-   public static void executeCheckCommand(ServerContext context, CheckCommand 
cmd) {
+   public static void executeCheckCommand(ServerContext context, CheckCommand 
cmd,
+       ServerUtilOpts opts) throws Exception {
      validateAndTransformCheckCommand(cmd);
  
      if (cmd.list) {
diff --cc 
server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java
index d09436e5bd,0cfa61bf0d..d17c2cc691
--- 
a/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/util/FindOfflineTablets.java
@@@ -21,9 -21,12 +21,10 @@@ package org.apache.accumulo.server.util
  import java.util.Iterator;
  import java.util.Set;
  import java.util.concurrent.atomic.AtomicBoolean;
+ import java.util.function.Consumer;
  
  import org.apache.accumulo.core.client.TableNotFoundException;
 -import org.apache.accumulo.core.data.Range;
 -import org.apache.accumulo.core.data.TableId;
 -import org.apache.accumulo.core.dataImpl.KeyExtent;
 +import org.apache.accumulo.core.client.admin.TabletAvailability;
  import org.apache.accumulo.core.manager.state.tables.TableState;
  import org.apache.accumulo.core.metadata.AccumuloTable;
  import org.apache.accumulo.core.metadata.TServerInstance;
@@@ -76,13 -83,15 +79,16 @@@ public class FindOfflineTablets 
      tservers.startListeningForTabletServerChanges();
      scanning.set(true);
  
 -    Iterator<TabletLocationState> zooScanner =
 -        TabletStateStore.getStoreForLevel(DataLevel.ROOT, context).iterator();
 -
      int offline = 0;
  
-     try (TabletsMetadata tabletsMetadata =
-         context.getAmple().readTablets().forLevel(DataLevel.ROOT).build()) {
-       System.out.println("Scanning zookeeper");
-       if ((offline = checkTablets(context, tabletsMetadata.iterator(), 
tservers)) > 0) {
-         return offline;
+     if (!skipZkScan) {
 -      printInfoMethod.accept("Scanning zookeeper");
 -      if ((offline = checkTablets(context, zooScanner, tservers, 
printProblemMethod)) > 0) {
 -        return offline;
++      try (TabletsMetadata tabletsMetadata =
++          context.getAmple().readTablets().forLevel(DataLevel.ROOT).build()) {
++        printInfoMethod.accept("Scanning zookeeper");
++        if ((offline =
++            checkTablets(context, tabletsMetadata.iterator(), tservers, 
printProblemMethod)) > 0) {
++          return offline;
++        }
        }
      }
  
@@@ -90,11 -99,12 +96,14 @@@
        return 0;
      }
  
-     System.out.println("Scanning " + AccumuloTable.ROOT.tableName());
-     try (TabletsMetadata tabletsMetadata =
-         
context.getAmple().readTablets().forLevel(DataLevel.METADATA).build()) {
-       if ((offline = checkTablets(context, tabletsMetadata.iterator(), 
tservers)) > 0) {
-         return offline;
+     if (!skipRootScan) {
+       printInfoMethod.accept("Scanning " + AccumuloTable.ROOT.tableName());
 -      Iterator<TabletLocationState> rootScanner = new 
MetaDataTableScanner(context,
 -          TabletsSection.getRange(), AccumuloTable.ROOT.tableName());
 -      if ((offline = checkTablets(context, rootScanner, tservers, 
printProblemMethod)) > 0) {
 -        return offline;
++      try (TabletsMetadata tabletsMetadata =
++          
context.getAmple().readTablets().forLevel(DataLevel.METADATA).build()) {
++        if ((offline =
++            checkTablets(context, tabletsMetadata.iterator(), tservers, 
printProblemMethod)) > 0) {
++          return offline;
++        }
        }
      }
  
@@@ -102,27 -112,32 +111,27 @@@
        return 0;
      }
  
-     System.out.println("Scanning " + AccumuloTable.METADATA.tableName());
+     printInfoMethod.accept("Scanning " + AccumuloTable.METADATA.tableName());
  
 -    Range range = TabletsSection.getRange();
 -    if (tableName != null) {
 -      TableId tableId = context.getTableId(tableName);
 -      range = new KeyExtent(tableId, null, null).toMetaRange();
 -    }
 -
 -    try (MetaDataTableScanner metaScanner =
 -        new MetaDataTableScanner(context, range, 
AccumuloTable.METADATA.tableName())) {
 -      return checkTablets(context, metaScanner, tservers, printProblemMethod);
 +    try (var metaScanner = 
context.getAmple().readTablets().forLevel(DataLevel.USER).build()) {
-       return checkTablets(context, metaScanner.iterator(), tservers);
++      return checkTablets(context, metaScanner.iterator(), tservers, 
printProblemMethod);
      }
    }
  
 -  private static int checkTablets(ServerContext context, 
Iterator<TabletLocationState> scanner,
 +  private static int checkTablets(ServerContext context, 
Iterator<TabletMetadata> scanner,
-       LiveTServerSet tservers) {
+       LiveTServerSet tservers, Consumer<String> printProblemMethod) {
      int offline = 0;
  
      while (scanner.hasNext() && !System.out.checkError()) {
 -      TabletLocationState locationState = scanner.next();
 -      TabletState state = 
locationState.getState(tservers.getCurrentServers());
 +      TabletMetadata tabletMetadata = scanner.next();
 +      Set<TServerInstance> liveTServers = tservers.getCurrentServers();
 +      TabletState state = TabletState.compute(tabletMetadata, liveTServers);
        if (state != null && state != TabletState.HOSTED
 -          && 
context.getTableManager().getTableState(locationState.extent.tableId())
 -              != TableState.OFFLINE) {
 -        printProblemMethod
 -            .accept(locationState + " is " + state + "  #walogs:" + 
locationState.walogs.size());
 +          && tabletMetadata.getTabletAvailability() == 
TabletAvailability.HOSTED
 +          && 
context.getTableManager().getTableState(tabletMetadata.getTableId())
 +              == TableState.ONLINE) {
-         System.out.println(tabletMetadata.getExtent() + " is " + state + "  
#walogs:"
++        printProblemMethod.accept(tabletMetadata.getExtent() + " is " + state 
+ "  #walogs:"
 +            + tabletMetadata.getLogs().size());
          offline++;
        }
      }
diff --cc 
server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java
index bd4282246d,3b4cd1112b..99dee426ca
--- 
a/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/RootMetadataCheckRunner.java
@@@ -24,12 -41,96 +41,96 @@@ public class RootMetadataCheckRunner im
    private static final Admin.CheckCommand.Check check = 
Admin.CheckCommand.Check.ROOT_METADATA;
  
    @Override
-   public Admin.CheckCommand.CheckStatus runCheck() {
+   public String tableName() {
+     throw new UnsupportedOperationException();
+   }
+ 
+   @Override
+   public TableId tableId() {
+     throw new UnsupportedOperationException();
+   }
+ 
+   @Override
+   public Set<ColumnFQ> requiredColFQs() {
+     return 
Set.of(MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN,
+         MetadataSchema.TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN,
+         MetadataSchema.TabletsSection.ServerColumnFamily.TIME_COLUMN,
+         MetadataSchema.TabletsSection.ServerColumnFamily.LOCK_COLUMN);
+   }
+ 
+   @Override
+   public Set<Text> requiredColFams() {
+     return 
Set.of(MetadataSchema.TabletsSection.CurrentLocationColumnFamily.NAME);
+   }
+ 
+   @Override
+   public String scanning() {
+     return "root tablet metadata in ZooKeeper";
+   }
+ 
+   @Override
+   public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, 
ServerUtilOpts opts,
+       boolean fixFiles) throws TableNotFoundException, InterruptedException, 
KeeperException {
      Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK;
+     printRunning();
+ 
+     log.trace("********** Looking for offline tablets **********");
+     if (FindOfflineTablets.findOffline(context, 
AccumuloTable.ROOT.tableName(), false, true,
+         log::trace, log::warn) != 0) {
+       status = Admin.CheckCommand.CheckStatus.FAILED;
+     } else {
+       log.trace("All good... No offline tablets found");
+     }
+ 
+     log.trace("********** Looking for missing columns **********");
+     status = checkRequiredColumns(context, status);
+ 
+     log.trace("********** Looking for invalid columns **********");
+     final String path = context.getZooKeeperRoot() + RootTable.ZROOT_TABLET;
+     final String json = new String(context.getZooReader().getData(path), 
UTF_8);
+     final var rtm = new RootTabletMetadata(json);
 -    status = checkColumns(context, rtm.toKeyValues().iterator(), status);
++    status = checkColumns(context, rtm.getKeyValues().iterator(), status);
+ 
+     printCompleted(status);
+     return status;
+   }
+ 
+   @Override
+   public Admin.CheckCommand.CheckStatus checkRequiredColumns(ServerContext 
context,
+       Admin.CheckCommand.CheckStatus status)
+       throws TableNotFoundException, InterruptedException, KeeperException {
+     final String path = context.getZooKeeperRoot() + RootTable.ZROOT_TABLET;
+     final String json = new String(context.getZooReader().getData(path), 
UTF_8);
+     final var rtm = new RootTabletMetadata(json);
+     final Set<Text> rowsSeen = new HashSet<>();
+     final Set<ColumnFQ> requiredColFQs = new HashSet<>(requiredColFQs());
+     final Set<Text> requiredColFams = new HashSet<>(requiredColFams());
+ 
+     log.trace("Scanning the {} for missing required columns...\n", 
scanning());
 -    rtm.toKeyValues().forEach(e -> {
++    rtm.getKeyValues().forEach(e -> {
+       var key = e.getKey();
+       rowsSeen.add(key.getRow());
+       boolean removed =
+           requiredColFQs.remove(new ColumnFQ(key.getColumnFamily(), 
key.getColumnQualifier()));
+       if (!removed) {
+         requiredColFams.remove(key.getColumnFamily());
+       }
+     });
+ 
+     if (rowsSeen.size() != 1) {
+       status = Admin.CheckCommand.CheckStatus.FAILED;
+       log.warn("Did not see one tablet for the root table!");
+     } else {
+       if (!requiredColFQs.isEmpty() || !requiredColFams.isEmpty()) {
+         log.warn("Tablet {} is missing required columns: col FQs: {}, col 
fams: {} in the {}\n",
+             rowsSeen.stream().findFirst().orElseThrow(), requiredColFQs, 
requiredColFams,
+             scanning());
+         status = Admin.CheckCommand.CheckStatus.FAILED;
+       } else {
+         log.trace("...The {} contains all required columns for the root 
tablet\n", scanning());
+       }
+     }
  
-     System.out.println("Running check " + check);
-     // work
-     System.out.println("Check " + check + " completed with status " + status);
      return status;
    }
  
diff --cc 
server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java
index 0000000000,9161c9867d..c7c04079c5
mode 000000,100644..100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/util/checkCommand/TableLocksCheckRunner.java
@@@ -1,0 -1,112 +1,121 @@@
+ /*
+  * Licensed to the Apache Software Foundation (ASF) under one
+  * or more contributor license agreements.  See the NOTICE file
+  * distributed with this work for additional information
+  * regarding copyright ownership.  The ASF licenses this file
+  * to you under the Apache License, Version 2.0 (the
+  * "License"); you may not use this file except in compliance
+  * with the License.  You may obtain a copy of the License at
+  *
+  *   https://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing,
+  * software distributed under the License is distributed on an
+  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  * KIND, either express or implied.  See the License for the
+  * specific language governing permissions and limitations
+  * under the License.
+  */
+ package org.apache.accumulo.server.util.checkCommand;
+ 
+ import java.util.List;
+ import java.util.Map;
+ 
+ import org.apache.accumulo.core.Constants;
+ import org.apache.accumulo.core.client.AccumuloException;
+ import org.apache.accumulo.core.client.AccumuloSecurityException;
++import org.apache.accumulo.core.fate.AbstractFateStore;
+ import org.apache.accumulo.core.fate.AdminUtil;
 -import org.apache.accumulo.core.fate.ZooStore;
++import org.apache.accumulo.core.fate.FateId;
++import org.apache.accumulo.core.fate.FateInstanceType;
++import org.apache.accumulo.core.fate.user.UserFateStore;
++import org.apache.accumulo.core.fate.zookeeper.MetaFateStore;
+ import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter;
 -import org.apache.accumulo.core.lock.ServiceLock;
++import org.apache.accumulo.core.metadata.AccumuloTable;
+ import org.apache.accumulo.server.ServerContext;
+ import org.apache.accumulo.server.cli.ServerUtilOpts;
+ import org.apache.accumulo.server.util.Admin;
+ import org.apache.zookeeper.KeeperException;
+ 
+ public class TableLocksCheckRunner implements CheckRunner {
+   private static final Admin.CheckCommand.Check check = 
Admin.CheckCommand.Check.TABLE_LOCKS;
+ 
+   @Override
+   public Admin.CheckCommand.CheckStatus runCheck(ServerContext context, 
ServerUtilOpts opts,
+       boolean fixFiles) throws Exception {
+     Admin.CheckCommand.CheckStatus status = Admin.CheckCommand.CheckStatus.OK;
+     printRunning();
+ 
+     log.trace("********** Checking some references **********");
+     status = checkTableLocks(context, status);
+ 
+     printCompleted(status);
+     return status;
+   }
+ 
+   @Override
+   public Admin.CheckCommand.Check getCheck() {
+     return check;
+   }
+ 
+   private static Admin.CheckCommand.CheckStatus checkTableLocks(ServerContext 
context,
+       Admin.CheckCommand.CheckStatus status)
+       throws InterruptedException, KeeperException, AccumuloException, 
AccumuloSecurityException {
+     final AdminUtil<Admin> admin = new AdminUtil<>(true);
+     final String zkRoot = context.getZooKeeperRoot();
 -    final var zTableLocksPath = ServiceLock.path(zkRoot + 
Constants.ZTABLE_LOCKS);
++    final var zTableLocksPath = 
context.getServerPaths().createTableLocksPath();
+     final String fateZkPath = zkRoot + Constants.ZFATE;
+     final ZooReaderWriter zk = context.getZooReaderWriter();
 -    final ZooStore<Admin> zs = new ZooStore<>(fateZkPath, zk);
++    final MetaFateStore<Admin> mfs =
++        new MetaFateStore<>(fateZkPath, zk, 
AbstractFateStore.createDummyLockID(), null);
++    final UserFateStore<Admin> ufs = new UserFateStore<>(context, 
AccumuloTable.FATE.tableName(),
++        AbstractFateStore.createDummyLockID(), null);
+ 
+     log.trace("Ensuring table and namespace locks are valid...");
+ 
+     var tableIds = context.tableOperations().tableIdMap().values();
+     var namespaceIds = 
context.namespaceOperations().namespaceIdMap().values();
+     List<String> lockedIds = zk.getChildren(zTableLocksPath.toString());
+     boolean locksExist = !lockedIds.isEmpty();
+ 
+     if (locksExist) {
+       lockedIds.removeAll(tableIds);
+       lockedIds.removeAll(namespaceIds);
+       if (!lockedIds.isEmpty()) {
+         status = Admin.CheckCommand.CheckStatus.FAILED;
+         log.warn("...Some table and namespace locks are INVALID (the 
table/namespace DNE): "
+             + lockedIds);
+       } else {
+         log.trace("...locks are valid");
+       }
+     } else {
+       log.trace("...no locks present");
+     }
+ 
+     log.trace("Ensuring table and namespace locks are associated with a FATE 
op...");
+ 
+     if (locksExist) {
 -      final var fateStatus = admin.getStatus(zs, zk, zTableLocksPath, null, 
null);
++      final var fateStatus =
++          admin.getStatus(Map.of(FateInstanceType.META, mfs, 
FateInstanceType.USER, ufs), zk,
++              zTableLocksPath, null, null, null);
+       if (!fateStatus.getDanglingHeldLocks().isEmpty()
+           || !fateStatus.getDanglingWaitingLocks().isEmpty()) {
+         status = Admin.CheckCommand.CheckStatus.FAILED;
+         log.warn("The following locks did not have an associated FATE 
operation\n");
 -        for (Map.Entry<String,List<String>> entry : 
fateStatus.getDanglingHeldLocks().entrySet()) {
 -          log.warn("txid: " + entry.getKey() + " locked: " + 
entry.getValue());
++        for (Map.Entry<FateId,List<String>> entry : 
fateStatus.getDanglingHeldLocks().entrySet()) {
++          log.warn("fateId: " + entry.getKey() + " locked: " + 
entry.getValue());
+         }
 -        for (Map.Entry<String,List<String>> entry : 
fateStatus.getDanglingWaitingLocks()
++        for (Map.Entry<FateId,List<String>> entry : 
fateStatus.getDanglingWaitingLocks()
+             .entrySet()) {
 -          log.warn("txid: " + entry.getKey() + " locking: " + 
entry.getValue());
++          log.warn("fateId: " + entry.getKey() + " locking: " + 
entry.getValue());
+         }
+       } else {
+         log.trace("...locks are valid");
+       }
+     } else {
+       log.trace("...no locks present");
+     }
+ 
+     return status;
+   }
+ }
diff --cc test/src/main/resources/log4j2-test.properties
index ac62675d85,fa8c7864ba..e6fde6790a
--- a/test/src/main/resources/log4j2-test.properties
+++ b/test/src/main/resources/log4j2-test.properties
@@@ -142,11 -142,8 +142,14 @@@ logger.38.level = debu
  logger.39.name = org.apache.accumulo.manager.Manager
  logger.39.level = trace
  
 -logger.40.name = org.apache.accumulo.server.util.checkCommand.CheckRunner
 +logger.40.name = org.apache.accumulo.tablet
  logger.40.level = trace
  
 +logger.41.name = org.apache.accumulo.server.metadata
 +logger.41.level = trace
 +
++logger.42.name = org.apache.accumulo.server.util.checkCommand.CheckRunner
++logger.42.level = trace
++
  rootLogger.level = debug
  rootLogger.appenderRef.console.ref = STDOUT

Reply via email to