This is an automated email from the ASF dual-hosted git repository. krathbun pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 00bcad6dd16c5e102072499c72714ee9c111a449 Merge: 26f316d188 9f8c06979f Author: Kevin Rathbun <kevinrr...@gmail.com> AuthorDate: Tue Sep 17 15:18:35 2024 -0400 Merge branch '3.1' .../accumulo/core/conf/AccumuloConfiguration.java | 39 +- .../core/conf/AccumuloConfigurationTest.java | 103 ++++ .../server/constraints/BulkFileColData.java | 77 +++ .../server/constraints/MetadataConstraints.java | 552 ++++++++++++--------- .../server/metadata/RootTabletMutatorImpl.java | 2 +- .../org/apache/accumulo/server/util/Admin.java | 195 ++++++++ .../server/util/checkCommand/CheckRunner.java | 38 ++ .../checkCommand/MetadataTableCheckRunner.java | 40 ++ .../util/checkCommand/RootMetadataCheckRunner.java | 40 ++ .../util/checkCommand/RootTableCheckRunner.java | 40 ++ .../util/checkCommand/SystemConfigCheckRunner.java | 40 ++ .../util/checkCommand/SystemFilesCheckRunner.java | 40 ++ .../util/checkCommand/UserFilesCheckRunner.java | 42 ++ .../constraints/MetadataConstraintsTest.java | 40 +- .../org/apache/accumulo/test/AdminCheckIT.java | 411 +++++++++++++++ .../apache/accumulo/test/functional/BulkNewIT.java | 2 +- 16 files changed, 1435 insertions(+), 266 deletions(-) diff --cc server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java index 44a94ee2b7,c3ca7bdd4a..55dbb276b3 --- a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java +++ b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java @@@ -21,12 -21,12 +21,13 @@@ package org.apache.accumulo.server.cons import static java.nio.charset.StandardCharsets.UTF_8; import java.util.ArrayList; + import java.util.Arrays; import java.util.Collection; - import java.util.HashSet; import java.util.List; import java.util.Set; + import java.util.function.Consumer; +import org.apache.accumulo.core.clientImpl.TabletAvailabilityUtil; import org.apache.accumulo.core.data.ColumnUpdate; import org.apache.accumulo.core.data.Mutation; import 
org.apache.accumulo.core.data.Value; @@@ -127,294 -112,119 +129,130 @@@ public class MetadataConstraints implem return true; } - return validColumnQuals.contains(new ColumnFQ(cu)); + return validColumnQuals.contains(new ColumnFQ(family, qualifier)); } - private static ArrayList<Short> addViolation(ArrayList<Short> lst, int violation) { - if (lst == null) { - lst = new ArrayList<>(); - } + private static void addViolation(ArrayList<Short> lst, int violation) { lst.add((short) violation); - return lst; } - private static ArrayList<Short> addIfNotPresent(ArrayList<Short> lst, int intViolation) { - if (lst == null) { - return addViolation(null, intViolation); - } - short violation = (short) intViolation; - if (!lst.contains(violation)) { - return addViolation(lst, intViolation); + private static void addIfNotPresent(ArrayList<Short> lst, int violation) { + if (!lst.contains((short) violation)) { + addViolation(lst, violation); } - return lst; } /* * Validates the data file metadata is valid for a StoredTabletFile. 
*/ - private static ArrayList<Short> validateDataFileMetadata(ArrayList<Short> violations, - String metadata) { + private static void validateDataFileMetadata(ArrayList<Short> violations, String metadata, + Consumer<StoredTabletFile> stfConsumer) { try { - StoredTabletFile.validate(metadata); + stfConsumer.accept(StoredTabletFile.of(metadata)); } catch (RuntimeException e) { - violations = addViolation(violations, 12); - addViolation(violations, 9); ++ addViolation(violations, 12); } - return violations; } + /** + * Same as defined in {@link Constraint#check(Environment, Mutation)}, but returns an empty list + * instead of null if there are no violations + * + * @param env constraint environment + * @param mutation mutation to check + * @return list of violations, or empty list if no violations + */ @Override public List<Short> check(Environment env, Mutation mutation) { final ServerContext context = ((SystemEnvironment) env).getServerContext(); - - ArrayList<Short> violations = null; - - Collection<ColumnUpdate> colUpdates = mutation.getUpdates(); - - // check the row, it should contains at least one ; or end with < - boolean containsSemiC = false; - - byte[] row = mutation.getRow(); - - // always allow rows that fall within reserved areas - if (row.length > 0 && row[0] == '~') { - return null; - } - - for (byte b : row) { - if (b == ';') { - containsSemiC = true; - } - - if (b == ';' || b == '<') { - break; - } - - if (!validTableNameChars[0xff & b]) { - violations = addIfNotPresent(violations, 4); - } - } - - if (containsSemiC) { - if (row.length == 0) { - violations = addIfNotPresent(violations, 4); - } + final ArrayList<Short> violations = new ArrayList<>(); + final Collection<ColumnUpdate> colUpdates = mutation.getUpdates(); + final byte[] row = mutation.getRow(); + final List<ColumnUpdate> bulkFileColUpdates; + final BulkFileColData bfcValidationData; + + // avoids creating unnecessary objects + if (hasBulkCol(colUpdates)) { + bulkFileColUpdates = new 
ArrayList<>(); + bfcValidationData = new BulkFileColData(); } else { - // see if last row char is < - if (row.length == 0 || row[row.length - 1] != '<') { - violations = addIfNotPresent(violations, 4); - } - } - - if (row.length > 0 && row[0] == '!') { - if (row.length < 3 || row[1] != '0' || (row[2] != '<' && row[2] != ';')) { - violations = addIfNotPresent(violations, 4); - } + bulkFileColUpdates = null; + bfcValidationData = null; } - // ensure row is not less than Constants.METADATA_TABLE_ID - if (new Text(row).compareTo(new Text(AccumuloTable.METADATA.tableId().canonical())) < 0) { - violations = addViolation(violations, 5); + // always allow rows that fall within reserved areas + if (row.length > 0 && row[0] == '~') { + return violations; } - boolean checkedBulk = false; + validateTabletRow(violations, row); for (ColumnUpdate columnUpdate : colUpdates) { - String colFamStr = new String(columnUpdate.getColumnFamily(), UTF_8); + Text columnFamily = new Text(columnUpdate.getColumnFamily()); + Text columnQualifier = new Text(columnUpdate.getColumnQualifier()); if (columnUpdate.isDeleted()) { - if (!isValidColumn(columnUpdate)) { + if (!isValidColumn(columnFamily, columnQualifier)) { - violations = addViolation(violations, 2); + addViolation(violations, 2); } continue; } - if (columnUpdate.getValue().length == 0 && !(columnFamily.equals(ScanFileColumnFamily.NAME) - || columnFamily.equals(LogColumnFamily.NAME) - || TabletColumnFamily.REQUESTED_COLUMN.equals(columnFamily, columnQualifier) - || columnFamily.equals(CompactedColumnFamily.NAME) - || columnFamily.equals(UserCompactionRequestedColumnFamily.NAME))) { - violations = addViolation(violations, 6); - } + validateColValLen(violations, columnUpdate); - if (columnFamily.equals(DataFileColumnFamily.NAME)) { - violations = validateDataFileMetadata(violations, - new String(columnUpdate.getColumnQualifier(), UTF_8)); - - try { - DataFileValue dfv = new DataFileValue(columnUpdate.getValue()); - - if (dfv.getSize() < 0 
|| dfv.getNumEntries() < 0) { - violations = addViolation(violations, 1); - } - } catch (NumberFormatException | ArrayIndexOutOfBoundsException nfe) { - violations = addViolation(violations, 1); - } - } else if (columnFamily.equals(ScanFileColumnFamily.NAME)) { - violations = validateDataFileMetadata(violations, - new String(columnUpdate.getColumnQualifier(), UTF_8)); - } else if (TabletColumnFamily.AVAILABILITY_COLUMN.equals(columnFamily, columnQualifier)) { - try { - TabletAvailabilityUtil.fromValue(new Value(columnUpdate.getValue())); - } catch (IllegalArgumentException e) { - violations = addViolation(violations, 16); - } - } else if (ServerColumnFamily.OPID_COLUMN.equals(columnFamily, columnQualifier)) { - try { - TabletOperationId.validate(new String(columnUpdate.getValue(), UTF_8)); - } catch (IllegalArgumentException e) { - violations = addViolation(violations, 9); - } - } else if (ServerColumnFamily.SELECTED_COLUMN.equals(columnFamily, columnQualifier)) { - try { - SelectedFiles.from(new String(columnUpdate.getValue(), UTF_8)); - } catch (RuntimeException e) { - violations = addViolation(violations, 11); - } - } else if (CompactedColumnFamily.NAME.equals(columnFamily)) { - if (!FateId.isFateId(columnQualifier.toString())) { - violations = addViolation(violations, 13); - } - } else if (UserCompactionRequestedColumnFamily.NAME.equals(columnFamily)) { - if (!FateId.isFateId(columnQualifier.toString())) { - violations = addViolation(violations, 14); - } - } else if (SplitColumnFamily.UNSPLITTABLE_COLUMN.equals(columnFamily, columnQualifier)) { - try { - UnSplittableMetadata.toUnSplittable(new String(columnUpdate.getValue(), UTF_8)); - } catch (RuntimeException e) { - violations = addViolation(violations, 15); - } - } else if (columnFamily.equals(BulkFileColumnFamily.NAME)) { - if (!columnUpdate.isDeleted() && !checkedBulk) { - /* - * This needs to be re-worked after Issue https://github.com/apache/accumulo/issues/3505 - * is done. 
- * - * That issue will reorganizes this class and make things more efficient so we are not - * looping over the same mutation more than once like in this case. The below check is - * commented out for now because the violation check is already done when creating - * StoredTabletFiles so it isn't needed here anymore violations = - * validateDataFileMetadata(violations, new String(columnUpdate.getColumnQualifier(), - * UTF_8)); - */ - - // splits, which also write the time reference, are allowed to write this reference even - // when - // the transaction is not running because the other half of the tablet is holding a - // reference - // to the file. - boolean isSplitMutation = false; - switch (colFamStr) { ++ switch (new String(columnFamily.getBytes(), UTF_8)) { + case TabletColumnFamily.STR_NAME: + validateTabletFamily(violations, columnUpdate, mutation); + break; + case ServerColumnFamily.STR_NAME: + validateServerFamily(violations, columnUpdate, context, bfcValidationData); + break; + case CurrentLocationColumnFamily.STR_NAME: // When a tablet is assigned, it re-writes the metadata. It should probably only update - // the location information, - // but it writes everything. We allow it to re-write the bulk information if it is setting - // the location. + // the location information, but it writes everything. We allow it to re-write the bulk + // information if it is setting the location. // See ACCUMULO-1230. 
- boolean isLocationMutation = false; - - HashSet<StoredTabletFile> dataFiles = new HashSet<>(); - HashSet<StoredTabletFile> loadedFiles = new HashSet<>(); - - String tidString = new String(columnUpdate.getValue(), UTF_8); - int otherTidCount = 0; - - for (ColumnUpdate update : mutation.getUpdates()) { - if (new ColumnFQ(update).equals(ServerColumnFamily.DIRECTORY_COLUMN)) { - isSplitMutation = true; - } else if (new Text(update.getColumnFamily()) - .equals(CurrentLocationColumnFamily.NAME)) { - isLocationMutation = true; - } else if (new Text(update.getColumnFamily()).equals(DataFileColumnFamily.NAME)) { - try { - // This actually validates for a second time as the loop already validates - // if a DataFileColumnFamily, this will likely be fixed as part of - // https://github.com/apache/accumulo/issues/3505 - dataFiles.add(StoredTabletFile.of(new Text(update.getColumnQualifier()))); - } catch (RuntimeException e) { - violations = addViolation(violations, 12); - } - } else if (new Text(update.getColumnFamily()).equals(BulkFileColumnFamily.NAME)) { - try { - loadedFiles.add(StoredTabletFile.of(new Text(update.getColumnQualifier()))); - } catch (RuntimeException e) { - violations = addViolation(violations, 12); - } - - if (!new String(update.getValue(), UTF_8).equals(tidString)) { - otherTidCount++; - } - } + if (bfcValidationData != null) { + bfcValidationData.setIsLocationMutation(true); } - - if (!isSplitMutation && !isLocationMutation) { - try { - // attempt to parse value - BulkFileColumnFamily.getBulkLoadTid(new Value(tidString)); - - if (otherTidCount > 0 || !dataFiles.equals(loadedFiles)) { - violations = addViolation(violations, 8); - } - } catch (Exception ex) { - violations = addViolation(violations, 8); - } + break; + case SuspendLocationColumn.STR_NAME: + validateSuspendLocationFamily(violations, columnUpdate); + break; + case BulkFileColumnFamily.STR_NAME: + // defer validating the bulk file column updates until the end (relies on checks done + // on 
other column updates) + if (bulkFileColUpdates != null) { + bulkFileColUpdates.add(columnUpdate); } - - checkedBulk = true; - } - } else { - if (!isValidColumn(columnFamily, columnQualifier)) { - violations = addViolation(violations, 2); - } else { - final var column = new ColumnFQ(columnUpdate); - if (column.equals(TabletColumnFamily.PREV_ROW_COLUMN) - && columnUpdate.getValue().length > 0 - && (violations == null || !violations.contains((short) 4))) { - KeyExtent ke = KeyExtent.fromMetaRow(new Text(mutation.getRow())); - - Text per = TabletColumnFamily.decodePrevEndRow(new Value(columnUpdate.getValue())); - - boolean prevEndRowLessThanEndRow = - per == null || ke.endRow() == null || per.compareTo(ke.endRow()) < 0; - - if (!prevEndRowLessThanEndRow) { - violations = addViolation(violations, 3); - } - } else if (column.equals(ServerColumnFamily.LOCK_COLUMN)) { - if (zooCache == null) { - zooCache = new ZooCache(context.getZooReader(), null); - CleanerUtil.zooCacheClearer(this, zooCache); - } - - if (zooRoot == null) { - zooRoot = context.getZooKeeperRoot(); - } - - boolean lockHeld = false; - String lockId = new String(columnUpdate.getValue(), UTF_8); - - try { - lockHeld = ServiceLock.isLockHeld(zooCache, new ZooUtil.LockID(zooRoot, lockId)); - } catch (Exception e) { - log.debug("Failed to verify lock was held {} {}", lockId, e.getMessage()); - } - - if (!lockHeld) { - violations = addViolation(violations, 7); - } - } else if (column.equals(SuspendLocationColumn.SUSPEND_COLUMN)) { - try { - SuspendingTServer.fromValue(new Value(columnUpdate.getValue())); - } catch (IllegalArgumentException e) { - violations = addViolation(violations, 10); - } + break; + case DataFileColumnFamily.STR_NAME: + validateDataFileFamily(violations, columnUpdate, bfcValidationData); + break; + case ScanFileColumnFamily.STR_NAME: + validateScanFileFamily(violations, columnUpdate); + break; ++ case CompactedColumnFamily.STR_NAME: ++ validateCompactedFamily(violations, columnUpdate); ++ 
break; ++ case UserCompactionRequestedColumnFamily.STR_NAME: ++ validateUserCompactionRequestedFamily(violations, columnUpdate); ++ break; ++ case SplitColumnFamily.STR_NAME: ++ validateSplitFamily(violations, columnUpdate); ++ break; + default: - if (!isValidColumn(columnUpdate)) { ++ if (!isValidColumn(columnFamily, columnQualifier)) { + addViolation(violations, 2); } - } } } - if (violations != null) { + validateBulkFileFamily(violations, bulkFileColUpdates, bfcValidationData); + + if (!violations.isEmpty()) { - log.debug("violating metadata mutation : {}", new String(mutation.getRow(), UTF_8)); + log.debug("violating metadata mutation : {} {}", new String(mutation.getRow(), UTF_8), + violations); for (ColumnUpdate update : mutation.getUpdates()) { log.debug(" update: {}:{} value {}", new String(update.getColumnFamily(), UTF_8), new String(update.getColumnQualifier(), UTF_8), @@@ -465,4 -263,174 +303,234 @@@ return null; } + private void validateColValLen(ArrayList<Short> violations, ColumnUpdate columnUpdate) { + Text columnFamily = new Text(columnUpdate.getColumnFamily()); ++ Text columnQualifier = new Text(columnUpdate.getColumnQualifier()); + if (columnUpdate.getValue().length == 0 && !(columnFamily.equals(ScanFileColumnFamily.NAME) - || columnFamily.equals(LogColumnFamily.NAME))) { ++ || columnFamily.equals(LogColumnFamily.NAME) ++ || TabletColumnFamily.REQUESTED_COLUMN.equals(columnFamily, columnQualifier) ++ || columnFamily.equals(CompactedColumnFamily.NAME) ++ || columnFamily.equals(UserCompactionRequestedColumnFamily.NAME))) { + addViolation(violations, 6); + } + } + + private void validateTabletRow(ArrayList<Short> violations, byte[] row) { + // check the row, it should contain at least one ";" or end with "<". Row should also + // not be less than AccumuloTable.METADATA.tableId(). 
+ boolean containsSemiC = false; + + for (byte b : row) { + if (b == ';') { + containsSemiC = true; + } + + if (b == ';' || b == '<') { + break; + } + + if (!validTableNameChars[0xff & b]) { + addIfNotPresent(violations, 4); + } + } + + if (!containsSemiC) { + // see if last row char is < + if (row.length == 0 || row[row.length - 1] != '<') { + addIfNotPresent(violations, 4); + } + } + + if (row.length > 0 && row[0] == '!') { + if (row.length < 3 || row[1] != '0' || (row[2] != '<' && row[2] != ';')) { + addIfNotPresent(violations, 4); + } + } + + // ensure row is not less than AccumuloTable.METADATA.tableId() + if (Arrays.compare(row, AccumuloTable.METADATA.tableId().canonical().getBytes(UTF_8)) < 0) { + addViolation(violations, 5); + } + } + + private void validateTabletFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate, + Mutation mutation) { + String qualStr = new String(columnUpdate.getColumnQualifier(), UTF_8); + - if (qualStr.equals(TabletColumnFamily.PREV_ROW_QUAL)) { - if (columnUpdate.getValue().length > 0 && !violations.contains((short) 4)) { - KeyExtent ke = KeyExtent.fromMetaRow(new Text(mutation.getRow())); - Text per = TabletColumnFamily.decodePrevEndRow(new Value(columnUpdate.getValue())); - boolean prevEndRowLessThanEndRow = - per == null || ke.endRow() == null || per.compareTo(ke.endRow()) < 0; - - if (!prevEndRowLessThanEndRow) { - addViolation(violations, 3); ++ switch (qualStr) { ++ case (TabletColumnFamily.PREV_ROW_QUAL): ++ if (columnUpdate.getValue().length > 0 && !violations.contains((short) 4)) { ++ KeyExtent ke = KeyExtent.fromMetaRow(new Text(mutation.getRow())); ++ Text per = TabletColumnFamily.decodePrevEndRow(new Value(columnUpdate.getValue())); ++ boolean prevEndRowLessThanEndRow = ++ per == null || ke.endRow() == null || per.compareTo(ke.endRow()) < 0; ++ ++ if (!prevEndRowLessThanEndRow) { ++ addViolation(violations, 3); ++ } + } - } ++ break; ++ case (TabletColumnFamily.AVAILABILITY_QUAL): ++ try { ++ 
TabletAvailabilityUtil.fromValue(new Value(columnUpdate.getValue())); ++ } catch (IllegalArgumentException e) { ++ addViolation(violations, 16); ++ } ++ break; + } + } + + private void validateServerFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate, + ServerContext context, BulkFileColData bfcValidationData) { + String qualStr = new String(columnUpdate.getColumnQualifier(), UTF_8); + + switch (qualStr) { + case ServerColumnFamily.LOCK_QUAL: + if (zooCache == null) { + zooCache = new ZooCache(context.getZooReader(), null); + CleanerUtil.zooCacheClearer(this, zooCache); + } + + if (zooRoot == null) { + zooRoot = context.getZooKeeperRoot(); + } + + boolean lockHeld = false; + String lockId = new String(columnUpdate.getValue(), UTF_8); + + try { + lockHeld = ServiceLock.isLockHeld(zooCache, new ZooUtil.LockID(zooRoot, lockId)); + } catch (Exception e) { + log.debug("Failed to verify lock was held {} {}", lockId, e.getMessage()); + } + + if (!lockHeld) { + addViolation(violations, 7); + } + break; + case ServerColumnFamily.DIRECTORY_QUAL: + // splits, which also write the time reference, are allowed to write this reference + // even when the transaction is not running because the other half of the tablet is + // holding a reference to the file. 
+ if (bfcValidationData != null) { + bfcValidationData.setIsSplitMutation(true); + } + break; ++ case ServerColumnFamily.OPID_QUAL: ++ try { ++ TabletOperationId.validate(new String(columnUpdate.getValue(), UTF_8)); ++ } catch (IllegalArgumentException e) { ++ addViolation(violations, 9); ++ } ++ break; ++ case ServerColumnFamily.SELECTED_QUAL: ++ try { ++ SelectedFiles.from(new String(columnUpdate.getValue(), UTF_8)); ++ } catch (RuntimeException e) { ++ addViolation(violations, 11); ++ } ++ break; + } + } + + private void validateSuspendLocationFamily(ArrayList<Short> violations, + ColumnUpdate columnUpdate) { + String qualStr = new String(columnUpdate.getColumnQualifier(), UTF_8); + String suspendColQualStr = + new String(SuspendLocationColumn.SUSPEND_COLUMN.getColumnQualifier().getBytes(), UTF_8); + + if (qualStr.equals(suspendColQualStr)) { + try { + SuspendingTServer.fromValue(new Value(columnUpdate.getValue())); + } catch (IllegalArgumentException e) { + addViolation(violations, 10); + } + } + } + + private void validateDataFileFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate, + BulkFileColData bfcValidationData) { + Consumer<StoredTabletFile> stfConsumer = + bfcValidationData == null ? 
stf -> {} : bfcValidationData::addDataFile; + validateDataFileMetadata(violations, new String(columnUpdate.getColumnQualifier(), UTF_8), + stfConsumer); + + try { + DataFileValue dfv = new DataFileValue(columnUpdate.getValue()); + + if (dfv.getSize() < 0 || dfv.getNumEntries() < 0) { + addViolation(violations, 1); + } + } catch (NumberFormatException | ArrayIndexOutOfBoundsException nfe) { + addViolation(violations, 1); + } + } + + private void validateScanFileFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate) { + validateDataFileMetadata(violations, new String(columnUpdate.getColumnQualifier(), UTF_8), + stf -> {}); + } + + private void validateBulkFileFamily(ArrayList<Short> violations, + Collection<ColumnUpdate> bulkFileColUpdates, BulkFileColData bfcValidationData) { + if (bulkFileColUpdates != null && !bulkFileColUpdates.isEmpty()) { + for (ColumnUpdate bulkFileColUpdate : bulkFileColUpdates) { + validateDataFileMetadata(violations, + new String(bulkFileColUpdate.getColumnQualifier(), UTF_8), + bfcValidationData::addLoadedFile); + + bfcValidationData.addTidSeen(new String(bulkFileColUpdate.getValue(), UTF_8)); + } + + if (!bfcValidationData.getIsSplitMutation() && !bfcValidationData.getIsLocationMutation()) { ++ for (String tidString : bfcValidationData.getTidsSeen()) { ++ try { ++ // attempt to parse value ++ BulkFileColumnFamily.getBulkLoadTid(new Value(tidString)); ++ } catch (Exception e) { ++ addViolation(violations, 8); ++ } ++ } + if (bfcValidationData.getTidsSeen().size() > 1 + || !bfcValidationData.dataFilesEqualsLoadedFiles()) { + addViolation(violations, 8); + } + } + } + } + ++ private void validateCompactedFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate) { ++ if (!FateId.isFateId(new String(columnUpdate.getColumnQualifier(), UTF_8))) { ++ addViolation(violations, 13); ++ } ++ } ++ ++ private void validateUserCompactionRequestedFamily(ArrayList<Short> violations, ++ ColumnUpdate columnUpdate) { ++ if (!FateId.isFateId(new 
String(columnUpdate.getColumnQualifier(), UTF_8))) { ++ addViolation(violations, 14); ++ } ++ } ++ ++ private void validateSplitFamily(ArrayList<Short> violations, ColumnUpdate columnUpdate) { ++ String qualStr = new String(columnUpdate.getColumnQualifier(), UTF_8); ++ ++ if (qualStr.equals(SplitColumnFamily.UNSPLITTABLE_QUAL)) { ++ try { ++ UnSplittableMetadata.toUnSplittable(new String(columnUpdate.getValue(), UTF_8)); ++ } catch (RuntimeException e) { ++ addViolation(violations, 15); ++ } ++ } ++ } ++ + private boolean hasBulkCol(Collection<ColumnUpdate> colUpdates) { + for (ColumnUpdate colUpdate : colUpdates) { + if (Arrays.equals(colUpdate.getColumnFamily(), BULK_COL_BYTES)) { + return true; + } + } + return false; + } + } diff --cc server/base/src/main/java/org/apache/accumulo/server/util/Admin.java index ad8f30718b,781037f66e..8cef44c745 --- a/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/Admin.java @@@ -36,18 -35,14 +36,21 @@@ import java.util.HashSet import java.util.List; import java.util.Map; import java.util.Map.Entry; + import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; +import java.util.function.Consumer; +import java.util.function.Function; +import java.util.function.Predicate; + import java.util.function.Supplier; + import java.util.regex.Pattern; import java.util.stream.Collectors; import org.apache.accumulo.core.Constants; @@@ -102,9 -96,9 +112,11 @@@ import org.slf4j.LoggerFactory import com.beust.jcommander.JCommander; import com.beust.jcommander.Parameter; import com.beust.jcommander.Parameters; +import com.github.benmanes.caffeine.cache.Caffeine; +import 
com.github.benmanes.caffeine.cache.LoadingCache; import com.google.auto.service.AutoService; + import com.google.common.annotations.VisibleForTesting; + import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSortedMap; import com.google.common.collect.Lists; import com.google.common.net.HostAndPort; @@@ -133,8 -127,94 +145,95 @@@ public class Admin implements KeywordEx List<String> args = new ArrayList<>(); } + @Parameters(commandNames = "check", + commandDescription = "Performs checks for problems in Accumulo.") + public static class CheckCommand { + @Parameter(names = "list", + description = "Lists the different checks that can be run, the description of each check, and the other check(s) each check depends on.") + boolean list; + + @Parameter(names = "run", + description = "Runs the provided check(s) (explicit list or regex pattern specified following '-p'), beginning with their dependencies, or all checks if none are provided.") + boolean run; + + @Parameter(names = {"--name_pattern", "-p"}, + description = "Runs all checks that match the provided regex pattern.") + String pattern; + + @Parameter(description = "[<Check>...]") + List<String> checks; + + /** + * This should be used to get the check runner instead of {@link Check#getCheckRunner()}. This + * exists so that its functionality can be changed for testing. 
+ * + * @return the interface for running a check + */ + public CheckRunner getCheckRunner(Check check) { + return check.getCheckRunner(); + } + + public enum Check { + // Caution should be taken when changing or adding any new checks: order is important + SYSTEM_CONFIG(SystemConfigCheckRunner::new, "Validate the system config stored in ZooKeeper", + Collections.emptyList()), + ROOT_METADATA(RootMetadataCheckRunner::new, + "Checks integrity of the root tablet metadata stored in ZooKeeper", + Collections.singletonList(SYSTEM_CONFIG)), + ROOT_TABLE(RootTableCheckRunner::new, + "Scans all the tablet metadata stored in the root table and checks integrity", + Collections.singletonList(ROOT_METADATA)), + METADATA_TABLE(MetadataTableCheckRunner::new, + "Scans all the tablet metadata stored in the metadata table and checks integrity", + Collections.singletonList(ROOT_TABLE)), + SYSTEM_FILES(SystemFilesCheckRunner::new, + "Checks that files in system tablet metadata exist in DFS", + Collections.singletonList(ROOT_TABLE)), + USER_FILES(UserFilesCheckRunner::new, + "Checks that files in user tablet metadata exist in DFS", + Collections.singletonList(METADATA_TABLE)); + + private final Supplier<CheckRunner> checkRunner; + private final String description; + private final List<Check> dependencies; + + Check(Supplier<CheckRunner> checkRunner, String description, List<Check> dependencies) { + this.checkRunner = Objects.requireNonNull(checkRunner); + this.description = Objects.requireNonNull(description); + this.dependencies = Objects.requireNonNull(dependencies); + } + + /** + * This should not be called directly; use {@link CheckCommand#getCheckRunner(Check)} instead + * + * @return the interface for running a check + */ + public CheckRunner getCheckRunner() { + return checkRunner.get(); + } + + /** + * @return the description of the check + */ + public String getDescription() { + return description; + } + + /** + * @return the list of other checks the check depends on + */ + 
public List<Check> getDependencies() { + return dependencies; + } + } + + public enum CheckStatus { + OK, FAILED, SKIPPED_DEPENDENCY_FAILED, FILTERED_OUT; + } + } + - @Parameters(commandDescription = "print tablets that are offline in online tables") + @Parameters(commandDescription = "Looks for tablets that are unexpectedly offline, tablets that " + + "reference missing files, or tablets that reference absent fate operations.") static class CheckTabletsCommand { @Parameter(names = "--fixFiles", description = "Remove dangling file pointers") boolean fixFiles = false; @@@ -953,166 -1013,94 +1057,257 @@@ return statusFilter; } + /** + * If provided on the command line, get the FateInstanceType values provided. + * + * @return a set of fate instance types filters, or null if none provided + */ + private EnumSet<FateInstanceType> getCmdLineInstanceTypeFilters(List<String> instanceTypes) { + EnumSet<FateInstanceType> typesFilter = null; + if (!instanceTypes.isEmpty()) { + typesFilter = EnumSet.noneOf(FateInstanceType.class); + for (String instanceType : instanceTypes) { + typesFilter.add(FateInstanceType.valueOf(instanceType)); + } + } + return typesFilter; + } + + private static long printDanglingFateOperations(ServerContext context, String tableName) + throws Exception { + long totalDanglingSeen = 0; + if (tableName == null) { + for (var dataLevel : Ample.DataLevel.values()) { + try (var tablets = context.getAmple().readTablets().forLevel(dataLevel).build()) { + totalDanglingSeen += printDanglingFateOperations(context, tablets); + } + } + } else { + var tableId = context.getTableId(tableName); + try (var tablets = context.getAmple().readTablets().forTable(tableId).build()) { + totalDanglingSeen += printDanglingFateOperations(context, tablets); + } + } + + System.out.printf("\nFound %,d dangling references to fate operations\n", totalDanglingSeen); + + return totalDanglingSeen; + } + + private static long printDanglingFateOperations(ServerContext context, + 
Iterable<TabletMetadata> tablets) throws Exception { + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup = extents -> { + try (var lookedupTablets = + context.getAmple().readTablets().forTablets(extents, Optional.empty()).build()) { + Map<KeyExtent,TabletMetadata> tabletMap = new HashMap<>(); + lookedupTablets + .forEach(tabletMetadata -> tabletMap.put(tabletMetadata.getExtent(), tabletMetadata)); + return tabletMap; + } + }; + + UserFateStore<?> ufs = new UserFateStore<>(context); + MetaFateStore<?> mfs = new MetaFateStore<>(context.getZooKeeperRoot() + Constants.ZFATE, + context.getZooReaderWriter()); + LoadingCache<FateId,ReadOnlyFateStore.TStatus> fateStatusCache = Caffeine.newBuilder() + .maximumSize(100_000).expireAfterWrite(10, TimeUnit.SECONDS).build(fateId -> { + if (fateId.getType() == FateInstanceType.META) { + return mfs.read(fateId).getStatus(); + } else { + return ufs.read(fateId).getStatus(); + } + }); + + Predicate<FateId> activePredicate = fateId -> { + var status = fateStatusCache.get(fateId); + switch (status) { + case NEW: + case IN_PROGRESS: + case SUBMITTED: + case FAILED_IN_PROGRESS: + return true; + case FAILED: + case SUCCESSFUL: + case UNKNOWN: + return false; + default: + throw new IllegalStateException("Unexpected status: " + status); + } + }; + + AtomicLong danglingSeen = new AtomicLong(); + BiConsumer<KeyExtent,Set<FateId>> danglingConsumer = (extent, fateIds) -> { + danglingSeen.addAndGet(fateIds.size()); + fateIds.forEach(fateId -> System.out.println(fateId + " " + extent)); + }; + + findDanglingFateOperations(tablets, tabletLookup, activePredicate, danglingConsumer, 10_000); + return danglingSeen.get(); + } + + /** + * Finds tablets that point to fate operations that do not exist or are complete. 
+ * + * @param tablets the tablets to inspect + * @param tabletLookup a function that can look up a tablet's latest metadata + * @param activePredicate a predicate that can determine if a fate id is currently active + * @param danglingConsumer a consumer that tablets with inactive fate ids will be sent to + * @param bufferSize the number of candidate tablets to buffer before rechecking them + */ + static void findDanglingFateOperations(Iterable<TabletMetadata> tablets, + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup, + Predicate<FateId> activePredicate, BiConsumer<KeyExtent,Set<FateId>> danglingConsumer, + int bufferSize) { + + ArrayList<FateId> fateIds = new ArrayList<>(); + Map<KeyExtent,Set<FateId>> candidates = new HashMap<>(); + for (TabletMetadata tablet : tablets) { + fateIds.clear(); + getAllFateIds(tablet, fateIds::add); + fateIds.removeIf(activePredicate); + if (!fateIds.isEmpty()) { + candidates.put(tablet.getExtent(), new HashSet<>(fateIds)); + if (candidates.size() > bufferSize) { + processCandidates(candidates, tabletLookup, danglingConsumer); + candidates.clear(); + } + } + } + + processCandidates(candidates, tabletLookup, danglingConsumer); + } + + private static void processCandidates(Map<KeyExtent,Set<FateId>> candidates, + Function<Collection<KeyExtent>,Map<KeyExtent,TabletMetadata>> tabletLookup, + BiConsumer<KeyExtent,Set<FateId>> danglingConsumer) { + // Perform a 2nd check of the tablet to avoid race conditions like the following. + // 1. THREAD 1 : TabletMetadata is read and points to active fate operation + // 2. THREAD 2 : The fate operation is deleted from the tablet + // 3. THREAD 2 : The fate operation completes + // 4. 
THREAD 1 : Checks if the fate operation read in step 1 is active and finds it is not + + Map<KeyExtent,TabletMetadata> currentTablets = tabletLookup.apply(candidates.keySet()); + HashSet<FateId> currentFateIds = new HashSet<>(); + candidates.forEach((extent, fateIds) -> { + var currentTablet = currentTablets.get(extent); + if (currentTablet != null) { + currentFateIds.clear(); + getAllFateIds(currentTablet, currentFateIds::add); + // Only keep fate ids that are still present in the tablet. Any new fate ids in + // currentFateIds that were not seen on the first pass are not considered here. To check + // those new ones, the entire two-step process would need to be rerun. + fateIds.retainAll(currentFateIds); + + if (!fateIds.isEmpty()) { + // the fateIds in this set were found to be inactive and still exist in the tablet + // metadata after being found inactive + danglingConsumer.accept(extent, fateIds); + } + } // else the tablet no longer exists so nothing to report + }); + } + + /** + * Extracts all fate ids that a tablet points to from any field. 
+ */ + private static void getAllFateIds(TabletMetadata tabletMetadata, + Consumer<FateId> fateIdConsumer) { + tabletMetadata.getLoaded().values().forEach(fateIdConsumer); + if (tabletMetadata.getSelectedFiles() != null) { + fateIdConsumer.accept(tabletMetadata.getSelectedFiles().getFateId()); + } + if (tabletMetadata.getOperationId() != null) { + fateIdConsumer.accept(tabletMetadata.getOperationId().getFateId()); + } + } ++ + @VisibleForTesting + public static void executeCheckCommand(ServerContext context, CheckCommand cmd) { + validateAndTransformCheckCommand(cmd); + + if (cmd.list) { + listChecks(); + } else if (cmd.run) { + var givenChecks = + cmd.checks.stream().map(CheckCommand.Check::valueOf).collect(Collectors.toList()); + executeRunCheckCommand(cmd, givenChecks); + } + } + + private static void validateAndTransformCheckCommand(CheckCommand cmd) { + Preconditions.checkArgument(cmd.list != cmd.run, "Must use either 'list' or 'run'"); + if (cmd.list) { + Preconditions.checkArgument(cmd.checks == null && cmd.pattern == null, + "'list' does not expect any further arguments"); + } else if (cmd.pattern != null) { + Preconditions.checkArgument(cmd.checks == null, "Expected one argument (the regex pattern)"); + List<String> matchingChecks = new ArrayList<>(); + var pattern = Pattern.compile(cmd.pattern.toUpperCase()); + for (CheckCommand.Check check : CheckCommand.Check.values()) { + if (pattern.matcher(check.name()).matches()) { + matchingChecks.add(check.name()); + } + } + Preconditions.checkArgument(!matchingChecks.isEmpty(), + "No checks matched the given pattern: " + pattern.pattern()); + cmd.checks = matchingChecks; + } else { + if (cmd.checks == null) { + cmd.checks = EnumSet.allOf(CheckCommand.Check.class).stream().map(Enum::name) + .collect(Collectors.toList()); + } + } + } + + private static void listChecks() { + System.out.println(); + System.out.printf("%-20s | %-80s | %-20s%n", "Check Name", "Description", "Depends on"); + 
System.out.println("-".repeat(120)); + for (CheckCommand.Check check : CheckCommand.Check.values()) { + System.out.printf("%-20s | %-80s | %-20s%n", check.name(), check.getDescription(), + check.getDependencies().stream().map(CheckCommand.Check::name) + .collect(Collectors.joining(", "))); + } + System.out.println("-".repeat(120)); + System.out.println(); + } + + private static void executeRunCheckCommand(CheckCommand cmd, + List<CheckCommand.Check> givenChecks) { + // Get all the checks in the order they are declared in the enum + final var allChecks = CheckCommand.Check.values(); + final TreeMap<CheckCommand.Check,CheckCommand.CheckStatus> checkStatus = new TreeMap<>(); + + for (CheckCommand.Check check : allChecks) { + if (depsFailed(check, checkStatus)) { + checkStatus.put(check, CheckCommand.CheckStatus.SKIPPED_DEPENDENCY_FAILED); + } else { + if (givenChecks.contains(check)) { + checkStatus.put(check, cmd.getCheckRunner(check).runCheck()); + } else { + checkStatus.put(check, CheckCommand.CheckStatus.FILTERED_OUT); + } + } + } + + printChecksResults(checkStatus); + } + + private static boolean depsFailed(CheckCommand.Check check, + TreeMap<CheckCommand.Check,CheckCommand.CheckStatus> checkStatus) { + return check.getDependencies().stream() + .anyMatch(dep -> checkStatus.get(dep) == CheckCommand.CheckStatus.FAILED + || checkStatus.get(dep) == CheckCommand.CheckStatus.SKIPPED_DEPENDENCY_FAILED); + } + + private static void + printChecksResults(TreeMap<CheckCommand.Check,CheckCommand.CheckStatus> checkStatus) { + System.out.println(); + System.out.printf("%-20s | %-20s%n", "Check Name", "Status"); + System.out.println("-".repeat(50)); + for (Map.Entry<CheckCommand.Check,CheckCommand.CheckStatus> entry : checkStatus.entrySet()) { + System.out.printf("%-20s | %-20s%n", entry.getKey().name(), entry.getValue().name()); + } + System.out.println("-".repeat(50)); + System.out.println(); + } } diff --cc 
server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java index 1424673095,4d69ebd8a6..043fa60376 --- a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java @@@ -19,10 -19,9 +19,12 @@@ package org.apache.accumulo.server.constraints; import static org.junit.jupiter.api.Assertions.assertEquals; + import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertIterableEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; + import static org.junit.jupiter.api.Assertions.assertTrue; import java.lang.reflect.Method; import java.util.Base64; @@@ -267,20 -253,20 +269,20 @@@ public class MetadataConstraintsTest m.put( BulkFileColumnFamily.NAME, StoredTabletFile .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("5")); + new Value(fateId1.canonical())); ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1")); violations = mc.check(createEnv(), m); - assertNull(violations); + assertTrue(violations.isEmpty()); // mutation that looks like a load m = new Mutation(new Text("0;foo")); m.put( BulkFileColumnFamily.NAME, StoredTabletFile .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("5")); + new Value(fateId1.canonical())); m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new Value("127.0.0.1:9997")); violations = mc.check(createEnv(), m); - assertNull(violations); + assertTrue(violations.isEmpty()); // deleting a load flag m = new Mutation(new Text("0;foo")); @@@ -483,128 -470,9 +485,128 @@@ new Range("a", false, "b", true)).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); violations = 
mc.check(createEnv(), m); - assertNull(violations); + assertTrue(violations.isEmpty()); - assertNotNull(mc.getViolationDescription((short) 9)); + assertNotNull(mc.getViolationDescription((short) 12)); + } + + @Test + public void testOperationId() { + MetadataConstraints mc = new MetadataConstraints(); + Mutation m; + List<Short> violations; + + m = new Mutation(new Text("0;foo")); + ServerColumnFamily.OPID_COLUMN.put(m, new Value("bad id")); + assertViolation(mc, m, (short) 9); + + m = new Mutation(new Text("0;foo")); + ServerColumnFamily.OPID_COLUMN.put(m, + new Value("MERGING:FATE:META:12345678-9abc-def1-2345-6789abcdef12")); + violations = mc.check(createEnv(), m); + assertNull(violations); + } + + @Test + public void testSelectedFiles() { + MetadataConstraints mc = new MetadataConstraints(); + Mutation m; + List<Short> violations; + FateId fateId = FateId.from(FateInstanceType.META, UUID.randomUUID()); + + m = new Mutation(new Text("0;foo")); + ServerColumnFamily.SELECTED_COLUMN.put(m, new Value("bad id")); + violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(1, violations.size()); + assertEquals(Short.valueOf((short) 11), violations.get(0)); + + m = new Mutation(new Text("0;foo")); + ServerColumnFamily.SELECTED_COLUMN.put(m, + new Value(new SelectedFiles(Set.of(new ReferencedTabletFile( + new Path("hdfs://nn.somewhere.com:86753/accumulo/tables/42/t-0000/F00001.rf")) + .insert()), true, fateId, SteadyTime.from(100, TimeUnit.NANOSECONDS)) + .getMetadataValue())); + violations = mc.check(createEnv(), m); + assertNull(violations); + } + + @Test + public void testCompacted() { + testFateCqValidation(CompactedColumnFamily.STR_NAME, (short) 13); + } + + @Test + public void testUserCompactionRequested() { + testFateCqValidation(UserCompactionRequestedColumnFamily.STR_NAME, (short) 14); + } + + // Verify that columns that store a FateId in their CQ + // validate and only allow a correctly formatted FateId + private void 
testFateCqValidation(String column, short violation) { + MetadataConstraints mc = new MetadataConstraints(); + Mutation m; + List<Short> violations; + FateId fateId = FateId.from(FateInstanceType.META, UUID.randomUUID()); + + m = new Mutation(new Text("0;foo")); + m.put(column, fateId.canonical(), ""); + violations = mc.check(createEnv(), m); + assertNull(violations); + + m = new Mutation(new Text("0;foo")); + m.put(column, "incorrect data", ""); + violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(1, violations.size()); + assertEquals(violation, violations.get(0)); + } + + @Test + public void testUnsplittableColumn() { + MetadataConstraints mc = new MetadataConstraints(); + Mutation m; + List<Short> violations; + + StoredTabletFile sf1 = StoredTabletFile.of(new Path("hdfs://nn1/acc/tables/1/t-0001/sf1.rf")); + var unsplittableMeta = UnSplittableMetadata + .toUnSplittable(KeyExtent.fromMetaRow(new Text("0;foo")), 100, 110, 120, Set.of(sf1)); + + m = new Mutation(new Text("0;foo")); + SplitColumnFamily.UNSPLITTABLE_COLUMN.put(m, new Value(unsplittableMeta.toBase64())); + violations = mc.check(createEnv(), m); + assertNull(violations); + + // Verify empty value not allowed + m = new Mutation(new Text("0;foo")); + SplitColumnFamily.UNSPLITTABLE_COLUMN.put(m, new Value()); + violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(2, violations.size()); + assertIterableEquals(List.of((short) 6, (short) 15), violations); + + // test invalid args + KeyExtent extent = KeyExtent.fromMetaRow(new Text("0;foo")); + assertThrows(IllegalArgumentException.class, + () -> UnSplittableMetadata.toUnSplittable(extent, -100, 110, 120, Set.of(sf1))); + assertThrows(IllegalArgumentException.class, + () -> UnSplittableMetadata.toUnSplittable(extent, 100, -110, 120, Set.of(sf1))); + assertThrows(IllegalArgumentException.class, + () -> UnSplittableMetadata.toUnSplittable(extent, 100, 110, -120, Set.of(sf1))); + 
assertThrows(NullPointerException.class, + () -> UnSplittableMetadata.toUnSplittable(extent, 100, 110, 120, null)); + + // Test metadata constraints validate invalid hashcode + m = new Mutation(new Text("0;foo")); + unsplittableMeta = UnSplittableMetadata.toUnSplittable(extent, 100, 110, 120, Set.of(sf1)); + // partial hashcode is invalid + var invalidHashCode = + unsplittableMeta.toBase64().substring(0, unsplittableMeta.toBase64().length() - 1); + SplitColumnFamily.UNSPLITTABLE_COLUMN.put(m, new Value(invalidHashCode)); + violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(1, violations.size()); + assertEquals(Short.valueOf((short) 15), violations.get(0)); } // Encode a row how it would appear in Json