This is an automated email from the ASF dual-hosted git repository. cshannon pushed a commit to branch no-chop-merge in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/no-chop-merge by this push: new 59f082642f No chop and no split deletions (#3728) 59f082642f is described below commit 59f082642f0679409de444d1b8c9bf363c42e41a Author: Christopher L. Shannon <christopher.l.shan...@gmail.com> AuthorDate: Fri Sep 15 09:33:53 2023 -0400 No chop and no split deletions (#3728) This commit changes deletions to no longer require splitting a tablet or chop compactions. Instead, tablets that overlap the start/end of a deletion range will be have its files fenced to exclude the rows that are part of the deletion. Co-authored-by: Christopher Tubbs <ctubb...@apache.org> Co-authored-by: Keith Turner <ktur...@apache.org> --- .../accumulo/server/manager/state/MergeInfo.java | 3 - .../java/org/apache/accumulo/manager/Manager.java | 11 - .../accumulo/manager/TabletGroupWatcher.java | 388 ++++++++++++++++----- .../apache/accumulo/manager/state/MergeStats.java | 28 +- .../accumulo/test/functional/DeleteRowsIT.java | 102 +++++- 5 files changed, 407 insertions(+), 125 deletions(-) diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/MergeInfo.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/MergeInfo.java index 24ad4fd90a..510dc5fbff 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/MergeInfo.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/MergeInfo.java @@ -82,9 +82,6 @@ public class MergeInfo implements Writable { } public boolean needsToBeChopped(KeyExtent otherExtent) { - // TODO: For now only Deletes still need chops - // During a delete, the block after the merge will be stretched to cover the deleted area. - // Therefore, it needs to be chopped if (isDelete() && otherExtent.tableId().equals(extent.tableId())) { return otherExtent.prevEndRow() != null && otherExtent.prevEndRow().equals(extent.endRow()); } else { diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java index 7e8efbf963..d214a06c91 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/Manager.java @@ -95,7 +95,6 @@ import org.apache.accumulo.core.metadata.MetadataTable; import org.apache.accumulo.core.metadata.RootTable; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletLocationState; -import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.metrics.MetricsUtil; @@ -649,16 +648,6 @@ public class Manager extends AbstractServer break; case STARTED: case SPLITTING: - return TabletGoalState.HOSTED; - case WAITING_FOR_CHOPPED: - if (tls.getState(tserverSet.getCurrentServers()).equals(TabletState.HOSTED)) { - if (tls.chopped) { - return TabletGoalState.UNASSIGNED; - } - } else if (tls.chopped && tls.walogs.isEmpty()) { - return TabletGoalState.UNASSIGNED; - } - return TabletGoalState.HOSTED; case WAITING_FOR_OFFLINE: case MERGING: diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index b3affa47da..b2384972c3 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -30,12 +30,16 @@ import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Objects; +import java.util.Optional; import java.util.Set; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.TimeUnit; +import java.util.function.Predicate; +import java.util.stream.Stream; import org.apache.accumulo.core.client.AccumuloClient; import org.apache.accumulo.core.client.AccumuloException; @@ -66,6 +70,7 @@ import org.apache.accumulo.core.metadata.TabletLocationState.BadLocationStateExc import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.Ample; import org.apache.accumulo.core.metadata.schema.Ample.DataLevel; +import org.apache.accumulo.core.metadata.schema.DataFileValue; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ChoppedColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.CurrentLocationColumnFamily; @@ -75,9 +80,12 @@ import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.Fu import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataTime; +import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.apache.accumulo.core.metadata.schema.TabletMetadata.ColumnType; import org.apache.accumulo.core.metadata.schema.TabletMetadata.Location; +import org.apache.accumulo.core.metadata.schema.TabletsMetadata; import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.tabletserver.thrift.NotServingTabletException; +import org.apache.accumulo.core.util.Pair; import org.apache.accumulo.core.util.threads.Threads.AccumuloDaemonThread; import org.apache.accumulo.manager.Manager.TabletGoalState; import org.apache.accumulo.manager.state.MergeStats; @@ -102,8 +110,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.thrift.TException; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableSortedSet; import com.google.common.collect.Iterators; +import com.google.common.collect.Sets; abstract class TabletGroupWatcher extends AccumuloDaemonThread { // Constants used to make sure assignment logging isn't excessive in quantity or size @@ -251,8 +261,6 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { stats.update(tableId, state); mergeStats.update(tls.extent, state, tls.chopped, !tls.walogs.isEmpty()); - sendChopRequest(mergeStats.getMergeInfo(), state, tls); - sendSplitRequest(mergeStats.getMergeInfo(), state, tls); // Always follow through with assignments if (state == TabletState.ASSIGNED) { @@ -557,83 +565,6 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { return result; } - private void sendSplitRequest(MergeInfo info, TabletState state, TabletLocationState tls) { - // Already split? - if (!info.getState().equals(MergeState.SPLITTING)) { - return; - } - // Merges don't split - if (!info.isDelete()) { - return; - } - // Online and ready to split? - if (!state.equals(TabletState.HOSTED)) { - return; - } - // Does this extent cover the end points of the delete? - KeyExtent range = info.getExtent(); - if (tls.extent.overlaps(range)) { - for (Text splitPoint : new Text[] {range.prevEndRow(), range.endRow()}) { - if (splitPoint == null) { - continue; - } - if (!tls.extent.contains(splitPoint)) { - continue; - } - if (splitPoint.equals(tls.extent.endRow())) { - continue; - } - if (splitPoint.equals(tls.extent.prevEndRow())) { - continue; - } - try { - TServerConnection conn; - conn = manager.tserverSet.getConnection(tls.getCurrentServer()); - if (conn != null) { - Manager.log.info("Asking {} to split {} at {}", tls.current, tls.extent, splitPoint); - conn.splitTablet(tls.extent, splitPoint); - } else { - Manager.log.warn("Not connected to server {}", tls.current); - } - } catch (NotServingTabletException e) { - Manager.log.debug("Error asking tablet server to split a tablet: ", e); - } catch (Exception e) { - Manager.log.warn("Error asking tablet server to split a tablet: ", e); - } - } - } - } - - private void sendChopRequest(MergeInfo info, TabletState state, TabletLocationState tls) { - // Don't bother if we're in the wrong state - if (!info.getState().equals(MergeState.WAITING_FOR_CHOPPED)) { - return; - } - // Tablet must be online - if (!state.equals(TabletState.HOSTED)) { - return; - } - // Tablet isn't already chopped - if (tls.chopped) { - return; - } - // Tablet ranges intersect - if (info.needsToBeChopped(tls.extent)) { - TServerConnection conn; - try { - conn = manager.tserverSet.getConnection(tls.getCurrentServer()); - if (conn != null) { - Manager.log.info("Asking {} to chop {}", tls.current, tls.extent); - conn.chop(manager.managerLock, tls.extent); - } else { - Manager.log.warn("Could not connect to server {}", tls.current); - } - } catch (TException e) { - Manager.log.warn("Communications error asking tablet server to chop a tablet"); - } - } - } - private void updateMergeState(Map<TableId,MergeStats> mergeStatsCache) { for (MergeStats stats : mergeStatsCache.values()) { try { @@ -669,8 +600,118 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { } } + // This method finds returns the deletion starting row (exclusive) for tablets that + // need to be actually deleted. If the startTablet is null then + // the deletion start row will just be null as all tablets are being deleted + // up to the end. Otherwise, this returns the endRow of the first tablet + // as the first tablet should be kept and will have been previously + // fenced if necessary + private Text getDeletionStartRow(final KeyExtent startTablet) { + if (startTablet == null) { + Manager.log.debug("First tablet for delete range is null"); + return null; + } + + final Text deletionStartRow = startTablet.endRow(); + Manager.log.debug("Start row is {} for deletion", deletionStartRow); + + return deletionStartRow; + } + + // This method finds returns the deletion ending row (inclusive) for tablets that + // need to be actually deleted. If the endTablet is null then + // the deletion end row will just be null as all tablets are being deleted + // after the start row. Otherwise, this returns the prevEndRow of the last tablet + // as the last tablet should be kept and will have been previously + // fenced if necessary + private Text getDeletionEndRow(final KeyExtent endTablet) { + if (endTablet == null) { + Manager.log.debug("Last tablet for delete range is null"); + return null; + } + + Text deletionEndRow = endTablet.prevEndRow(); + Manager.log.debug("Deletion end row is {}", deletionEndRow); + + return deletionEndRow; + } + + private static boolean isFirstTabletInTable(KeyExtent tablet) { + return tablet != null && tablet.prevEndRow() == null; + } + + private static boolean isLastTabletInTable(KeyExtent tablet) { + return tablet != null && tablet.endRow() == null; + } + + private static boolean areContiguousTablets(KeyExtent firstTablet, KeyExtent lastTablet) { + return firstTablet != null && lastTablet != null + && Objects.equals(firstTablet.endRow(), lastTablet.prevEndRow()); + } + + private boolean hasTabletsToDelete(final KeyExtent firstTabletInRange, + final KeyExtent lastTableInRange) { + // If the tablets are equal (and not null) then the deletion range is just part of 1 tablet + // which will be fenced so there are no tablets to delete. The null check is because if both + // are null then we are just deleting everything, so we do have tablets to delete + if (Objects.equals(firstTabletInRange, lastTableInRange) && firstTabletInRange != null) { + Manager.log.trace( + "No tablets to delete, firstTablet {} equals lastTablet {} in deletion range and was fenced.", + firstTabletInRange, lastTableInRange); + return false; + // If the lastTablet of the deletion range is the first tablet of the table it has been fenced + // already so nothing to actually delete before it + } else if (isFirstTabletInTable(lastTableInRange)) { + Manager.log.trace( + "No tablets to delete, lastTablet {} in deletion range is the first tablet of the table and was fenced.", + lastTableInRange); + return false; + // If the firstTablet of the deletion range is the last tablet of the table it has been fenced + // already so nothing to actually delete after it + } else if (isLastTabletInTable(firstTabletInRange)) { + Manager.log.trace( + "No tablets to delete, firstTablet {} in deletion range is the last tablet of the table and was fenced.", + firstTabletInRange); + return false; + // If the firstTablet and lastTablet are contiguous tablets then there is nothing to delete as + // each will be fenced and nothing between + } else if (areContiguousTablets(firstTabletInRange, lastTableInRange)) { + Manager.log.trace( + "No tablets to delete, firstTablet {} and lastTablet {} in deletion range are contiguous and were fenced.", + firstTabletInRange, lastTableInRange); + return false; + } + + return true; + } + private void deleteTablets(MergeInfo info) throws AccumuloException { - KeyExtent extent = info.getExtent(); + // Before updated metadata and get the first and last tablets which + // are fenced if necessary + final Pair<KeyExtent,KeyExtent> firstAndLastTablets = updateMetadataRecordsForDelete(info); + + // Find the deletion start row (exclusive) for tablets that need to be actually deleted + // This will be null if deleting everything up until the end row or it will be + // the endRow of the first tablet as the first tablet should be kept and will have + // already been fenced if necessary + final Text deletionStartRow = getDeletionStartRow(firstAndLastTablets.getFirst()); + + // Find the deletion end row (inclusive) for tablets that need to be actually deleted + // This will be null if deleting everything after the starting row or it will be + // the prevEndRow of the last tablet as the last tablet should be kept and will have + // already been fenced if necessary + Text deletionEndRow = getDeletionEndRow(firstAndLastTablets.getSecond()); + + // check if there are any tablets to delete and if not return + if (!hasTabletsToDelete(firstAndLastTablets.getFirst(), firstAndLastTablets.getSecond())) { + Manager.log.trace("No tablets to delete for range {}, returning", info.getExtent()); + return; + } + + // Build an extent for the actual deletion range + final KeyExtent extent = + new KeyExtent(info.getExtent().tableId(), deletionEndRow, deletionStartRow); + Manager.log.debug("Tablet deletion range is {}", extent); String targetSystemTable = extent.isMeta() ? RootTable.NAME : MetadataTable.NAME; Manager.log.debug("Deleting tablets for {}", extent); MetadataTime metadataTime = null; @@ -924,6 +965,191 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { } } + // This method is used to detect if a tablet needs to be split/chopped for a delete + // Instead of performing a split or chop compaction, the tablet will have its files fenced. + private boolean needsFencingForDeletion(MergeInfo info, KeyExtent keyExtent) { + // Does this extent cover the end points of the delete? + final Predicate<Text> isWithin = r -> r != null && keyExtent.contains(r); + final Predicate<Text> isNotBoundary = + r -> !r.equals(keyExtent.endRow()) && !r.equals(keyExtent.prevEndRow()); + final KeyExtent deleteRange = info.getExtent(); + + return (keyExtent.overlaps(deleteRange) && Stream + .of(deleteRange.prevEndRow(), deleteRange.endRow()).anyMatch(isWithin.and(isNotBoundary))) + || info.needsToBeChopped(keyExtent); + } + + // Instead of splitting or chopping tablets for a delete we instead create ranges + // to exclude the portion of the tablet that should be deleted + private Text followingRow(Text row) { + if (row == null) { + return null; + } + return new Key(row).followingKey(PartialKey.ROW).getRow(); + } + + // Instead of splitting or chopping tablets for a delete we instead create ranges + // to exclude the portion of the tablet that should be deleted + private List<Range> createRangesForDeletion(TabletMetadata tabletMetadata, + final KeyExtent deleteRange) { + final KeyExtent tabletExtent = tabletMetadata.getExtent(); + + // If the delete range wholly contains the tablet being deleted then there is no range to clip + // files to because the files should be completely dropped. + Preconditions.checkArgument(!deleteRange.contains(tabletExtent), "delete range:%s tablet:%s", + deleteRange, tabletExtent); + + final List<Range> ranges = new ArrayList<>(); + + if (deleteRange.overlaps(tabletExtent)) { + if (deleteRange.prevEndRow() != null + && tabletExtent.contains(followingRow(deleteRange.prevEndRow()))) { + Manager.log.trace("Fencing tablet {} files to ({},{}]", tabletExtent, + tabletExtent.prevEndRow(), deleteRange.prevEndRow()); + ranges.add(new Range(tabletExtent.prevEndRow(), false, deleteRange.prevEndRow(), true)); + } + + // This covers the case of when a deletion range overlaps the last tablet. We need to create a + // range that excludes the deletion. + if (deleteRange.endRow() != null + && tabletMetadata.getExtent().contains(deleteRange.endRow())) { + Manager.log.trace("Fencing tablet {} files to ({},{}]", tabletExtent, deleteRange.endRow(), + tabletExtent.endRow()); + ranges.add(new Range(deleteRange.endRow(), false, tabletExtent.endRow(), true)); + } + } else { + Manager.log.trace( + "Fencing tablet {} files to itself because it does not overlap delete range", + tabletExtent); + ranges.add(tabletExtent.toDataRange()); + } + + return ranges; + } + + private Pair<KeyExtent,KeyExtent> updateMetadataRecordsForDelete(MergeInfo info) + throws AccumuloException { + final KeyExtent range = info.getExtent(); + + String targetSystemTable = MetadataTable.NAME; + if (range.isMeta()) { + targetSystemTable = RootTable.NAME; + } + final Pair<KeyExtent,KeyExtent> startAndEndTablets; + + final AccumuloClient client = manager.getContext(); + + try (BatchWriter bw = client.createBatchWriter(targetSystemTable)) { + final Text startRow = range.prevEndRow(); + final Text endRow = range.endRow() != null + ? new Key(range.endRow()).followingKey(PartialKey.ROW).getRow() : null; + + // Find the tablets that overlap the start and end row of the deletion range + // If the startRow is null then there will be an empty startTablet we don't need + // to fence a starting tablet as we are deleting everything up to the end tablet + // Likewise, if the endRow is null there will be an empty endTablet as we are deleting + // all tablets after the starting tablet + final Optional<TabletMetadata> startTablet = Optional.ofNullable(startRow).flatMap( + row -> loadTabletMetadata(range.tableId(), row, ColumnType.PREV_ROW, ColumnType.FILES)); + final Optional<TabletMetadata> endTablet = Optional.ofNullable(endRow).flatMap( + row -> loadTabletMetadata(range.tableId(), row, ColumnType.PREV_ROW, ColumnType.FILES)); + + // Store the tablets in a Map if present so that if we have the same Tablet we + // only need to process the same tablet once when fencing + final SortedMap<KeyExtent,TabletMetadata> tabletMetadatas = new TreeMap<>(); + startTablet.ifPresent(ft -> tabletMetadatas.put(ft.getExtent(), ft)); + endTablet.ifPresent(lt -> tabletMetadatas.putIfAbsent(lt.getExtent(), lt)); + + // Capture the tablets to return them or null if not loaded + startAndEndTablets = new Pair<>(startTablet.map(TabletMetadata::getExtent).orElse(null), + endTablet.map(TabletMetadata::getExtent).orElse(null)); + + for (TabletMetadata tabletMetadata : tabletMetadatas.values()) { + final KeyExtent keyExtent = tabletMetadata.getExtent(); + + // Check if this tablet needs to have its files fenced for the deletion + if (needsFencingForDeletion(info, keyExtent)) { + Manager.log.debug("Found overlapping keyExtent {} for delete, fencing files.", keyExtent); + + // Create the ranges for fencing the files, this takes the place of + // chop compactions and splits + final List<Range> ranges = createRangesForDeletion(tabletMetadata, range); + Preconditions.checkState(!ranges.isEmpty(), + "No ranges found that overlap deletion range."); + + // Go through and fence each of the files that are part of the tablet + for (Entry<StoredTabletFile,DataFileValue> entry : tabletMetadata.getFilesMap() + .entrySet()) { + StoredTabletFile existing = entry.getKey(); + Value value = entry.getValue().encodeAsValue(); + + final Mutation m = new Mutation(keyExtent.toMetaRow()); + + // Go through each range that was created and modify the metadata for the file + // The end row should be inclusive for the current tablet and the previous end row + // should be exclusive for the start row. + final Set<StoredTabletFile> newFiles = new HashSet<>(); + final Set<StoredTabletFile> existingFile = Set.of(existing); + + for (Range fenced : ranges) { + // Clip range with the tablet range if the range already exists + fenced = existing.hasRange() ? existing.getRange().clip(fenced, true) : fenced; + + // If null the range is disjoint which can happen if there are existing fenced files + // If the existing file is disjoint then later we will delete if the file is not part + // of the newFiles set which means it is disjoint with all ranges + if (fenced != null) { + final StoredTabletFile newFile = StoredTabletFile.of(existing.getPath(), fenced); + Manager.log.trace("Adding new file {} with range {}", newFile.getMetadataPath(), + newFile.getRange()); + + // Add the new file to the newFiles set, it will be added later if it doesn't match + // the existing file already. We still need to add to the set to be checked later + // even if it matches the existing file as later the deletion logic will check to + // see if the existing file is part of this set before deleting. This is done to + // make sure the existing file isn't deleted unless it is not needed/disjoint + // with all ranges. + newFiles.add(newFile); + } else { + Manager.log.trace("Found a disjoint file {} with range {} on delete", + existing.getMetadataPath(), existing.getRange()); + } + } + + // If the existingFile is not contained in the newFiles set then we can delete it + Sets.difference(existingFile, newFiles).forEach( + delete -> m.putDelete(DataFileColumnFamily.NAME, existing.getMetadataText())); + // Add any new files that don't match the existingFile + Sets.difference(newFiles, existingFile).forEach( + newFile -> m.put(DataFileColumnFamily.NAME, newFile.getMetadataText(), value)); + + if (!m.getUpdates().isEmpty()) { + bw.addMutation(m); + } + } + } else { + Manager.log.debug( + "Skipping metadata update on file for keyExtent {} for delete as not overlapping on rows.", + keyExtent); + } + } + + bw.flush(); + + return startAndEndTablets; + } catch (Exception ex) { + throw new AccumuloException(ex); + } + } + + private Optional<TabletMetadata> loadTabletMetadata(TableId tabletId, final Text row, + ColumnType... columns) { + try (TabletsMetadata tabletsMetadata = manager.getContext().getAmple().readTablets() + .forTable(tabletId).overlapping(row, true, row).fetch(columns).build()) { + return tabletsMetadata.stream().findFirst(); + } + } + private void deleteTablets(MergeInfo info, Range scanRange, BatchWriter bw, AccumuloClient client) throws TableNotFoundException, MutationsRejectedException { Scanner scanner; diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java b/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java index df18c869da..0659d58536 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/state/MergeStats.java @@ -98,14 +98,6 @@ public class MergeStats { if (!info.overlaps(ke)) { return; } - if (info.needsToBeChopped(ke)) { - this.needsToBeChopped++; - if (chopped) { - if (state.equals(TabletState.HOSTED) || !hasWALs) { - this.chopped++; - } - } - } this.total++; if (state.equals(TabletState.HOSTED)) { this.hosted++; @@ -136,17 +128,10 @@ public class MergeStats { log.info("Merge range is already contained in a single tablet {}", info.getExtent()); state = MergeState.COMPLETE; } else if (hosted == total) { - if (info.isDelete()) { - if (!lowerSplit) { - log.info("Waiting for {} lower split to occur {}", info, info.getExtent()); - } else if (!upperSplit) { - log.info("Waiting for {} upper split to occur {}", info, info.getExtent()); - } else { - state = MergeState.WAITING_FOR_CHOPPED; - } - } else { - state = MergeState.WAITING_FOR_CHOPPED; - } + // Todo: Clean up references to WAITING_FOR_CHOPPED and SPLITTING and remove + // from enum in a future PR as both are going away. for now just change + // this to going to WAITING_FOR_OFFLINE as chops are not necessary + state = MergeState.WAITING_FOR_OFFLINE; } else { log.info("Waiting for {} hosted tablets to be {} {}", hosted, total, info.getExtent()); } @@ -230,11 +215,6 @@ public class MergeStats { break; } - if (!tls.walogs.isEmpty() && verify.getMergeInfo().needsToBeChopped(tls.extent)) { - log.debug("failing consistency: needs to be chopped {}", tls.extent); - return false; - } - if (prevExtent == null) { // this is the first tablet observed, it must be offline and its prev row must be less than // the start of the merge range diff --git a/test/src/main/java/org/apache/accumulo/test/functional/DeleteRowsIT.java b/test/src/main/java/org/apache/accumulo/test/functional/DeleteRowsIT.java index a18d43df13..88768a7ac6 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/DeleteRowsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/DeleteRowsIT.java @@ -18,6 +18,7 @@ */ package org.apache.accumulo.test.functional; +import static org.apache.accumulo.test.util.FileMetadataUtil.printAndVerifyFileMetadata; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -26,6 +27,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map.Entry; +import java.util.SortedSet; import java.util.TreeSet; import java.util.stream.Collectors; @@ -35,6 +37,8 @@ import org.apache.accumulo.core.client.BatchWriter; import org.apache.accumulo.core.client.Scanner; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Mutation; +import org.apache.accumulo.core.data.PartialKey; +import org.apache.accumulo.core.data.TableId; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.harness.AccumuloClusterHarness; @@ -88,34 +92,84 @@ public class DeleteRowsIT extends AccumuloClusterHarness { String tableName = getUniqueNames(1)[0]; testSplit(c, tableName + i++, "f", "h", "abcdefijklmnopqrstuvwxyz", 260); // Eliminate whole tablets, partial first tablet - testSplit(c, tableName + i++, "f1", "h", "abcdeff1ijklmnopqrstuvwxyz", 262); + testSplit(c, tableName + i++, "f1", "h", "abcdefgijklmnopqrstuvwxyz", 262); // Eliminate whole tablets, partial last tablet testSplit(c, tableName + i++, "f", "h1", "abcdefijklmnopqrstuvwxyz", 258); // Eliminate whole tablets, partial first and last tablet - testSplit(c, tableName + i++, "f1", "h1", "abcdeff1ijklmnopqrstuvwxyz", 260); + testSplit(c, tableName + i++, "f1", "h1", "abcdefgijklmnopqrstuvwxyz", 260); // Eliminate one tablet testSplit(c, tableName + i++, "f", "g", "abcdefhijklmnopqrstuvwxyz", 270); + // Eliminate first tablet + testSplit(c, tableName + i++, null, "a", "bcdefghijklmnopqrstuvwxyz", 270); + // Eliminate last tablet + testSplit(c, tableName + i++, "z", null, "abcdefghijklmnopqrstuvwxyz", 260); // Eliminate partial tablet, matches start split testSplit(c, tableName + i++, "f", "f1", "abcdefghijklmnopqrstuvwxyz", 278); // Eliminate partial tablet, matches end split - testSplit(c, tableName + i++, "f1", "g", "abcdeff1hijklmnopqrstuvwxyz", 272); + testSplit(c, tableName + i++, "f1", "g", "abcdefghijklmnopqrstuvwxyz", 272); // Eliminate tablets starting at -inf testSplit(c, tableName + i++, null, "h", "ijklmnopqrstuvwxyz", 200); // Eliminate tablets ending at +inf testSplit(c, tableName + i++, "t", null, "abcdefghijklmnopqrst", 200); // Eliminate some rows inside one tablet - testSplit(c, tableName + i++, "t0", "t2", "abcdefghijklmnopqrstt0uvwxyz", 278); + testSplit(c, tableName + i++, "t0", "t2", "abcdefghijklmnopqrstuvwxyz", 278); // Eliminate some rows in the first tablet testSplit(c, tableName + i++, null, "A1", "abcdefghijklmnopqrstuvwxyz", 278); // Eliminate some rows in the last tablet - testSplit(c, tableName + i++, "{1", null, "abcdefghijklmnopqrstuvwxyz{1", 272); + testSplit(c, tableName + i++, "{1", null, "abcdefghijklmnopqrstuvwxyz", 272); // Delete everything testSplit(c, tableName + i++, null, null, "", 0); } } + // Test that deletion works on tablets that have files that have already been fenced + // The fenced files are created by doing merges first + @Test + public void testManyRowsAlreadyFenced() throws Exception { + try (AccumuloClient c = Accumulo.newClient().from(getClientProps()).build()) { + // Delete ranges of rows, and verify the tablets are removed. + int i = 0; + // Eliminate whole tablets + String tableName = getUniqueNames(1)[0]; + testSplit(c, tableName + i++, "f", "h", "abcdefijklmnopqrstuvwxyz", 260, "f", "h"); + // Eliminate whole tablets, partial first tablet + testSplit(c, tableName + i++, "f1", "h", "abcdefgijklmnopqrstuvwxyz", 262, "f", "h"); + // Eliminate whole tablets, partial last tablet + testSplit(c, tableName + i++, "f", "h1", "abcdefijklmnopqrstuvwxyz", 258, "f", "h"); + // Eliminate whole tablets, partial first and last tablet + testSplit(c, tableName + i++, "f1", "h1", "abcdefgijklmnopqrstuvwxyz", 260, "f", "h"); + // Eliminate one tablet + testSplit(c, tableName + i++, "f", "g", "abcdefhijklmnopqrstuvwxyz", 270, "f", "g"); + // Eliminate first tablet + testSplit(c, tableName + i++, null, "a", "bcdefghijklmnopqrstuvwxyz", 270, "a", "a"); + // Eliminate last tablet + testSplit(c, tableName + i++, "z", null, "abcdefghijklmnopqrstuvwxyz", 260, "z", "z"); + // Eliminate partial tablet, matches start split + testSplit(c, tableName + i++, "f", "f1", "abcdefghijklmnopqrstuvwxyz", 278, "f", "f"); + // Eliminate partial tablet, matches end split + testSplit(c, tableName + i++, "f1", "g", "abcdefghijklmnopqrstuvwxyz", 272, "f", "g"); + // Eliminate tablets starting at -inf + testSplit(c, tableName + i++, null, "h", "ijklmnopqrstuvwxyz", 200, "a", "h"); + // Eliminate tablets ending at +inf + testSplit(c, tableName + i++, "t", null, "abcdefghijklmnopqrst", 200, "t", "z"); + // Eliminate some rows inside one tablet + testSplit(c, tableName + i++, "t0", "t2", "abcdefghijklmnopqrstuvwxyz", 278, "t", "t"); + // Eliminate some rows in the first tablet + testSplit(c, tableName + i++, null, "A1", "abcdefghijklmnopqrstuvwxyz", 278, "a", "a"); + // Eliminate some rows in the last tablet + testSplit(c, tableName + i++, "{1", null, "abcdefghijklmnopqrstuvwxyz", 272, "z", "z"); + // Delete everything + testSplit(c, tableName + i++, null, null, "", 0, "a", "z"); + } + } + private void testSplit(AccumuloClient c, String table, String start, String end, String result, int entries) throws Exception { + testSplit(c, table, start, end, result, entries, null, null); + } + + private void testSplit(AccumuloClient c, String table, String start, String end, String result, + int entries, String mergeStart, String mergeEnd) throws Exception { // Put a bunch of rows on each tablet c.tableOperations().create(table); try (BatchWriter bw = c.createBatchWriter(table)) { @@ -128,8 +182,41 @@ public class DeleteRowsIT extends AccumuloClusterHarness { } bw.flush(); } + + final TableId tableId = TableId.of(c.tableOperations().tableIdMap().get(table)); // Split the table - c.tableOperations().addSplits(table, SPLITS); + + // If a merge range is defined then merge the tablets given in the range after + // The purpose of the merge is to generate file metadata that contains ranges + // so this will test deletings on existing ranged files + if (mergeStart != null) { + SortedSet<Text> splits = new TreeSet<>(SPLITS); + // Generate 2 split points for each existing split and add + SortedSet<Text> mergeSplits = + SPLITS.subSet(new Text(mergeStart), true, new Text(mergeEnd), true); + mergeSplits.forEach(split -> splits.add(new Text(split.toString() + (ROWS_PER_TABLET / 2)))); + + log.debug("After splits"); + c.tableOperations().addSplits(table, splits); + printAndVerifyFileMetadata(getServerContext(), tableId); + + // Merge back the extra splits to a single tablet per letter to generate 2 files per tablet + // that have a range + mergeSplits.forEach(split -> { + try { + c.tableOperations().merge(table, split, new Key(split.toString() + (ROWS_PER_TABLET / 2)) + .followingKey(PartialKey.ROW).getRow()); + log.debug("After Merge"); + printAndVerifyFileMetadata(getServerContext(), tableId); + } catch (Exception e) { + throw new RuntimeException(e); + } + }); + } else { + c.tableOperations().addSplits(table, SPLITS); + log.debug("After splits"); + printAndVerifyFileMetadata(getServerContext(), tableId); + } Text startText = start == null ? null : new Text(start); Text endText = end == null ? null : new Text(end); @@ -140,7 +227,10 @@ public class DeleteRowsIT extends AccumuloClusterHarness { for (Text split : remainingSplits) { sb.append(split); } + log.debug("After delete"); + printAndVerifyFileMetadata(getServerContext(), tableId); assertEquals(result, sb.toString()); + // See that the rows are really deleted try (Scanner scanner = c.createScanner(table, Authorizations.EMPTY)) { int count = 0;