This is an automated email from the ASF dual-hosted git repository. edcoleman pushed a commit to branch 1.10 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/1.10 by this push: new 19719b5ff9 add check to gc to validate metadata scan is complete (#3703) 19719b5ff9 is described below commit 19719b5ff926c1e41ef511abe97b94aa50771fb6 Author: EdColeman <d...@etcoleman.com> AuthorDate: Tue Sep 19 12:34:15 2023 +0000 add check to gc to validate metadata scan is complete (#3703) * add check to GC to validate metadata scan is complete. Check that the GC metadata scan includes dir and prev row entries to help ensure that the whole row for a GC candidate was read. Fixes #3696 for 1.10. Co-authored-by: Christopher Tubbs <ctubbsii@apache.> Co-authored-by: Keith Turner <ktur...@apache.org> --- .../accumulo/gc/GarbageCollectionAlgorithm.java | 173 ++++++++++---- .../apache/accumulo/gc/SimpleGarbageCollector.java | 2 +- .../apache/accumulo/gc/GarbageCollectionTest.java | 258 ++++++++++++++++++++- 3 files changed, 382 insertions(+), 51 deletions(-) diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java index d975dbb807..d04741d4d2 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java @@ -24,6 +24,7 @@ import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map.Entry; +import java.util.Objects; import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; @@ -47,6 +48,7 @@ import org.apache.hadoop.io.Text; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; import com.google.common.collect.Iterators; import com.google.common.collect.PeekingIterator; @@ -172,52 +174,66 @@ public class GarbageCollectionAlgorithm { } - Iterator<Entry<Key,Value>> iter = gce.getReferenceIterator(); - while (iter.hasNext()) { - Entry<Key,Value> entry = iter.next(); - Key key = entry.getKey(); - Text cft = key.getColumnFamily(); - - if (cft.equals(DataFileColumnFamily.NAME) || cft.equals(ScanFileColumnFamily.NAME)) { - String cq = key.getColumnQualifier().toString(); - - String reference = cq; - if (cq.startsWith("/")) { - String tableID = new String(KeyExtent.tableOfMetadataRow(key.getRow())); - reference = "/" + tableID + cq; - } else if (!cq.contains(":") && !cq.startsWith("../")) { - throw new RuntimeException("Bad file reference " + cq); - } + // it is important that the tracker is closed before performing deletes so that last row is + // checked + try (MetadataRowReadTracker readTracker = new MetadataRowReadTracker()) { + Iterator<Entry<Key,Value>> iter = gce.getReferenceIterator(); + while (iter.hasNext()) { + Entry<Key,Value> entry = iter.next(); + Key key = entry.getKey(); + + // check that dir entry was read for the row. If not, the metadata information may not be + // complete. Abort the gc cycle. + readTracker.trackRow(key.getRow()); + + Text cft = key.getColumnFamily(); + + if (cft.equals(DataFileColumnFamily.NAME) || cft.equals(ScanFileColumnFamily.NAME)) { + String cq = key.getColumnQualifier().toString(); + + String reference = cq; + if (cq.startsWith("/")) { + String tableID = new String(KeyExtent.tableOfMetadataRow(key.getRow())); + reference = "/" + tableID + cq; + } else if (!cq.contains(":") && !cq.startsWith("../")) { + throw new RuntimeException("Bad file reference " + cq); + } - reference = makeRelative(reference, 3); + reference = makeRelative(reference, 3); - // WARNING: This line is EXTREMELY IMPORTANT. - // You MUST REMOVE candidates that are still in use - if (candidateMap.remove(reference) != null) - log.debug("Candidate was still in use: " + reference); + // WARNING: This line is EXTREMELY IMPORTANT. + // You MUST REMOVE candidates that are still in use + if (candidateMap.remove(reference) != null) + log.debug("Candidate was still in use: " + reference); - String dir = reference.substring(0, reference.lastIndexOf('/')); - if (candidateMap.remove(dir) != null) - log.debug("Candidate was still in use: " + reference); + String dir = reference.substring(0, reference.lastIndexOf('/')); + if (candidateMap.remove(dir) != null) + log.debug("Candidate was still in use: " + reference); - } else if (TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { - String tableID = new String(KeyExtent.tableOfMetadataRow(key.getRow())); - String dir = entry.getValue().toString(); - if (!dir.contains(":")) { - if (!dir.startsWith("/")) - throw new RuntimeException("Bad directory " + dir); - dir = "/" + tableID + dir; - } + } else if (TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) { + readTracker.markDirSeen(); + String tableID = new String(KeyExtent.tableOfMetadataRow(key.getRow())); + String dir = entry.getValue().toString(); + if (!dir.contains(":")) { + if (!dir.startsWith("/")) + throw new RuntimeException("Bad directory " + dir); + dir = "/" + tableID + dir; + } - dir = makeRelative(dir, 2); + dir = makeRelative(dir, 2); - if (candidateMap.remove(dir) != null) - log.debug("Candidate was still in use: " + dir); - } else - throw new RuntimeException( - "Scanner over metadata table returned unexpected column : " + entry.getKey()); + if (candidateMap.remove(dir) != null) { + log.debug("Candidate was still in use: " + dir); + } + } else if (TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) { + readTracker.markPrevRowSeen(); + } else + throw new RuntimeException( + "Scanner over metadata table returned unexpected column : " + entry.getKey()); + } + // the tracker is closed at the end of this block; it will check the last row + // in its close method as its final action } - confirmDeletesFromReplication(gce.getReplicationNeededIterator(), candidateMap.entrySet().iterator()); } @@ -343,4 +359,83 @@ public class GarbageCollectionAlgorithm { deleteConfirmed(gce, candidateMap); } } + + /** + * Track metadata rows read to help validate that gc scan has complete information to make a + * decision on deleting files + */ + private static class MetadataRowReadTracker implements AutoCloseable { + private boolean hasDir = false; + private boolean hasPrevRow = false; + private Text row; + + private boolean closed = false; + + public MetadataRowReadTracker() { + this.row = null; + } + + private void validate() { + Preconditions.checkState(hasDir && hasPrevRow, + "May not have fully read metadata for row, aborting this run. Validation results: %s", + this); + } + + /** + * Initializes row tracking for the provided row. If a previous row was being tracked, it is + * checked that all expected metadata fields have been marked as seen. If all fields have not + * been marked seen, an IllegalStateException is thrown to halt further processing. + * + * @param candidate + * check current row, initialize a new row to track + */ + public void trackRow(final Text candidate) { + Preconditions.checkState(!closed); + Objects.requireNonNull(candidate); + if (row == null) { + row = candidate; // first row seen + } else if (!row.equals(candidate)) { + // row changed, validate previous + validate(); + // start tracking the next row + hasPrevRow = false; + hasDir = false; + row = candidate; + } + } + + /** + * Mark that the dir metadata entry seen for the current row being tracked. + */ + public void markDirSeen() { + Preconditions.checkState(!closed); + hasDir = true; + } + + /** + * Mark that the prevRow metadata entry seen for the current row being tracked. + */ + public void markPrevRowSeen() { + Preconditions.checkState(!closed); + hasPrevRow = true; + } + + /** + * Check that the final row processed is complete and then close tracker to additional + * processing. + */ + @Override + public void close() { + if (!closed && row != null) { + validate(); + } + closed = true; + } + + @Override + public String toString() { + return "MetadataReadTracker{row=" + row + ", hasDir=" + hasDir + ", hasPrevRow=" + hasPrevRow + + '}'; + } + } } diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java b/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java index b73d7deec5..bfd4dc9b3c 100644 --- a/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java +++ b/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java @@ -328,7 +328,7 @@ public class SimpleGarbageCollector extends AccumuloServerContext implements Ifa scanner.fetchColumnFamily(ScanFileColumnFamily.NAME); TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(scanner); TabletIterator tabletIterator = - new TabletIterator(scanner, MetadataSchema.TabletsSection.getRange(), false, true); + new TabletIterator(scanner, MetadataSchema.TabletsSection.getRange(), true, true); return Iterators .concat(Iterators.transform(tabletIterator, input -> input.entrySet().iterator())); diff --git a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java index 56b43078cb..70dbadb299 100644 --- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java +++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java @@ -17,6 +17,7 @@ package org.apache.accumulo.gc; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -57,6 +58,8 @@ public class GarbageCollectionTest { ArrayList<String> tablesDirsToDelete = new ArrayList<>(); TreeMap<String,Status> filesToReplicate = new TreeMap<>(); + public TestGCE() {} + @Override public boolean getCandidates(String continuePoint, List<String> ret) { Iterator<String> iter = candidates.tailSet(continuePoint, false).iterator(); @@ -102,14 +105,14 @@ public class GarbageCollectionTest { return key; } - public Value addFileReference(String tableId, String endRow, String file) { + public void addFileReference(String tableId, String endRow, String file) { Key key = newFileReferenceKey(tableId, endRow, file); Value val = new Value(new DataFileValue(0, 0).encode()); - return references.put(key, val); + references.put(key, val); } - public Value removeFileReference(String tableId, String endRow, String file) { - return references.remove(newFileReferenceKey(tableId, endRow, file)); + public void removeFileReference(String tableId, String endRow, String file) { + references.remove(newFileReferenceKey(tableId, endRow, file)); } Key newDirReferenceKey(String tableId, String endRow) { @@ -123,14 +126,34 @@ public class GarbageCollectionTest { return key; } - public Value addDirReference(String tableId, String endRow, String dir) { + public void addDirReference(String tableId, String endRow, String dir) { Key key = newDirReferenceKey(tableId, endRow); Value val = new Value(dir.getBytes()); - return references.put(key, val); + references.put(key, val); } - public Value removeDirReference(String tableId, String endRow) { - return references.remove(newDirReferenceKey(tableId, endRow)); + public Key newPrevRowKey(String tableId, String endRow) { + String row = new KeyExtent(tableId, endRow == null ? null : new Text(endRow), null) + .getMetadataEntry().toString(); + String cf = MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.getColumnFamily() + .toString(); + String cq = MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN + .getColumnQualifier().toString(); + return new Key(row, cf, cq); + } + + public void addPrevRowReference(String tableId, String endRow) { + Key key = newPrevRowKey(tableId, endRow); + Value val = new Value(); + references.put(key, val); + } + + public void removeDirReference(String tableId, String endRow) { + references.remove(newDirReferenceKey(tableId, endRow)); + } + + public void removePrevRowReference(String tableId, String endRow) { + references.remove(newPrevRowKey(tableId, endRow)); } @Override @@ -167,6 +190,11 @@ public class GarbageCollectionTest { gce.addFileReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0//F002.rf"); gce.addFileReference("5", null, "hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); + gce.addDirReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0"); + gce.addPrevRowReference("4", null); + gce.addDirReference("5", null, "hdfs://foo.com:6000/accumulo/tables/4/t0"); + gce.addPrevRowReference("5", null); + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); gca.collect(gce); @@ -208,8 +236,16 @@ public class GarbageCollectionTest { gce.addFileReference("4", null, "/t0/F000.rf"); gce.addFileReference("4", null, "/t0/F001.rf"); gce.addFileReference("4", null, "/t0/F002.rf"); + gce.addDirReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0"); + gce.addPrevRowReference("4", null); + gce.addFileReference("5", null, "../4/t0/F000.rf"); + gce.addDirReference("5", null, "hdfs://foo.com:6000/accumulo/tables/4/t0"); + gce.addPrevRowReference("5", null); + gce.addFileReference("6", null, "hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf"); + gce.addDirReference("6", null, "hdfs://foo.com:6000/accumulo/tables/4/t0"); + gce.addPrevRowReference("6", null); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); @@ -309,18 +345,37 @@ public class GarbageCollectionTest { gce.candidates.add("hdfs://foo:6000/accumulo/tables/d/t-0"); gce.addDirReference("4", null, "/t-0"); - gce.addDirReference("5", null, "/t-0"); + gce.addPrevRowReference("4", null); + gce.addDirReference("5", null, "hdfs://foo.com:6000/accumulo/tables/5/t-0"); + gce.addPrevRowReference("5", null); gce.addDirReference("6", null, "hdfs://foo.com:6000/accumulo/tables/6/t-0"); + gce.addPrevRowReference("6", null); gce.addDirReference("7", null, "hdfs://foo.com:6000/accumulo/tables/7/t-0"); + gce.addPrevRowReference("7", null); gce.addFileReference("8", "m", "/t-0/F00.rf"); + gce.addDirReference("8", "m", "/t-0"); + gce.addPrevRowReference("8", "m"); + gce.addFileReference("9", "m", "/t-0/F00.rf"); + gce.addDirReference("9", "m", "/t-0"); + gce.addPrevRowReference("9", "m"); gce.addFileReference("a", "m", "hdfs://foo.com:6000/accumulo/tables/a/t-0/F00.rf"); + gce.addDirReference("a", "m", "hdfs://foo.com:6000/accumulo/tables/a/t-0"); + gce.addPrevRowReference("a", "m"); + gce.addFileReference("b", "m", "hdfs://foo.com:6000/accumulo/tables/b/t-0/F00.rf"); + gce.addDirReference("b", "m", "hdfs://foo.com:6000/accumulo/tables/b/t-0"); + gce.addPrevRowReference("b", "m"); gce.addFileReference("e", "m", "../c/t-0/F00.rf"); + gce.addDirReference("e", "m", "hdfs://foo.com:6000/accumulo/tables/c/t-0"); + gce.addPrevRowReference("e", "m"); + gce.addFileReference("f", "m", "../d/t-0/F00.rf"); + gce.addDirReference("f", "m", "hdfs://foo.com:6000/accumulo/tables/d/t-0"); + gce.addPrevRowReference("f", "m"); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); @@ -330,24 +385,49 @@ public class GarbageCollectionTest { // Removing the dir reference for a table will delete all tablet directories gce.removeDirReference("5", null); + gce.removePrevRowReference("5", null); + gca.collect(gce); assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5/t-0"); gce.removeDirReference("4", null); + gce.removePrevRowReference("4", null); + gca.collect(gce); assertRemoved(gce, "/4/t-0"); gce.removeDirReference("6", null); + gce.removePrevRowReference("6", null); gce.removeDirReference("7", null); + gce.removePrevRowReference("7", null); + gca.collect(gce); assertRemoved(gce, "/6/t-0", "hdfs://foo:6000/accumulo/tables/7/t-0/"); gce.removeFileReference("8", "m", "/t-0/F00.rf"); + gce.removeDirReference("8", "m"); + gce.removePrevRowReference("8", "m"); + gce.removeFileReference("9", "m", "/t-0/F00.rf"); + gce.removeDirReference("9", "m"); + gce.removePrevRowReference("9", "m"); + gce.removeFileReference("a", "m", "hdfs://foo.com:6000/accumulo/tables/a/t-0/F00.rf"); + gce.removeDirReference("a", "m"); + gce.removePrevRowReference("a", "m"); + gce.removeFileReference("b", "m", "hdfs://foo.com:6000/accumulo/tables/b/t-0/F00.rf"); + gce.removeDirReference("b", "m"); + gce.removePrevRowReference("b", "m"); + gce.removeFileReference("e", "m", "../c/t-0/F00.rf"); + gce.removeDirReference("e", "m"); + gce.removePrevRowReference("e", "m"); + gce.removeFileReference("f", "m", "../d/t-0/F00.rf"); + gce.removeDirReference("f", "m"); + gce.removePrevRowReference("f", "m"); + gca.collect(gce); assertRemoved(gce, "/8/t-0", "hdfs://foo:6000/accumulo/tables/9/t-0", "/a/t-0", "hdfs://foo:6000/accumulo/tables/b/t-0", "/c/t-0", "hdfs://foo:6000/accumulo/tables/d/t-0"); @@ -373,18 +453,37 @@ public class GarbageCollectionTest { gce.candidates.add("hdfs://foo:6000/user/foo/tables/d/t-0"); gce.addDirReference("4", null, "/t-0"); + gce.addPrevRowReference("4", null); gce.addDirReference("5", null, "/t-0"); + gce.addPrevRowReference("5", null); gce.addDirReference("6", null, "hdfs://foo.com:6000/user/foo/tables/6/t-0"); + gce.addPrevRowReference("6", null); gce.addDirReference("7", null, "hdfs://foo.com:6000/user/foo/tables/7/t-0"); + gce.addPrevRowReference("7", null); gce.addFileReference("8", "m", "/t-0/F00.rf"); + gce.addDirReference("8", "m", "hdfs://foo.com:6000/user/foo/tables/8/t-0"); + gce.addPrevRowReference("8", "m"); + gce.addFileReference("9", "m", "/t-0/F00.rf"); + gce.addDirReference("9", "m", "hdfs://foo.com:6000/user/foo/tables/9/t-0"); + gce.addPrevRowReference("9", "m"); gce.addFileReference("a", "m", "hdfs://foo.com:6000/user/foo/tables/a/t-0/F00.rf"); + gce.addDirReference("a", "m", "hdfs://foo.com:6000/user/foo/tables/a/t-0"); + gce.addPrevRowReference("a", "m"); + gce.addFileReference("b", "m", "hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf"); + gce.addDirReference("b", "m", "hdfs://foo.com:6000/user/foo/tables/b/t-0"); + gce.addPrevRowReference("b", "m"); gce.addFileReference("e", "m", "../c/t-0/F00.rf"); + gce.addDirReference("e", "m", "hdfs://foo.com:6000/user/foo/tables/c/t-0"); + gce.addPrevRowReference("e", "m"); + gce.addFileReference("f", "m", "../d/t-0/F00.rf"); + gce.addDirReference("f", "m", "hdfs://foo.com:6000/user/foo/tables/d/t-0"); + gce.addPrevRowReference("f", "m"); GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); @@ -394,24 +493,48 @@ public class GarbageCollectionTest { // Removing the dir reference for a table will delete all tablet directories gce.removeDirReference("5", null); + gce.removePrevRowReference("5", null); gca.collect(gce); assertRemoved(gce, "hdfs://foo.com:6000/user/foo/tables/5/t-0"); gce.removeDirReference("4", null); + gce.removePrevRowReference("4", null); gca.collect(gce); assertRemoved(gce, "/4/t-0"); gce.removeDirReference("6", null); + gce.removePrevRowReference("6", null); + gce.removeDirReference("7", null); + gce.removePrevRowReference("7", null); + gca.collect(gce); assertRemoved(gce, "/6/t-0", "hdfs://foo:6000/user/foo/tables/7/t-0/"); gce.removeFileReference("8", "m", "/t-0/F00.rf"); + gce.removeDirReference("8", "m"); + gce.removePrevRowReference("8", "m"); + gce.removeFileReference("9", "m", "/t-0/F00.rf"); + gce.removeDirReference("9", "m"); + gce.removePrevRowReference("9", "m"); + gce.removeFileReference("a", "m", "hdfs://foo.com:6000/user/foo/tables/a/t-0/F00.rf"); + gce.removeDirReference("a", "m"); + gce.removePrevRowReference("a", "m"); + gce.removeFileReference("b", "m", "hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf"); + gce.removeDirReference("b", "m"); + gce.removePrevRowReference("b", "m"); + gce.removeFileReference("e", "m", "../c/t-0/F00.rf"); + gce.removeDirReference("e", "m"); + gce.removePrevRowReference("e", "m"); + gce.removeFileReference("f", "m", "../d/t-0/F00.rf"); + gce.removeDirReference("f", "m"); + gce.removePrevRowReference("f", "m"); + gca.collect(gce); assertRemoved(gce, "/8/t-0", "hdfs://foo:6000/user/foo/tables/9/t-0", "/a/t-0", "hdfs://foo:6000/user/foo/tables/b/t-0", "/c/t-0", "hdfs://foo:6000/user/foo/tables/d/t-0"); @@ -470,8 +593,8 @@ public class GarbageCollectionTest { @Test public void testBadDeletes() throws Exception { GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); - TestGCE gce = new TestGCE(); + gce.candidates.add(""); gce.candidates.add("A"); gce.candidates.add("/"); @@ -492,12 +615,13 @@ public class GarbageCollectionTest { @Test public void test() throws Exception { - GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet"); gce.addDirReference("1636", null, "/default_tablet"); + gce.addPrevRowReference("1636", null); gca.collect(gce); assertRemoved(gce); @@ -515,6 +639,7 @@ public class GarbageCollectionTest { gce = new TestGCE(); gce.addFileReference("1636", null, "../9/default_tablet/someFile"); gce.addDirReference("1636", null, "/default_tablet"); + gce.addPrevRowReference("1636", null); gce.candidates.add("/9/default_tablet/someFile"); gca.collect(gce); assertRemoved(gce); @@ -554,6 +679,7 @@ public class GarbageCollectionTest { gce.candidates.add("hdfs://foo:6000/accumulo/tables/7/t-0/"); gce.addDirReference("7", null, "hdfs://foo.com:6000/accumulo/tables/7/t-0"); + gce.addPrevRowReference("7", null); gca.collect(gce); @@ -644,4 +770,114 @@ public class GarbageCollectionTest { assertEquals(1, gce.deletes.size()); assertEquals("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf", gce.deletes.get(0)); } + + /** + * Minimal test to show that dir and prevRow are required for valid scan (go path) + */ + @Test + public void testDirAndPrevRow() throws Exception { + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet"); + gce.addDirReference("1636", null, "/default_tablet"); + gce.addPrevRowReference("1636", null); + gca.collect(gce); + assertEquals(0, gce.deletes.size()); + } + + /** + * Show that IllegalState is thrown when no dir entry present in metadata scan in last row seen. + */ + @Test + public void testNoDirAsLastRow() { + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet"); + gce.addPrevRowReference("1636", null); + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + assertEquals(1, gce.candidates.size()); + } + + /** + * Show that IllegalState is thrown when no dir entry present in metadata scan. + */ + @Test + public void testNoDir() { + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet/f1"); + gce.addPrevRowReference("1636", "a"); + + gce.candidates.add("/1636/t1/f2"); + gce.addDirReference("1636", null, "/t1"); + gce.addPrevRowReference("1636", null); + + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + assertEquals(2, gce.candidates.size()); + } + + /** + * Show that IllegalState is thrown when no prev row present in metadata scan in last row seen. + */ + @Test + public void testNoPrevRowAsLastRow() { + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet"); + gce.addDirReference("1636", null, "/default_tablet"); + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + assertEquals(1, gce.candidates.size()); + } + + /** + * Show that IllegalState is thrown when no prevRow entry present in metadata scan. + */ + @Test + public void testPrevRow() { + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet/f1"); + gce.addDirReference("1636", "a", "/default_tablet"); + + gce.candidates.add("/1636/t1/f2"); + gce.addDirReference("1636", null, "/t1"); + gce.addPrevRowReference("1636", null); + + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + assertEquals(2, gce.candidates.size()); + } + + /** + * Show that IllegalState is thrown when no prevRow entry present in metadata scan. + */ + @Test + public void testPrevRowOnly() { + TestGCE gce = new TestGCE(); + + gce.candidates.add("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf"); + gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf"); + gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf"); + + gce.addPrevRowReference("1636", null); + + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + assertEquals(3, gce.candidates.size()); + } + + @Test + public void testNoPrevRowNoDir() throws Exception { + + GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm(); + + TestGCE gce = new TestGCE(); + gce.candidates.add("/1636/default_tablet"); + gce.addFileReference("b", "m", "hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf"); + assertThrows(IllegalStateException.class, () -> gca.collect(gce)); + } }