This is an automated email from the ASF dual-hosted git repository.

edcoleman pushed a commit to branch 1.10
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/1.10 by this push:
     new 19719b5ff9 add check to gc to validate metadata scan is complete 
(#3703)
19719b5ff9 is described below

commit 19719b5ff926c1e41ef511abe97b94aa50771fb6
Author: EdColeman <d...@etcoleman.com>
AuthorDate: Tue Sep 19 12:34:15 2023 +0000

    add check to gc to validate metadata scan is complete (#3703)
    
    * add check to GC to validate metadata scan is complete.
    
    Check that the GC metadata scan includes dir and prev row entries to help
    ensure that the whole row for a GC candidate was read.
    
    Fixes #3696 for 1.10.
    
    Co-authored-by: Christopher Tubbs <ctubbsii@apache.org>
    Co-authored-by: Keith Turner <ktur...@apache.org>
---
 .../accumulo/gc/GarbageCollectionAlgorithm.java    | 173 ++++++++++----
 .../apache/accumulo/gc/SimpleGarbageCollector.java |   2 +-
 .../apache/accumulo/gc/GarbageCollectionTest.java  | 258 ++++++++++++++++++++-
 3 files changed, 382 insertions(+), 51 deletions(-)

diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
index d975dbb807..d04741d4d2 100644
--- 
a/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
+++ 
b/server/gc/src/main/java/org/apache/accumulo/gc/GarbageCollectionAlgorithm.java
@@ -24,6 +24,7 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map.Entry;
+import java.util.Objects;
 import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
@@ -47,6 +48,7 @@ import org.apache.hadoop.io.Text;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Iterators;
 import com.google.common.collect.PeekingIterator;
 
@@ -172,52 +174,66 @@ public class GarbageCollectionAlgorithm {
 
     }
 
-    Iterator<Entry<Key,Value>> iter = gce.getReferenceIterator();
-    while (iter.hasNext()) {
-      Entry<Key,Value> entry = iter.next();
-      Key key = entry.getKey();
-      Text cft = key.getColumnFamily();
-
-      if (cft.equals(DataFileColumnFamily.NAME) || 
cft.equals(ScanFileColumnFamily.NAME)) {
-        String cq = key.getColumnQualifier().toString();
-
-        String reference = cq;
-        if (cq.startsWith("/")) {
-          String tableID = new 
String(KeyExtent.tableOfMetadataRow(key.getRow()));
-          reference = "/" + tableID + cq;
-        } else if (!cq.contains(":") && !cq.startsWith("../")) {
-          throw new RuntimeException("Bad file reference " + cq);
-        }
+    // it is important that the tracker is closed before performing deletes so 
that last row is
+    // checked
+    try (MetadataRowReadTracker readTracker = new MetadataRowReadTracker()) {
+      Iterator<Entry<Key,Value>> iter = gce.getReferenceIterator();
+      while (iter.hasNext()) {
+        Entry<Key,Value> entry = iter.next();
+        Key key = entry.getKey();
+
+        // check that dir entry was read for the row. If not, the metadata 
information may not be
+        // complete. Abort the gc cycle.
+        readTracker.trackRow(key.getRow());
+
+        Text cft = key.getColumnFamily();
+
+        if (cft.equals(DataFileColumnFamily.NAME) || 
cft.equals(ScanFileColumnFamily.NAME)) {
+          String cq = key.getColumnQualifier().toString();
+
+          String reference = cq;
+          if (cq.startsWith("/")) {
+            String tableID = new 
String(KeyExtent.tableOfMetadataRow(key.getRow()));
+            reference = "/" + tableID + cq;
+          } else if (!cq.contains(":") && !cq.startsWith("../")) {
+            throw new RuntimeException("Bad file reference " + cq);
+          }
 
-        reference = makeRelative(reference, 3);
+          reference = makeRelative(reference, 3);
 
-        // WARNING: This line is EXTREMELY IMPORTANT.
-        // You MUST REMOVE candidates that are still in use
-        if (candidateMap.remove(reference) != null)
-          log.debug("Candidate was still in use: " + reference);
+          // WARNING: This line is EXTREMELY IMPORTANT.
+          // You MUST REMOVE candidates that are still in use
+          if (candidateMap.remove(reference) != null)
+            log.debug("Candidate was still in use: " + reference);
 
-        String dir = reference.substring(0, reference.lastIndexOf('/'));
-        if (candidateMap.remove(dir) != null)
-          log.debug("Candidate was still in use: " + reference);
+          String dir = reference.substring(0, reference.lastIndexOf('/'));
+          if (candidateMap.remove(dir) != null)
+            log.debug("Candidate was still in use: " + reference);
 
-      } else if 
(TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) {
-        String tableID = new 
String(KeyExtent.tableOfMetadataRow(key.getRow()));
-        String dir = entry.getValue().toString();
-        if (!dir.contains(":")) {
-          if (!dir.startsWith("/"))
-            throw new RuntimeException("Bad directory " + dir);
-          dir = "/" + tableID + dir;
-        }
+        } else if 
(TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.hasColumns(key)) {
+          readTracker.markDirSeen();
+          String tableID = new 
String(KeyExtent.tableOfMetadataRow(key.getRow()));
+          String dir = entry.getValue().toString();
+          if (!dir.contains(":")) {
+            if (!dir.startsWith("/"))
+              throw new RuntimeException("Bad directory " + dir);
+            dir = "/" + tableID + dir;
+          }
 
-        dir = makeRelative(dir, 2);
+          dir = makeRelative(dir, 2);
 
-        if (candidateMap.remove(dir) != null)
-          log.debug("Candidate was still in use: " + dir);
-      } else
-        throw new RuntimeException(
-            "Scanner over metadata table returned unexpected column : " + 
entry.getKey());
+          if (candidateMap.remove(dir) != null) {
+            log.debug("Candidate was still in use: " + dir);
+          }
+        } else if 
(TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.hasColumns(key)) {
+          readTracker.markPrevRowSeen();
+        } else
+          throw new RuntimeException(
+              "Scanner over metadata table returned unexpected column : " + 
entry.getKey());
+      }
+      // the tracker is closed at the end of this block; it will check the 
last row
+      // in its close method as its final action
     }
-
     confirmDeletesFromReplication(gce.getReplicationNeededIterator(),
         candidateMap.entrySet().iterator());
   }
@@ -343,4 +359,83 @@ public class GarbageCollectionAlgorithm {
       deleteConfirmed(gce, candidateMap);
     }
   }
+
+  /**
+   * Track metadata rows read to help validate that gc scan has complete 
information to make a
+   * decision on deleting files
+   */
+  private static class MetadataRowReadTracker implements AutoCloseable {
+    private boolean hasDir = false;
+    private boolean hasPrevRow = false;
+    private Text row;
+
+    private boolean closed = false;
+
+    public MetadataRowReadTracker() {
+      this.row = null;
+    }
+
+    private void validate() {
+      Preconditions.checkState(hasDir && hasPrevRow,
+          "May not have fully read metadata for row, aborting this run. 
Validation results: %s",
+          this);
+    }
+
+    /**
+     * Initializes row tracking for the provided row. If a previous row was 
being tracked, it is
+     * checked that all expected metadata fields have been marked as seen. If 
all fields have not
+     * been marked seen, an IllegalStateException is thrown to halt further 
processing.
+     *
+     * @param candidate
+     *          check current row, initialize a new row to track
+     */
+    public void trackRow(final Text candidate) {
+      Preconditions.checkState(!closed);
+      Objects.requireNonNull(candidate);
+      if (row == null) {
+        row = candidate; // first row seen
+      } else if (!row.equals(candidate)) {
+        // row changed, validate previous
+        validate();
+        // start tracking the next row
+        hasPrevRow = false;
+        hasDir = false;
+        row = candidate;
+      }
+    }
+
+    /**
+     * Mark that the dir metadata entry was seen for the current row being tracked.
+     */
+    public void markDirSeen() {
+      Preconditions.checkState(!closed);
+      hasDir = true;
+    }
+
+    /**
+     * Mark that the prevRow metadata entry was seen for the current row being tracked.
+     */
+    public void markPrevRowSeen() {
+      Preconditions.checkState(!closed);
+      hasPrevRow = true;
+    }
+
+    /**
+     * Check that the final row processed is complete and then close tracker 
to additional
+     * processing.
+     */
+    @Override
+    public void close() {
+      if (!closed && row != null) {
+        validate();
+      }
+      closed = true;
+    }
+
+    @Override
+    public String toString() {
+      return "MetadataReadTracker{row=" + row + ", hasDir=" + hasDir + ", 
hasPrevRow=" + hasPrevRow
+          + '}';
+    }
+  }
 }
diff --git 
a/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java 
b/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java
index b73d7deec5..bfd4dc9b3c 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/SimpleGarbageCollector.java
@@ -328,7 +328,7 @@ public class SimpleGarbageCollector extends 
AccumuloServerContext implements Ifa
       scanner.fetchColumnFamily(ScanFileColumnFamily.NAME);
       TabletsSection.ServerColumnFamily.DIRECTORY_COLUMN.fetch(scanner);
       TabletIterator tabletIterator =
-          new TabletIterator(scanner, 
MetadataSchema.TabletsSection.getRange(), false, true);
+          new TabletIterator(scanner, 
MetadataSchema.TabletsSection.getRange(), true, true);
 
       return Iterators
           .concat(Iterators.transform(tabletIterator, input -> 
input.entrySet().iterator()));
diff --git 
a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java 
b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
index 56b43078cb..70dbadb299 100644
--- a/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
+++ b/server/gc/src/test/java/org/apache/accumulo/gc/GarbageCollectionTest.java
@@ -17,6 +17,7 @@
 package org.apache.accumulo.gc;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThrows;
 import static org.junit.Assert.assertTrue;
 
 import java.util.ArrayList;
@@ -57,6 +58,8 @@ public class GarbageCollectionTest {
     ArrayList<String> tablesDirsToDelete = new ArrayList<>();
     TreeMap<String,Status> filesToReplicate = new TreeMap<>();
 
+    public TestGCE() {}
+
     @Override
     public boolean getCandidates(String continuePoint, List<String> ret) {
       Iterator<String> iter = candidates.tailSet(continuePoint, 
false).iterator();
@@ -102,14 +105,14 @@ public class GarbageCollectionTest {
       return key;
     }
 
-    public Value addFileReference(String tableId, String endRow, String file) {
+    public void addFileReference(String tableId, String endRow, String file) {
       Key key = newFileReferenceKey(tableId, endRow, file);
       Value val = new Value(new DataFileValue(0, 0).encode());
-      return references.put(key, val);
+      references.put(key, val);
     }
 
-    public Value removeFileReference(String tableId, String endRow, String 
file) {
-      return references.remove(newFileReferenceKey(tableId, endRow, file));
+    public void removeFileReference(String tableId, String endRow, String 
file) {
+      references.remove(newFileReferenceKey(tableId, endRow, file));
     }
 
     Key newDirReferenceKey(String tableId, String endRow) {
@@ -123,14 +126,34 @@ public class GarbageCollectionTest {
       return key;
     }
 
-    public Value addDirReference(String tableId, String endRow, String dir) {
+    public void addDirReference(String tableId, String endRow, String dir) {
       Key key = newDirReferenceKey(tableId, endRow);
       Value val = new Value(dir.getBytes());
-      return references.put(key, val);
+      references.put(key, val);
     }
 
-    public Value removeDirReference(String tableId, String endRow) {
-      return references.remove(newDirReferenceKey(tableId, endRow));
+    public Key newPrevRowKey(String tableId, String endRow) {
+      String row = new KeyExtent(tableId, endRow == null ? null : new 
Text(endRow), null)
+          .getMetadataEntry().toString();
+      String cf = 
MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.getColumnFamily()
+          .toString();
+      String cq = 
MetadataSchema.TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN
+          .getColumnQualifier().toString();
+      return new Key(row, cf, cq);
+    }
+
+    public void addPrevRowReference(String tableId, String endRow) {
+      Key key = newPrevRowKey(tableId, endRow);
+      Value val = new Value();
+      references.put(key, val);
+    }
+
+    public void removeDirReference(String tableId, String endRow) {
+      references.remove(newDirReferenceKey(tableId, endRow));
+    }
+
+    public void removePrevRowReference(String tableId, String endRow) {
+      references.remove(newPrevRowKey(tableId, endRow));
     }
 
     @Override
@@ -167,6 +190,11 @@ public class GarbageCollectionTest {
     gce.addFileReference("4", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0//F002.rf");
     gce.addFileReference("5", null, 
"hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
 
+    gce.addDirReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0");
+    gce.addPrevRowReference("4", null);
+    gce.addDirReference("5", null, "hdfs://foo.com:6000/accumulo/tables/4/t0");
+    gce.addPrevRowReference("5", null);
+
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
     gca.collect(gce);
@@ -208,8 +236,16 @@ public class GarbageCollectionTest {
     gce.addFileReference("4", null, "/t0/F000.rf");
     gce.addFileReference("4", null, "/t0/F001.rf");
     gce.addFileReference("4", null, "/t0/F002.rf");
+    gce.addDirReference("4", null, "hdfs://foo.com:6000/accumulo/tables/4/t0");
+    gce.addPrevRowReference("4", null);
+
     gce.addFileReference("5", null, "../4/t0/F000.rf");
+    gce.addDirReference("5", null, "hdfs://foo.com:6000/accumulo/tables/4/t0");
+    gce.addPrevRowReference("5", null);
+
     gce.addFileReference("6", null, 
"hdfs://foo.com:6000/accumulo/tables/4/t0/F000.rf");
+    gce.addDirReference("6", null, "hdfs://foo.com:6000/accumulo/tables/4/t0");
+    gce.addPrevRowReference("6", null);
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
@@ -309,18 +345,37 @@ public class GarbageCollectionTest {
     gce.candidates.add("hdfs://foo:6000/accumulo/tables/d/t-0");
 
     gce.addDirReference("4", null, "/t-0");
-    gce.addDirReference("5", null, "/t-0");
+    gce.addPrevRowReference("4", null);
+    gce.addDirReference("5", null, 
"hdfs://foo.com:6000/accumulo/tables/5/t-0");
+    gce.addPrevRowReference("5", null);
     gce.addDirReference("6", null, 
"hdfs://foo.com:6000/accumulo/tables/6/t-0");
+    gce.addPrevRowReference("6", null);
     gce.addDirReference("7", null, 
"hdfs://foo.com:6000/accumulo/tables/7/t-0");
+    gce.addPrevRowReference("7", null);
 
     gce.addFileReference("8", "m", "/t-0/F00.rf");
+    gce.addDirReference("8", "m", "/t-0");
+    gce.addPrevRowReference("8", "m");
+
     gce.addFileReference("9", "m", "/t-0/F00.rf");
+    gce.addDirReference("9", "m", "/t-0");
+    gce.addPrevRowReference("9", "m");
 
     gce.addFileReference("a", "m", 
"hdfs://foo.com:6000/accumulo/tables/a/t-0/F00.rf");
+    gce.addDirReference("a", "m", "hdfs://foo.com:6000/accumulo/tables/a/t-0");
+    gce.addPrevRowReference("a", "m");
+
     gce.addFileReference("b", "m", 
"hdfs://foo.com:6000/accumulo/tables/b/t-0/F00.rf");
+    gce.addDirReference("b", "m", "hdfs://foo.com:6000/accumulo/tables/b/t-0");
+    gce.addPrevRowReference("b", "m");
 
     gce.addFileReference("e", "m", "../c/t-0/F00.rf");
+    gce.addDirReference("e", "m", "hdfs://foo.com:6000/accumulo/tables/c/t-0");
+    gce.addPrevRowReference("e", "m");
+
     gce.addFileReference("f", "m", "../d/t-0/F00.rf");
+    gce.addDirReference("f", "m", "hdfs://foo.com:6000/accumulo/tables/d/t-0");
+    gce.addPrevRowReference("f", "m");
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
@@ -330,24 +385,49 @@ public class GarbageCollectionTest {
 
     // Removing the dir reference for a table will delete all tablet 
directories
     gce.removeDirReference("5", null);
+    gce.removePrevRowReference("5", null);
+
     gca.collect(gce);
     assertRemoved(gce, "hdfs://foo.com:6000/accumulo/tables/5/t-0");
 
     gce.removeDirReference("4", null);
+    gce.removePrevRowReference("4", null);
+
     gca.collect(gce);
     assertRemoved(gce, "/4/t-0");
 
     gce.removeDirReference("6", null);
+    gce.removePrevRowReference("6", null);
     gce.removeDirReference("7", null);
+    gce.removePrevRowReference("7", null);
+
     gca.collect(gce);
     assertRemoved(gce, "/6/t-0", "hdfs://foo:6000/accumulo/tables/7/t-0/");
 
     gce.removeFileReference("8", "m", "/t-0/F00.rf");
+    gce.removeDirReference("8", "m");
+    gce.removePrevRowReference("8", "m");
+
     gce.removeFileReference("9", "m", "/t-0/F00.rf");
+    gce.removeDirReference("9", "m");
+    gce.removePrevRowReference("9", "m");
+
     gce.removeFileReference("a", "m", 
"hdfs://foo.com:6000/accumulo/tables/a/t-0/F00.rf");
+    gce.removeDirReference("a", "m");
+    gce.removePrevRowReference("a", "m");
+
     gce.removeFileReference("b", "m", 
"hdfs://foo.com:6000/accumulo/tables/b/t-0/F00.rf");
+    gce.removeDirReference("b", "m");
+    gce.removePrevRowReference("b", "m");
+
     gce.removeFileReference("e", "m", "../c/t-0/F00.rf");
+    gce.removeDirReference("e", "m");
+    gce.removePrevRowReference("e", "m");
+
     gce.removeFileReference("f", "m", "../d/t-0/F00.rf");
+    gce.removeDirReference("f", "m");
+    gce.removePrevRowReference("f", "m");
+
     gca.collect(gce);
     assertRemoved(gce, "/8/t-0", "hdfs://foo:6000/accumulo/tables/9/t-0", 
"/a/t-0",
         "hdfs://foo:6000/accumulo/tables/b/t-0", "/c/t-0", 
"hdfs://foo:6000/accumulo/tables/d/t-0");
@@ -373,18 +453,37 @@ public class GarbageCollectionTest {
     gce.candidates.add("hdfs://foo:6000/user/foo/tables/d/t-0");
 
     gce.addDirReference("4", null, "/t-0");
+    gce.addPrevRowReference("4", null);
     gce.addDirReference("5", null, "/t-0");
+    gce.addPrevRowReference("5", null);
     gce.addDirReference("6", null, 
"hdfs://foo.com:6000/user/foo/tables/6/t-0");
+    gce.addPrevRowReference("6", null);
     gce.addDirReference("7", null, 
"hdfs://foo.com:6000/user/foo/tables/7/t-0");
+    gce.addPrevRowReference("7", null);
 
     gce.addFileReference("8", "m", "/t-0/F00.rf");
+    gce.addDirReference("8", "m", "hdfs://foo.com:6000/user/foo/tables/8/t-0");
+    gce.addPrevRowReference("8", "m");
+
     gce.addFileReference("9", "m", "/t-0/F00.rf");
+    gce.addDirReference("9", "m", "hdfs://foo.com:6000/user/foo/tables/9/t-0");
+    gce.addPrevRowReference("9", "m");
 
     gce.addFileReference("a", "m", 
"hdfs://foo.com:6000/user/foo/tables/a/t-0/F00.rf");
+    gce.addDirReference("a", "m", "hdfs://foo.com:6000/user/foo/tables/a/t-0");
+    gce.addPrevRowReference("a", "m");
+
     gce.addFileReference("b", "m", 
"hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf");
+    gce.addDirReference("b", "m", "hdfs://foo.com:6000/user/foo/tables/b/t-0");
+    gce.addPrevRowReference("b", "m");
 
     gce.addFileReference("e", "m", "../c/t-0/F00.rf");
+    gce.addDirReference("e", "m", "hdfs://foo.com:6000/user/foo/tables/c/t-0");
+    gce.addPrevRowReference("e", "m");
+
     gce.addFileReference("f", "m", "../d/t-0/F00.rf");
+    gce.addDirReference("f", "m", "hdfs://foo.com:6000/user/foo/tables/d/t-0");
+    gce.addPrevRowReference("f", "m");
 
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
@@ -394,24 +493,48 @@ public class GarbageCollectionTest {
 
     // Removing the dir reference for a table will delete all tablet 
directories
     gce.removeDirReference("5", null);
+    gce.removePrevRowReference("5", null);
     gca.collect(gce);
     assertRemoved(gce, "hdfs://foo.com:6000/user/foo/tables/5/t-0");
 
     gce.removeDirReference("4", null);
+    gce.removePrevRowReference("4", null);
     gca.collect(gce);
     assertRemoved(gce, "/4/t-0");
 
     gce.removeDirReference("6", null);
+    gce.removePrevRowReference("6", null);
+
     gce.removeDirReference("7", null);
+    gce.removePrevRowReference("7", null);
+
     gca.collect(gce);
     assertRemoved(gce, "/6/t-0", "hdfs://foo:6000/user/foo/tables/7/t-0/");
 
     gce.removeFileReference("8", "m", "/t-0/F00.rf");
+    gce.removeDirReference("8", "m");
+    gce.removePrevRowReference("8", "m");
+
     gce.removeFileReference("9", "m", "/t-0/F00.rf");
+    gce.removeDirReference("9", "m");
+    gce.removePrevRowReference("9", "m");
+
     gce.removeFileReference("a", "m", 
"hdfs://foo.com:6000/user/foo/tables/a/t-0/F00.rf");
+    gce.removeDirReference("a", "m");
+    gce.removePrevRowReference("a", "m");
+
     gce.removeFileReference("b", "m", 
"hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf");
+    gce.removeDirReference("b", "m");
+    gce.removePrevRowReference("b", "m");
+
     gce.removeFileReference("e", "m", "../c/t-0/F00.rf");
+    gce.removeDirReference("e", "m");
+    gce.removePrevRowReference("e", "m");
+
     gce.removeFileReference("f", "m", "../d/t-0/F00.rf");
+    gce.removeDirReference("f", "m");
+    gce.removePrevRowReference("f", "m");
+
     gca.collect(gce);
     assertRemoved(gce, "/8/t-0", "hdfs://foo:6000/user/foo/tables/9/t-0", 
"/a/t-0",
         "hdfs://foo:6000/user/foo/tables/b/t-0", "/c/t-0", 
"hdfs://foo:6000/user/foo/tables/d/t-0");
@@ -470,8 +593,8 @@ public class GarbageCollectionTest {
   @Test
   public void testBadDeletes() throws Exception {
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
-
     TestGCE gce = new TestGCE();
+
     gce.candidates.add("");
     gce.candidates.add("A");
     gce.candidates.add("/");
@@ -492,12 +615,13 @@ public class GarbageCollectionTest {
 
   @Test
   public void test() throws Exception {
-
     GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
 
     TestGCE gce = new TestGCE();
+
     gce.candidates.add("/1636/default_tablet");
     gce.addDirReference("1636", null, "/default_tablet");
+    gce.addPrevRowReference("1636", null);
     gca.collect(gce);
     assertRemoved(gce);
 
@@ -515,6 +639,7 @@ public class GarbageCollectionTest {
     gce = new TestGCE();
     gce.addFileReference("1636", null, "../9/default_tablet/someFile");
     gce.addDirReference("1636", null, "/default_tablet");
+    gce.addPrevRowReference("1636", null);
     gce.candidates.add("/9/default_tablet/someFile");
     gca.collect(gce);
     assertRemoved(gce);
@@ -554,6 +679,7 @@ public class GarbageCollectionTest {
     gce.candidates.add("hdfs://foo:6000/accumulo/tables/7/t-0/");
 
     gce.addDirReference("7", null, 
"hdfs://foo.com:6000/accumulo/tables/7/t-0");
+    gce.addPrevRowReference("7", null);
 
     gca.collect(gce);
 
@@ -644,4 +770,114 @@ public class GarbageCollectionTest {
     assertEquals(1, gce.deletes.size());
     assertEquals("hdfs://foo.com:6000/accumulo/tables/2/t-00002/A000002.rf", 
gce.deletes.get(0));
   }
+
+  /**
+   * Minimal test to show that dir and prevRow are required for valid scan (go 
path)
+   */
+  @Test
+  public void testDirAndPrevRow() throws Exception {
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet");
+    gce.addDirReference("1636", null, "/default_tablet");
+    gce.addPrevRowReference("1636", null);
+    gca.collect(gce);
+    assertEquals(0, gce.deletes.size());
+  }
+
+  /**
+   * Show that IllegalState is thrown when no dir entry present in metadata 
scan in last row seen.
+   */
+  @Test
+  public void testNoDirAsLastRow() {
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet");
+    gce.addPrevRowReference("1636", null);
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+    assertEquals(1, gce.candidates.size());
+  }
+
+  /**
+   * Show that IllegalState is thrown when no dir entry present in metadata 
scan.
+   */
+  @Test
+  public void testNoDir() {
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet/f1");
+    gce.addPrevRowReference("1636", "a");
+
+    gce.candidates.add("/1636/t1/f2");
+    gce.addDirReference("1636", null, "/t1");
+    gce.addPrevRowReference("1636", null);
+
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+    assertEquals(2, gce.candidates.size());
+  }
+
+  /**
+   * Show that IllegalState is thrown when no prev row present in metadata 
scan in last row seen.
+   */
+  @Test
+  public void testNoPrevRowAsLastRow() {
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet");
+    gce.addDirReference("1636", null, "/default_tablet");
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+    assertEquals(1, gce.candidates.size());
+  }
+
+  /**
+   * Show that IllegalState is thrown when no prevRow entry present in 
metadata scan.
+   */
+  @Test
+  public void testPrevRow() {
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet/f1");
+    gce.addDirReference("1636", "a", "/default_tablet");
+
+    gce.candidates.add("/1636/t1/f2");
+    gce.addDirReference("1636", null, "/t1");
+    gce.addPrevRowReference("1636", null);
+
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+    assertEquals(2, gce.candidates.size());
+  }
+
+  /**
+   * Show that IllegalState is thrown when the metadata scan has a prevRow entry but no dir entry.
+   */
+  @Test
+  public void testPrevRowOnly() {
+    TestGCE gce = new TestGCE();
+
+    gce.candidates.add("hdfs://foo:6000/accumulo/tables/4/t0/F000.rf");
+    gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/4/t0/F001.rf");
+    gce.candidates.add("hdfs://foo.com:6000/accumulo/tables/5/t0/F005.rf");
+
+    gce.addPrevRowReference("1636", null);
+
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+    assertEquals(3, gce.candidates.size());
+  }
+
+  @Test
+  public void testNoPrevRowNoDir() throws Exception {
+
+    GarbageCollectionAlgorithm gca = new GarbageCollectionAlgorithm();
+
+    TestGCE gce = new TestGCE();
+    gce.candidates.add("/1636/default_tablet");
+    gce.addFileReference("b", "m", 
"hdfs://foo.com:6000/user/foo/tables/b/t-0/F00.rf");
+    assertThrows(IllegalStateException.class, () -> gca.collect(gce));
+  }
 }

Reply via email to