This is an automated email from the ASF dual-hosted git repository.

cshannon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 191ea250bf Add more file metadata validation tests (#3774)
191ea250bf is described below

commit 191ea250bf9caed4e663ec3b4b66a8d2faee7452
Author: Christopher L. Shannon <christopher.l.shan...@gmail.com>
AuthorDate: Sun Oct 1 11:03:06 2023 -0400

    Add more file metadata validation tests (#3774)
    
    This addresses part of #3766 and adds more tests to verify file metadata
    validation works properly for columns that used StoredTabletFile
    metadata. Tests include verifying old format is invalid, the json
    format is correctly validated and rows are a row range
---
 .../accumulo/core/metadata/StoredTabletFile.java   |   8 +
 .../constraints/MetadataConstraintsTest.java       | 209 ++++++++++++++++-----
 2 files changed, 167 insertions(+), 50 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java 
b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
index 68475ae65c..7a69d4f6f8 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/StoredTabletFile.java
@@ -177,6 +177,14 @@ public class StoredTabletFile extends 
AbstractTabletFile<StoredTabletFile> {
   private static TabletFileCq deserialize(String json) {
     final TabletFileCqMetadataGson metadata =
         gson.fromJson(Objects.requireNonNull(json), 
TabletFileCqMetadataGson.class);
+
+    // Check each field and provide better error messages if null as all 
fields should be set
+    Objects.requireNonNull(metadata.path, "Serialized StoredTabletFile path 
must not be null");
+    Objects.requireNonNull(metadata.startRow,
+        "Serialized StoredTabletFile range startRow must not be null");
+    Objects.requireNonNull(metadata.endRow,
+        "Serialized StoredTabletFile range endRow must not be null");
+
     // Recreate the exact Range that was originally stored in Metadata. Stored 
ranges are originally
     // constructed with inclusive/exclusive for the start and end key 
inclusivity settings.
     // (Except for Ranges with no start/endkey as then the inclusivity flags 
do not matter)
diff --git 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
index 1aeadb6736..f891a0b132 100644
--- 
a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
+++ 
b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java
@@ -22,9 +22,12 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertNull;
 
+import java.lang.reflect.Method;
 import java.net.URI;
+import java.util.Base64;
 import java.util.List;
 
+import org.apache.accumulo.core.data.Key;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.data.Range;
 import org.apache.accumulo.core.data.Value;
@@ -167,10 +170,7 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
             .getMetadataText(),
         new DataFileValue(1, 1).encodeAsValue());
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
+    assertViolation(mc, m, (short) 8);
 
     // txid that throws exception
     m = new Mutation(new Text("0;foo"));
@@ -184,10 +184,7 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
             .getMetadataText(),
         new DataFileValue(1, 1).encodeAsValue());
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
+    assertViolation(mc, m, (short) 8);
 
     // active txid w/ file
     m = new Mutation(new Text("0;foo"));
@@ -211,10 +208,7 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
             .getMetadataText(),
         new Value("5"));
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
+    assertViolation(mc, m, (short) 8);
 
     // two active txids w/ files
     m = new Mutation(new Text("0;foo"));
@@ -238,10 +232,7 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new 
Range())
             .getMetadataText(),
         new DataFileValue(1, 1).encodeAsValue());
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
+    assertViolation(mc, m, (short) 8);
 
     // two files w/ one active txid
     m = new Mutation(new Text("0;foo"));
@@ -285,10 +276,7 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2"), new 
Range())
             .getMetadataText(),
         new Value("5"));
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
+    assertViolation(mc, m, (short) 8);
 
     // active txid, mutation that looks like split
     m = new Mutation(new Text("0;foo"));
@@ -349,11 +337,7 @@ public class MetadataConstraintsTest {
         .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range())
         
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", 
"/someFile")),
         new Value("5"));
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 9), violations.get(0));
-    assertNotNull(mc.getViolationDescription(violations.get(0)));
+    assertViolation(mc, m, (short) 9);
 
     // Missing tables directory in path
     m = new Mutation(new Text("0;foo"));
@@ -363,10 +347,7 @@ public class MetadataConstraintsTest {
             
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile",
                 "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")),
         new Value("5"));
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 9), violations.get(0));
+    assertViolation(mc, m, (short) 9);
 
     // No DataFileColumnFamily included
     m = new Mutation(new Text("0;foo"));
@@ -375,11 +356,67 @@ public class MetadataConstraintsTest {
             
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
             .getMetadataText(),
         new Value("5"));
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 8), violations.get(0));
-    assertNotNull(mc.getViolationDescription(violations.get(0)));
+    assertViolation(mc, m, (short) 8);
+
+    // Bad Json - only path (old format) so should fail parsing
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME, new 
Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test startRow key is missing so validation should fail
+    // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME,
+        new Text(
+            
"{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test path key replaced with empty string so validation 
should fail
+    // 
{"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(
+        BulkFileColumnFamily.NAME, new Text(StoredTabletFile
+            
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", 
"")),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test path value missing
+    // {"path":"","startRow":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME,
+        new Text(StoredTabletFile
+            
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
+            .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", 
"\"path\":\"\",\"startRow")),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test startRow key replaced with empty string so validation 
should fail
+    // 
{"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile
+        
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow",
 "")),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test endRow key missing so validation should fail
+    m = new Mutation(new Text("0;foo"));
+    m.put(
+        BulkFileColumnFamily.NAME, new Text(StoredTabletFile
+            
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow",
 "")),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - endRow will be replaced with encoded row without the 
exclusive byte 0x00 which is
+    // required for an endRow so will fail validation
+    m = new Mutation(new Text("0;foo"));
+    m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile
+        .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
+        .getMetadata()
+        .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + 
encodeRowForMetadata("bad") + "\"")),
+        new Value("5"));
+    assertViolation(mc, m, (short) 9);
 
   }
 
@@ -404,20 +441,67 @@ public class MetadataConstraintsTest {
         .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range())
         
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", 
"/someFile")),
         value);
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 9), violations.get(0));
-    assertNotNull(mc.getViolationDescription(violations.get(0)));
+    assertViolation(mc, m, (short) 9);
 
-    // Bad Json - only path so should fail parsing
+    // Bad Json - only path (old format) so should fail parsing
     m = new Mutation(new Text("0;foo"));
     m.put(columnFamily, new 
Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), value);
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 9), violations.get(0));
-    assertNotNull(mc.getViolationDescription(violations.get(0)));
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test path key replaced with empty string so validation 
should fail
+    // 
{"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(
+        columnFamily, new Text(StoredTabletFile
+            
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", 
"")),
+        value);
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test path value missing
+    // {"path":"","startRow":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily,
+        new Text(StoredTabletFile
+            
.of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new 
Range())
+            .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", 
"\"path\":\"\",\"startRow")),
+        value);
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test startRow key replaced with empty string so validation 
should fail
+    // 
{"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, new Text(StoredTabletFile
+        
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow",
 "")),
+        value);
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test startRow key is missing so validation should fail
+    // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily,
+        new Text(
+            
"{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"),
+        value);
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - test endRow key replaced with empty string so validation 
should fail
+    // 
{"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""}
+    m = new Mutation(new Text("0;foo"));
+    m.put(
+        columnFamily, new Text(StoredTabletFile
+            
.serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow",
 "")),
+        value);
+    assertViolation(mc, m, (short) 9);
+
+    // Bad Json - endRow will be replaced with encoded row without the 
exclusive byte 0x00 which is
+    // required for an endRow so this will fail validation
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, new Text(StoredTabletFile
+        .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
+        .getMetadata()
+        .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + 
encodeRowForMetadata("b") + "\"")),
+        value);
+    assertViolation(mc, m, (short) 9);
 
     // Missing tables directory in path
     m = new Mutation(new Text("0;foo"));
@@ -427,12 +511,9 @@ public class MetadataConstraintsTest {
             
.getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile",
                 "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")),
         new DataFileValue(1, 1).encodeAsValue());
-    violations = mc.check(createEnv(), m);
-    assertNotNull(violations);
-    assertEquals(1, violations.size());
-    assertEquals(Short.valueOf((short) 9), violations.get(0));
+    assertViolation(mc, m, (short) 9);
 
-    // Should pass validation
+    // Should pass validation (inf range)
     m = new Mutation(new Text("0;foo"));
     m.put(columnFamily,
         StoredTabletFile
@@ -442,7 +523,35 @@ public class MetadataConstraintsTest {
     violations = mc.check(createEnv(), m);
     assertNull(violations);
 
+    // Should pass validation with range set
+    m = new Mutation(new Text("0;foo"));
+    m.put(columnFamily, StoredTabletFile
+        .of(URI.create("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), 
new Range("a", "b"))
+        .getMetadataText(), new DataFileValue(1, 1).encodeAsValue());
+    violations = mc.check(createEnv(), m);
+    assertNull(violations);
+
     assertNotNull(mc.getViolationDescription((short) 9));
   }
 
+  // Encode a row how it would appear in Json
+  private static String encodeRowForMetadata(String row) {
+    try {
+      Method method = StoredTabletFile.class.getDeclaredMethod("encodeRow", 
Key.class);
+      method.setAccessible(true);
+      return Base64.getUrlEncoder()
+          .encodeToString((byte[]) method.invoke(StoredTabletFile.class, new 
Key(row)));
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private void assertViolation(MetadataConstraints mc, Mutation m, Short 
violation) {
+    List<Short> violations = mc.check(createEnv(), m);
+    assertNotNull(violations);
+    assertEquals(1, violations.size());
+    assertEquals(violation, violations.get(0));
+    assertNotNull(mc.getViolationDescription(violations.get(0)));
+  }
+
 }

Reply via email to