This is an automated email from the ASF dual-hosted git repository. ddanielr pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit 1aa81d81b0ff0da4dcd9ed4e33e599d701297c01 Merge: 6cfa1b2ea7 01f48adf38 Author: Daniel Roberts <ddani...@gmail.com> AuthorDate: Wed Dec 13 15:35:48 2023 +0000 Merge branch '2.1' .../core/client/MutationsRejectedException.java | 11 ++- .../server/constraints/MetadataConstraints.java | 20 +---- .../constraints/MetadataConstraintsTest.java | 94 ++++------------------ 3 files changed, 24 insertions(+), 101 deletions(-) diff --cc server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java index aa9772a159,ced97d0a68..617bf20e86 --- a/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java +++ b/server/base/src/main/java/org/apache/accumulo/server/constraints/MetadataConstraints.java @@@ -390,9 -327,7 +374,9 @@@ public class MetadataConstraints implem case 7: return "Lock not held in zookeeper by writer"; case 8: - return "Bulk load transaction no longer running"; + return "Bulk load mutation contains either inconsistent files or multiple fateTX ids"; + case 9: + return "Invalid data file metadata format"; } return null; } diff --cc server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java index 38f6d27ad2,bd59fef1e7..135a2bce60 --- a/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java +++ b/server/base/src/test/java/org/apache/accumulo/server/constraints/MetadataConstraintsTest.java @@@ -40,8 -33,6 +40,7 @@@ import org.apache.accumulo.core.metadat import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.ServerColumnFamily; import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.TabletColumnFamily; import org.apache.accumulo.server.ServerContext; - import org.apache.accumulo.server.zookeeper.TransactionWatcher.Arbitrator; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.easymock.EasyMock; import org.junit.jupiter.api.Test; @@@ -158,376 -128,77 +136,332 @@@ public class MetadataConstraintsTest Mutation m; List<Short> violations; - // inactive txid - m = new Mutation(new Text("0;foo")); - m.put( - BulkFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("12345")); - m.put( - DataFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new DataFileValue(1, 1).encodeAsValue()); - assertViolation(mc, m, (short) 8); - - // txid that throws exception - m = new Mutation(new Text("0;foo")); - m.put( - BulkFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("9")); - m.put( - DataFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new DataFileValue(1, 1).encodeAsValue()); - assertViolation(mc, m, (short) 8); - - // active txid w/ file + // loaded marker w/ file m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); violations = mc.check(createEnv(), m); assertNull(violations); - // active txid w/o file + // loaded marker w/o file m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + assertViolation(mc, m, (short) 8); - // two active txids w/ files + // two files w/ same txid m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile2"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2")).getMetadataText(), - new Value("7")); ++ new Value("5")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - assertViolation(mc, m, (short) 8); + violations = mc.check(createEnv(), m); + assertNull(violations); - // two files w/ one active txid + // two files w/ different txid m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("7")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile2"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2")).getMetadataText(), - new Value("5")); ++ new Value("7")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); -- violations = mc.check(createEnv(), m); - assertNull(violations); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); ++ assertViolation(mc, m, (short) 8); - // two loaded w/ one active txid and one file + // two loaded markers but only one file. m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); - m.put(DataFileColumnFamily.NAME, new Text("/someFile"), + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + m.put( + DataFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), new DataFileValue(1, 1).encodeAsValue()); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile2"), new Value("5")); - violations = mc.check(createEnv(), m); - assertNotNull(violations); - assertEquals(1, violations.size()); - assertEquals(Short.valueOf((short) 8), violations.get(0)); + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile2")).getMetadataText(), + new Value("5")); + assertViolation(mc, m, (short) 8); - // active txid, mutation that looks like split + // mutation that looks like split m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1")); violations = mc.check(createEnv(), m); assertNull(violations); - // inactive txid, mutation that looks like split - m = new Mutation(new Text("0;foo")); - m.put( - BulkFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("12345")); - ServerColumnFamily.DIRECTORY_COLUMN.put(m, new Value("/t1")); - violations = mc.check(createEnv(), m); - assertNull(violations); - - // active txid, mutation that looks like a load + // mutation that looks like a load m = new Mutation(new Text("0;foo")); - m.put(BulkFileColumnFamily.NAME, new Text("/someFile"), new Value("5")); + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new Value("127.0.0.1:9997")); violations = mc.check(createEnv(), m); assertNull(violations); - // inactive txid, mutation that looks like a load - m = new Mutation(new Text("0;foo")); - m.put( - BulkFileColumnFamily.NAME, StoredTabletFile - .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), - new Value("12345")); - m.put(CurrentLocationColumnFamily.NAME, new Text("789"), new Value("127.0.0.1:9997")); - violations = mc.check(createEnv(), m); - assertNull(violations); - // deleting a load flag m = new Mutation(new Text("0;foo")); - m.putDelete(BulkFileColumnFamily.NAME, new Text("/someFile")); + m.putDelete(BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText()); violations = mc.check(createEnv(), m); assertNull(violations); + // Missing beginning of path + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata() + .replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Missing tables directory in path + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", + "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // No DataFileColumnFamily included + m = new Mutation(new Text("0;foo")); + m.put( + BulkFileColumnFamily.NAME, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new Value("5")); + assertViolation(mc, m, (short) 8); + + // Bad Json - only path (old format) so should fail parsing + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key is missing so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text( + "{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path key replaced with empty string so validation should fail + // {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path value missing + // {"path":"","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - test endRow key missing so validation should fail + m = new Mutation(new Text("0;foo")); + m.put( + BulkFileColumnFamily.NAME, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + // Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is + // required for an endRow so will fail validation + m = new Mutation(new Text("0;foo")); + m.put(BulkFileColumnFamily.NAME, + new Text(StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadata().replaceFirst("\"endRow\":\".*\"", + "\"endRow\":\"" + encodeRowForMetadata("bad") + "\"")), + new Value("5")); + assertViolation(mc, m, (short) 9); + + } + + @Test + public void testDataFileCheck() { + testFileMetadataValidation(DataFileColumnFamily.NAME, new DataFileValue(1, 1).encodeAsValue()); + } + + @Test + public void testScanFileCheck() { + testFileMetadataValidation(ScanFileColumnFamily.NAME, new Value()); + } + + private void testFileMetadataValidation(Text columnFamily, Value value) { - MetadataConstraints mc = new TestMetadataConstraints(); ++ MetadataConstraints mc = new MetadataConstraints(); + Mutation m; + List<Short> violations; + + // Missing beginning of path + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata() + .replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", "/someFile")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - only path (old format) so should fail parsing + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, new Text("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path key replaced with empty string so validation should fail + // {"":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("path", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test path value missing + // {"path":"","startRow":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata().replaceFirst("\"path\":\".*\",\"startRow", "\"path\":\"\",\"startRow")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("startRow", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test startRow key is missing so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text( + "{\"path\":\"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile\",\"endRow\":\"\"}"), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - test endRow key replaced with empty string so validation should fail + // {"path":"hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile","":"","endRow":""} + m = new Mutation(new Text("0;foo")); + m.put( + columnFamily, new Text(StoredTabletFile + .serialize("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile").replace("endRow", "")), + value); + assertViolation(mc, m, (short) 9); + + // Bad Json - endRow will be replaced with encoded row without the exclusive byte 0x00 which is + // required for an endRow so this will fail validation + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text(StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadata() + .replaceFirst("\"endRow\":\".*\"", "\"endRow\":\"" + encodeRowForMetadata("b") + "\"")), + value); + assertViolation(mc, m, (short) 9); + + // Missing tables directory in path + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + new Text(StoredTabletFile.of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")) + .getMetadata().replace("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile", + "hdfs://1.2.3.4/accumulo/2a/t-0003/someFile")), + new DataFileValue(1, 1).encodeAsValue()); + assertViolation(mc, m, (short) 9); + + // Should pass validation (inf range) + m = new Mutation(new Text("0;foo")); + m.put( + columnFamily, StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile")).getMetadataText(), + new DataFileValue(1, 1).encodeAsValue()); + violations = mc.check(createEnv(), m); + assertNull(violations); + + // Should pass validation with range set + m = new Mutation(new Text("0;foo")); + m.put(columnFamily, + StoredTabletFile + .of(new Path("hdfs://1.2.3.4/accumulo/tables/2a/t-0003/someFile"), new Range("a", "b")) + .getMetadataText(), + new DataFileValue(1, 1).encodeAsValue()); + violations = mc.check(createEnv(), m); + assertNull(violations); + + assertNotNull(mc.getViolationDescription((short) 9)); + } + + // Encode a row how it would appear in Json + private static String encodeRowForMetadata(String row) { + try { + Method method = StoredTabletFile.class.getDeclaredMethod("encodeRow", Key.class); + method.setAccessible(true); + return Base64.getUrlEncoder() + .encodeToString((byte[]) method.invoke(StoredTabletFile.class, new Key(row))); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + private void assertViolation(MetadataConstraints mc, Mutation m, Short violation) { + List<Short> violations = mc.check(createEnv(), m); + assertNotNull(violations); + assertEquals(1, violations.size()); + assertEquals(violation, violations.get(0)); + assertNotNull(mc.getViolationDescription(violations.get(0))); } }