This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new a4eb2240cd improved validation of load plan json (#5465) a4eb2240cd is described below commit a4eb2240cd79adc6f5fb3b552bfb1f92f1c369ee Author: Keith Turner <ktur...@apache.org> AuthorDate: Thu Apr 10 11:13:50 2025 -0400 improved validation of load plan json (#5465) Converting bulk load plan json to a LoadPlan was not very strict and for cases that it did fail it threw many different runtime exceptions. Made validation of the json more strict and made the code always throw the same exception type when json is not valid. Added unit test for many different cases of illegal input. Co-authored-by: Christopher Tubbs <ctubb...@apache.org> --- .../org/apache/accumulo/core/data/LoadPlan.java | 46 ++++++++++++++++++++-- .../apache/accumulo/core/data/LoadPlanTest.java | 36 +++++++++++++++++ .../apache/accumulo/test/functional/BulkNewIT.java | 5 +++ 3 files changed, 83 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/data/LoadPlan.java b/core/src/main/java/org/apache/accumulo/core/data/LoadPlan.java index 7e332d85fd..7b2d9cce05 100644 --- a/core/src/main/java/org/apache/accumulo/core/data/LoadPlan.java +++ b/core/src/main/java/org/apache/accumulo/core/data/LoadPlan.java @@ -21,6 +21,7 @@ package org.apache.accumulo.core.data; import static java.nio.charset.StandardCharsets.UTF_8; import java.io.IOException; +import java.lang.reflect.Field; import java.net.URI; import java.nio.file.Paths; import java.util.Arrays; @@ -29,6 +30,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; import java.util.SortedSet; import java.util.function.Function; import java.util.stream.Collectors; @@ -45,6 +47,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.primitives.UnsignedBytes; import com.google.gson.Gson; import com.google.gson.GsonBuilder; +import com.google.gson.JsonArray; +import com.google.gson.JsonObject; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; @@ -139,7 +143,7 @@ public class LoadPlan { "Start row is greater than or equal to end row : " + srs + " " + ers); } } else { - throw new IllegalStateException(); + throw new IllegalStateException("Unknown range type : " + rangeType); } } @@ -325,15 +329,49 @@ public class LoadPlan { return gson.toJson(new JsonAll(destinations)); } + private static final Set<String> EXPECTED_FIELDS = Arrays + .stream(JsonAll.class.getDeclaredFields()).map(Field::getName).collect(Collectors.toSet()); + private static final Set<String> EXPECTED_DEST_FIELDS = + Arrays.stream(JsonDestination.class.getDeclaredFields()).map(Field::getName) + .collect(Collectors.toSet()); + /** * Deserializes json to a load plan. * * @param json produced by {@link #toJson()} + * @throws IllegalArgumentException when illegal json is given or legal json that does not follow + * the load plan schema is given. The minimal acceptable json is + * {@code {"destinations":[]}}. + * @throws NullPointerException when json argument is null */ public static LoadPlan fromJson(String json) { - var dests = gson.fromJson(json, JsonAll.class).destinations.stream() - .map(JsonDestination::toDestination).collect(Collectors.toUnmodifiableList()); - return new LoadPlan(dests); + try { + Objects.requireNonNull(json); + Preconditions.checkArgument(!json.isBlank(), "Empty json is not accepted."); + // https://github.com/google/gson/issues/188 Gson does not support failing when extra fields + // are present. This is custom code to check for extra fields. + var jsonObj = gson.fromJson(json, JsonObject.class); + Preconditions.checkArgument(jsonObj.keySet().equals(EXPECTED_FIELDS), + "Expected fields %s and saw %s", EXPECTED_FIELDS, jsonObj.keySet()); + Preconditions.checkArgument(jsonObj.get("destinations") instanceof JsonArray, + "Expected value of destinations field to be array"); + var destinations = jsonObj.getAsJsonArray("destinations"); + destinations.forEach(dest -> { + var keySet = dest.getAsJsonObject().keySet(); + Preconditions.checkArgument(keySet.equals(EXPECTED_DEST_FIELDS), + "Expected fields %s and saw %s", EXPECTED_DEST_FIELDS, keySet); + }); + + var dests = gson.fromJson(json, JsonAll.class).destinations.stream() + .map(JsonDestination::toDestination).collect(Collectors.toUnmodifiableList()); + return new LoadPlan(dests); + } catch (NullPointerException | IllegalArgumentException e) { + throw e; + } catch (RuntimeException e) { + // GSon code can throw a few runtime exceptions, lets not let Gson exceptions escape directly + // so that its easier to change the implementation away from gson in the future. + throw new IllegalArgumentException(e); + } } /** diff --git a/core/src/test/java/org/apache/accumulo/core/data/LoadPlanTest.java b/core/src/test/java/org/apache/accumulo/core/data/LoadPlanTest.java index e385c3e673..ae63ccc5f6 100644 --- a/core/src/test/java/org/apache/accumulo/core/data/LoadPlanTest.java +++ b/core/src/test/java/org/apache/accumulo/core/data/LoadPlanTest.java @@ -23,9 +23,11 @@ import static java.util.stream.Collectors.toSet; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import java.util.ArrayList; import java.util.Base64; import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.stream.Collectors; @@ -138,6 +140,40 @@ public class LoadPlanTest { assertEquals(expected.replace("'", "\""), json); } + @Test + public void testIllegalJson() { + assertThrows(NullPointerException.class, () -> LoadPlan.fromJson(null)); + + List<String> illegalJson = new ArrayList<>(); + // Test json with extraneous stuff in it + illegalJson.add("{'dest':[],'destinations':[]}"); + // lets try XML + illegalJson.add("<destinations></destinations>"); + // try an empty string + illegalJson.add(""); + illegalJson.add(" "); + // try incomplete json + illegalJson.add("{'destinations':[{'fileName':'f1.rf'"); + // try extra field in the destination + illegalJson.add( + "{'destinations':[{'host':'localhost',fileName':'f1.rf','startRow':null,'endRow':'g','rangeType':'TABLE'}]}"); + // try an illegal range type + illegalJson.add( + "{'destinations':[{'fileName':'f1.rf','startRow':null,'endRow':null,'rangeType':'LARGE'}]}"); + // try object value instead of array for destinations field + illegalJson.add( + "{'destinations':{'fileName':'f1.rf','startRow':null,'endRow': null,'rangeType':'TABLE'}}"); + // try array of array value instead of array for destinations field + illegalJson.add( + "{'destinations':[[{'fileName':'f1.rf','startRow':null,'endRow': null,'rangeType':'TABLE'}]]}"); + // try a row value that is not valid base 64 + illegalJson.add( + "{'destinations':[{'fileName':'f1.rf','startRow':null,'endRow': '~!@#$%^&*()_+','rangeType':'TABLE'}]}"); + + illegalJson.forEach(json -> assertThrows(IllegalArgumentException.class, + () -> LoadPlan.fromJson(json.replace("'", "\"")))); + } + @Test public void testTableSplits() { assertThrows(IllegalArgumentException.class, diff --git a/test/src/main/java/org/apache/accumulo/test/functional/BulkNewIT.java b/test/src/main/java/org/apache/accumulo/test/functional/BulkNewIT.java index 26784ccfb5..6ae4d025a9 100644 --- a/test/src/main/java/org/apache/accumulo/test/functional/BulkNewIT.java +++ b/test/src/main/java/org/apache/accumulo/test/functional/BulkNewIT.java @@ -476,6 +476,11 @@ public class BulkNewIT extends SharedMiniClusterBase { .loadFileTo("f2.rf", RangeType.TABLE, null, row(555)).build(); final var nonExistentBoundary = importMappingOptions.plan(loadPlan); assertThrows(AccumuloException.class, nonExistentBoundary::load); + + // Create an empty load plan + loadPlan = LoadPlan.builder().build(); + final var emptyLoadPlan = importMappingOptions.plan(loadPlan); + assertThrows(IllegalArgumentException.class, emptyLoadPlan::load); } }