This is an automated email from the ASF dual-hosted git repository.
opwvhk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/main by this push:
new 09277d0deb AVRO-4039 [java] fix GenericData.newArray to only return an
appropriate array implementation (#3307)
09277d0deb is described below
commit 09277d0deb7b5a04fc5e5430c56f4fd160df01fb
Author: Mike Skells <[email protected]>
AuthorDate: Tue May 6 12:46:41 2025 +0100
AVRO-4039 [java] fix GenericData.newArray to only return an appropriate
array implementation (#3307)
* AVRO-4039 fix GenericData.newArray
only return an appropriate array
* AVRO-4039 fix GenericData.newArray
only return an appropriate array
* AVRO-4039 fix GenericData.newArray
spotless
* AVRO-4039
fix import that spotless removed
* AVRO-4039
review feedback
remove schema check on returned value
Check convertors with logical types
* AVRO-4039
review feedback
* AVRO-4039
review feedback
---
.../java/org/apache/avro/generic/GenericData.java | 86 +++++--
.../org/apache/avro/generic/PrimitivesArrays.java | 88 +++++--
.../org/apache/avro/generic/GenericDataTest.java | 262 +++++++++++++++++++++
3 files changed, 395 insertions(+), 41 deletions(-)
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
index 362ebdc9cf..77ae76007e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
@@ -57,6 +57,7 @@ import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.FastReaderBuilder;
import org.apache.avro.util.Utf8;
import org.apache.avro.util.internal.Accessor;
+import org.apache.avro.generic.PrimitivesArrays.PrimitiveArray;
import com.fasterxml.jackson.databind.JsonNode;
import org.apache.avro.util.springframework.ConcurrentReferenceHashMap;
@@ -1515,38 +1516,73 @@ public class GenericData {
}
- /*
+ /**
* Called to create new array instances. Subclasses may override to use a
- * different array implementation. By default, this returns a {@link
- * GenericData.Array}.
+ * different array implementation. By default, this returns a
+ * {@link GenericData.Array}.
+ *
+ * @param old the old array instance to reuse, if possible. If the old
array
+ * is an appropriate type, it may be cleared and returned.
+ * @param size the size of the array to create.
+ * @param schema the schema of the array elements.
*/
public Object newArray(Object old, int size, Schema schema) {
- if (old instanceof GenericArray) {
- ((GenericArray<?>) old).reset();
- return old;
- } else if (old instanceof Collection) {
- ((Collection<?>) old).clear();
- return old;
- } else {
- if (schema.getElementType().getType() == Type.INT) {
- return new PrimitivesArrays.IntArray(size, schema);
- }
- if (schema.getElementType().getType() == Type.BOOLEAN) {
- return new PrimitivesArrays.BooleanArray(size, schema);
- }
- if (schema.getElementType().getType() == Type.LONG) {
- return new PrimitivesArrays.LongArray(size, schema);
- }
- if (schema.getElementType().getType() == Type.FLOAT) {
- return new PrimitivesArrays.FloatArray(size, schema);
- }
- if (schema.getElementType().getType() == Type.DOUBLE) {
- return new PrimitivesArrays.DoubleArray(size, schema);
+ final var logicalType = schema.getElementType().getLogicalType();
+ final var conversion = getConversionFor(logicalType);
+ final var optimalValueType = optimalValueType(schema, logicalType,
+ conversion == null ? null : conversion.getConvertedType());
+
+ if (old != null) {
+ if (old instanceof GenericData.Array<?>) {
+ ((GenericData.Array<?>) old).reset();
+ return old;
+ } else if (old instanceof PrimitiveArray) {
+ var primitiveOld = (PrimitiveArray<?>) old;
+ if (primitiveOld.valueType() == optimalValueType) {
+ primitiveOld.reset();
+ return old;
+ }
+ } else if (old instanceof Collection) {
+ ((Collection<?>) old).clear();
+ return old;
}
- return new GenericData.Array<Object>(size, schema);
}
+ // we can't reuse the old array, so we create a new one
+ return PrimitivesArrays.createOptimizedArray(size, schema,
optimalValueType);
}
+ /**
+ * Determine the optimal value type for an array. The value type is
determined
+ * form the convertedElementType if supplied, otherwise the underlying type
from
+ * the schema
+ *
+ * @param schema the schema of the array
+ * @param convertedElementType the converted elements value type. This may
not
+ * be the same and the schema if for instance
there
+ * is a logical type, and a convertor is use
+ * @return an indicator for the type of the array, useful for
+ * {@link PrimitivesArrays#createOptimizedArray(int, Schema,
Schema.Type)}.
+ * May be null if the type is not optimised
+ */
+ public static Schema.Type optimalValueType(Schema schema, LogicalType
logicalType, Class<?> convertedElementType) {
+ if (logicalType == null)
+ // if there are no logical types- use the schema type
+ return schema.getElementType().getType();
+ else if (convertedElementType == null)
+ // if there is no convertor
+ return null;
+ else
+ // use the converted type
+ return PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS.get(convertedElementType);
+ }
+
+ private final static Map<Class<?>, Schema.Type>
PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS = Map.of(//
+ Long.TYPE, Schema.Type.LONG, //
+ Integer.TYPE, Schema.Type.INT, //
+ Float.TYPE, Schema.Type.FLOAT, //
+ Double.TYPE, Schema.Type.DOUBLE, //
+ Boolean.TYPE, Schema.Type.BOOLEAN);
+
/**
* Called to create new array instances. Subclasses may override to use a
* different map implementation. By default, this returns a {@link HashMap}.
diff --git
a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
index 91540005a9..34b69acdd0 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
@@ -17,7 +17,6 @@
*/
package org.apache.avro.generic;
-import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import java.util.Arrays;
@@ -25,15 +24,55 @@ import java.util.Collection;
public class PrimitivesArrays {
- public static class IntArray extends GenericData.AbstractArray<Integer> {
+ /**
+ * Create a primitive array if the value type is has an associated optimised
+ * implementation, otherwise a generic array is returned. The value type is
+ * determined form the convertedElementType if supplied, otherwise the
+ * underlying type from the schema
+ *
+ * @param size the size of the array to create
+ * @param schema the schema of the array
+ * @param valueType the converted elements value type. This may not be the
same
+ * and the schema if for instance there is a logical type,
and
+ * a convertor is use
+ * @return an instance of a primitive array or a Generic array if the value
type
+ * is does not have an associated optimised implementation.
+ */
+ public static GenericData.AbstractArray<?> createOptimizedArray(int size,
Schema schema, Schema.Type valueType) {
+
+ if (valueType != null)
+ switch (valueType) {
+ case INT:
+ return new PrimitivesArrays.IntArray(size, schema);
+ case BOOLEAN:
+ return new PrimitivesArrays.BooleanArray(size, schema);
+ case LONG:
+ return new PrimitivesArrays.LongArray(size, schema);
+ case FLOAT:
+ return new PrimitivesArrays.FloatArray(size, schema);
+ case DOUBLE:
+ return new PrimitivesArrays.DoubleArray(size, schema);
+ default:
+ break;
+ }
+ return new GenericData.Array<>(size, schema);
+ }
+
+ public abstract static class PrimitiveArray<T> extends
GenericData.AbstractArray<T> {
+ PrimitiveArray(Schema schema) {
+ super(schema);
+ }
+
+ public abstract Schema.Type valueType();
+ }
+
+ public static class IntArray extends PrimitiveArray<Integer> {
private static final int[] EMPTY = new int[0];
private int[] elements = EMPTY;
public IntArray(int capacity, Schema schema) {
super(schema);
- if (!Schema.Type.INT.equals(schema.getElementType().getType()))
- throw new AvroRuntimeException("Not a int array schema: " + schema);
if (capacity != 0)
elements = new int[capacity];
}
@@ -127,17 +166,20 @@ public class PrimitivesArrays {
elements[index1] = elements[index2];
elements[index2] = tmp;
}
+
+ @Override
+ public Schema.Type valueType() {
+ return Schema.Type.INT;
+ }
}
- public static class LongArray extends GenericData.AbstractArray<Long> {
+ public static class LongArray extends PrimitiveArray<Long> {
private static final long[] EMPTY = new long[0];
private long[] elements = EMPTY;
public LongArray(int capacity, Schema schema) {
super(schema);
- if (!Schema.Type.LONG.equals(schema.getElementType().getType()))
- throw new AvroRuntimeException("Not a long array schema: " + schema);
if (capacity != 0)
elements = new long[capacity];
}
@@ -231,17 +273,20 @@ public class PrimitivesArrays {
elements[index1] = elements[index2];
elements[index2] = tmp;
}
+
+ @Override
+ public Schema.Type valueType() {
+ return Schema.Type.LONG;
+ }
}
- public static class BooleanArray extends GenericData.AbstractArray<Boolean> {
+ public static class BooleanArray extends PrimitiveArray<Boolean> {
private static final byte[] EMPTY = new byte[0];
private byte[] elements = EMPTY;
public BooleanArray(int capacity, Schema schema) {
super(schema);
- if (!Schema.Type.BOOLEAN.equals(schema.getElementType().getType()))
- throw new AvroRuntimeException("Not a boolean array schema: " +
schema);
if (capacity != 0)
elements = new byte[1 + (capacity / Byte.SIZE)];
}
@@ -396,17 +441,20 @@ public class PrimitivesArrays {
this.set(index1, this.get(index2));
this.set(index2, tmp);
}
+
+ @Override
+ public Schema.Type valueType() {
+ return Schema.Type.BOOLEAN;
+ }
}
- public static class FloatArray extends GenericData.AbstractArray<Float> {
+ public static class FloatArray extends PrimitiveArray<Float> {
private static final float[] EMPTY = new float[0];
private float[] elements = EMPTY;
public FloatArray(int capacity, Schema schema) {
super(schema);
- if (!Schema.Type.FLOAT.equals(schema.getElementType().getType()))
- throw new AvroRuntimeException("Not a float array schema: " + schema);
if (capacity != 0)
elements = new float[capacity];
}
@@ -500,17 +548,20 @@ public class PrimitivesArrays {
this.set(index1, this.get(index2));
this.set(index2, tmp);
}
+
+ @Override
+ public Schema.Type valueType() {
+ return Schema.Type.FLOAT;
+ }
}
- public static class DoubleArray extends GenericData.AbstractArray<Double> {
+ public static class DoubleArray extends PrimitiveArray<Double> {
private static final double[] EMPTY = new double[0];
private double[] elements = EMPTY;
public DoubleArray(int capacity, Schema schema) {
super(schema);
- if (!Schema.Type.DOUBLE.equals(schema.getElementType().getType()))
- throw new AvroRuntimeException("Not a double array schema: " + schema);
if (capacity != 0)
elements = new double[capacity];
}
@@ -604,6 +655,11 @@ public class PrimitivesArrays {
this.set(index1, this.get(index2));
this.set(index2, tmp);
}
+
+ @Override
+ public Schema.Type valueType() {
+ return Schema.Type.DOUBLE;
+ }
}
}
diff --git
a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java
b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java
new file mode 100644
index 0000000000..040a71e2ea
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.generic;
+
+import org.apache.avro.Conversion;
+import org.apache.avro.LogicalType;
+import org.apache.avro.Schema;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class GenericDataTest {
+
+ static Schema createSchema(Schema.Type type) {
+ switch (type) {
+ case FIXED:
+ return Schema.createFixed("foo", null, null, 4);
+ case UNION:
+ return Schema.createUnion(Schema.create(Schema.Type.FLOAT),
Schema.create(Schema.Type.STRING));
+ case MAP:
+ return Schema.createMap(Schema.create(Schema.Type.FLOAT));
+ case ARRAY:
+ return Schema.createArray(Schema.create(Schema.Type.STRING));
+ case RECORD:
+ return Schema.createRecord("record", null, null, false);
+ case ENUM:
+ return Schema.createEnum("myEnum", null, null, Collections.emptyList());
+ default:
+ return Schema.create(type);
+ }
+ }
+
+ static Object sampleValue(Schema schema) {
+ if (schema.getLogicalType() != null) {
+ return new Object();
+ }
+ switch (schema.getElementType().getType()) {
+ case BOOLEAN:
+ return true;
+ case INT:
+ return Integer.MAX_VALUE;
+ case LONG:
+ return Long.MAX_VALUE;
+ case FLOAT:
+ return Float.MAX_VALUE;
+ case DOUBLE:
+ return Double.MAX_VALUE;
+ default:
+ return "foo";
+ }
+ }
+
+ static Schema createArraySchema(Schema.Type type) {
+ return Schema.createArray(createSchema(type));
+ }
+
+ static Schema createArraySchemaWithLogicalType(Schema.Type type) {
+ final LogicalType logicalType = new LogicalType("Mike");
+ Schema schema = logicalType.addToSchema(createSchema(type));
+ return Schema.createArray(schema);
+ }
+
+ static Map<Schema.Type, GenericData.AbstractArray<?>> validMappings = new
EnumMap<>(Schema.Type.class);
+ static {
+ for (Schema.Type type : Schema.Type.values()) {
+ switch (type) {
+ case INT:
+ validMappings.put(type, new PrimitivesArrays.IntArray(0,
createArraySchema(type)));
+ break;
+ case LONG:
+ validMappings.put(type, new PrimitivesArrays.LongArray(0,
createArraySchema(type)));
+ break;
+ case DOUBLE:
+ validMappings.put(type, new PrimitivesArrays.DoubleArray(0,
createArraySchema(type)));
+ break;
+ case FLOAT:
+ validMappings.put(type, new PrimitivesArrays.FloatArray(0,
createArraySchema(type)));
+ break;
+ case BOOLEAN:
+ validMappings.put(type, new PrimitivesArrays.BooleanArray(0,
createArraySchema(type)));
+ break;
+ default:
+ validMappings.put(type, new GenericData.Array<>(0,
createArraySchema(type)));
+ break;
+ }
+ }
+ }
+
+ public static Stream<Arguments> testNewArrayData() {
+
+ List<Arguments> data = new ArrayList<>();
+
+ validMappings.forEach((validKey, optimalValue) -> {
+ Class<?> optimalValueType = optimalValue.getClass();
+ // cant reuse null, or a string
+ final Schema arraySchema = createArraySchema(validKey);
+
+ data.add(Arguments.of("null input, " + validKey, arraySchema,
Collections.emptyList(), null, optimalValueType));
+ data.add(
+ Arguments.of("String input, " + validKey, arraySchema,
Collections.emptyList(), "foo", optimalValueType));
+ // should reuse arraylist & generic array
+ data.add(Arguments.of("ArrayList input, " + validKey, arraySchema,
Collections.emptyList(), new ArrayList<>(),
+ ArrayList.class));
+ data.add(Arguments.of("Generic input, " + validKey, arraySchema,
Collections.emptyList(),
+ new GenericData.Array<Object>(0, arraySchema),
GenericData.Array.class));
+ // with logical type
+ if (validKey != Schema.Type.UNION) {
+ data.add(Arguments.of("null (with logical type) input, " + validKey,
createArraySchemaWithLogicalType(validKey),
+ Collections.emptyList(), null, GenericData.Array.class));
+ data.add(Arguments.of("String (with logical type) input, " + validKey,
+ createArraySchemaWithLogicalType(validKey),
Collections.emptyList(), "foo", GenericData.Array.class));
+ data.add(Arguments.of("ArrayList (with logical type) input, " +
validKey, arraySchema, Collections.emptyList(),
+ new ArrayList<>(), ArrayList.class));
+ data.add(Arguments.of("Generic (with logical type) input, " +
validKey, arraySchema, Collections.emptyList(),
+ new GenericData.Array<Object>(0, arraySchema),
GenericData.Array.class));
+// with logical type and conversion
+
+ validMappings.forEach((targetKey, targetType) -> {
+ if (targetKey != Schema.Type.UNION) {
+ data.add(Arguments.of("null (with logical type) input, " +
validKey + " convert to " + targetType,
+ createArraySchemaWithLogicalType(targetKey),
singleConversion(targetKey), null, targetType.getClass()));
+ data.add(Arguments.of("String (with logical type) input, " +
validKey + " convert to " + targetType,
+ createArraySchemaWithLogicalType(targetKey),
singleConversion(targetKey), "foo",
+ targetType.getClass()));
+ data.add(Arguments.of("ArrayList (with logical type) input, " +
validKey + " convert to " + targetType,
+ createArraySchemaWithLogicalType(targetKey),
singleConversion(targetKey), new ArrayList<>(),
+ ArrayList.class));
+ data.add(Arguments.of("Generic (with logical type) input, " +
validKey, arraySchema,
+ Collections.emptyList(), new GenericData.Array<Object>(0,
arraySchema), GenericData.Array.class));
+ }
+ });
+
+ }
+
+ validMappings.forEach((suppliedValueType, suppliedValue) -> {
+ data.add(Arguments.of(suppliedValueType + " input " + validKey,
arraySchema, Collections.emptyList(),
+ suppliedValue, optimalValueType));
+ if (validKey != Schema.Type.UNION)
+ data.add(Arguments.of(suppliedValueType + " (with logical type)
input " + validKey,
+ createArraySchemaWithLogicalType(validKey),
Collections.emptyList(), suppliedValue,
+ GenericData.Array.class));
+ });
+ });
+ return data.stream();
+ }
+
+ private static <T> List<Conversion<?>> singleConversion(Schema.Type
targetKey) {
+ return Collections.singletonList(new Conversion<T>() {
+
+ @Override
+ public Class<T> getConvertedType() {
+ switch (targetKey) {
+ case INT:
+ return (Class<T>) Integer.TYPE;
+ case LONG:
+ return (Class<T>) Long.TYPE;
+ case DOUBLE:
+ return (Class<T>) Double.TYPE;
+ case FLOAT:
+ return (Class<T>) Float.TYPE;
+ case BOOLEAN:
+ return (Class<T>) Boolean.TYPE;
+ default:
+ return (Class<T>) Object.class;
+ }
+
+ }
+
+ @Override
+ public String getLogicalTypeName() {
+ return "Mike";
+ }
+
+ });
+ }
+
+ @ParameterizedTest
+ @MethodSource("testNewArrayData")
+ void testNewArray(String description, Schema schema, List<Conversion<?>>
convertions, Object initial,
+ Class<? extends Collection<?>> expectedType) {
+ GenericData underTest = new GenericData();
+ convertions.forEach(underTest::addLogicalTypeConversion);
+
+ Object result = underTest.newArray(initial, 10, schema);
+ // never null
+ assertNotNull(result, description);
+ // should always be the best fit type, or a generic array
+ assertTrue(expectedType.isInstance(result) || result instanceof
GenericData.Array,
+ result.getClass() + " when expected generic or " +
expectedType.getName() + " - " + description);
+
+ // must be a collection from the above list
+ Collection<Object> resultCollection = (Collection<Object>) result;
+
+ // the result should be empty
+ assertEquals(0, resultCollection.size(), "not empty - " + description);
+
+ // is the supplied type matched the return type, then we should not have
+ // allocated a new object
+ if (initial != null && initial.getClass() == result.getClass()) {
+ // if the result type is the same as the initial type, it should be
reused, so
+ // we should not have allocated a new object
+ assertSame(initial, result, "not reused - " + description);
+ }
+
+ // is the supplied type matched the return type, then we should not have
+ // allocated a new object
+ if (initial == null) {
+ // if we did allocate a not object, we should have allocated the optimal
type
+ assertSame(expectedType, result.getClass(), "not optimal - " +
description);
+ }
+ // check the schema was set correctly
+ if (result instanceof GenericContainer && result != initial) {
+ GenericContainer resultArray = (GenericContainer) result;
+ assertEquals(schema.getElementType(),
resultArray.getSchema().getElementType(),
+ "wrong element type - " + description);
+ }
+
+ // for primitive arrays, we should not have a logical type, and the
underlying
+ // array should be the correct type
+ if (result instanceof PrimitivesArrays.PrimitiveArray) {
+ assertSame(expectedType, resultCollection.getClass(), "wrong type for
primitive - " + description);
+ }
+
+ final Object sample = sampleValue(schema);
+ resultCollection.add(sample);
+ assertEquals(1, resultCollection.size(), "not added - " + description);
+ assertEquals(sample, resultCollection.iterator().next(), "wrong value - "
+ description);
+ assertEquals(1, resultCollection.size(), "disappeared - " + description);
+
+ Object result2 = underTest.newArray(resultCollection, 10, schema);
+ assertSame(result, result2, "not reused - " + description);
+
+ assertEquals(0, resultCollection.size(), "not reset - " + description);
+ }
+
+}