This is an automated email from the ASF dual-hosted git repository.

opwvhk pushed a commit to branch branch-1.12
in repository https://gitbox.apache.org/repos/asf/avro.git

commit c1592749faa11ce3c84b9d8a25fb47769454d838
Author: Mike Skells <[email protected]>
AuthorDate: Tue May 6 12:46:41 2025 +0100

    AVRO-4039 [java] fix GenericData.newArray to only return an appropriate 
array implementation (#3307)
    
    * AVRO-4039 fix GenericData.newArray
    only return an appropriate array
    
    * AVRO-4039 fix GenericData.newArray
    only return an appropriate array
    
    * AVRO-4039 fix GenericData.newArray
    spotless
    
    * AVRO-4039
    fix import that spotless removed
    
    * AVRO-4039
    review feedback
    remove schema check on returned value
    Check convertors with logical types
    
    * AVRO-4039
    review feedback
    
    * AVRO-4039
    review feedback
    
    (cherry picked from commit 09277d0deb7b5a04fc5e5430c56f4fd160df01fb)
---
 .../java/org/apache/avro/generic/GenericData.java  |  86 +++++--
 .../org/apache/avro/generic/PrimitivesArrays.java  |  88 +++++--
 .../org/apache/avro/generic/GenericDataTest.java   | 262 +++++++++++++++++++++
 3 files changed, 395 insertions(+), 41 deletions(-)

diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java 
b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
index 6db0a40eee..be5c7c422d 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/GenericData.java
@@ -57,6 +57,7 @@ import org.apache.avro.io.EncoderFactory;
 import org.apache.avro.io.FastReaderBuilder;
 import org.apache.avro.util.Utf8;
 import org.apache.avro.util.internal.Accessor;
+import org.apache.avro.generic.PrimitivesArrays.PrimitiveArray;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import org.apache.avro.util.springframework.ConcurrentReferenceHashMap;
@@ -1515,38 +1516,73 @@ public class GenericData {
 
   }
 
-  /*
+  /**
    * Called to create new array instances. Subclasses may override to use a
-   * different array implementation. By default, this returns a {@link
-   * GenericData.Array}.
+   * different array implementation. By default, this returns a
+   * {@link GenericData.Array}.
+   *
+   * @param old    the old array instance to reuse, if possible. If the old 
array
+   *               is an appropriate type, it may be cleared and returned.
+   * @param size   the size of the array to create.
+   * @param schema the schema of the array elements.
    */
   public Object newArray(Object old, int size, Schema schema) {
-    if (old instanceof GenericArray) {
-      ((GenericArray<?>) old).reset();
-      return old;
-    } else if (old instanceof Collection) {
-      ((Collection<?>) old).clear();
-      return old;
-    } else {
-      if (schema.getElementType().getType() == Type.INT) {
-        return new PrimitivesArrays.IntArray(size, schema);
-      }
-      if (schema.getElementType().getType() == Type.BOOLEAN) {
-        return new PrimitivesArrays.BooleanArray(size, schema);
-      }
-      if (schema.getElementType().getType() == Type.LONG) {
-        return new PrimitivesArrays.LongArray(size, schema);
-      }
-      if (schema.getElementType().getType() == Type.FLOAT) {
-        return new PrimitivesArrays.FloatArray(size, schema);
-      }
-      if (schema.getElementType().getType() == Type.DOUBLE) {
-        return new PrimitivesArrays.DoubleArray(size, schema);
+    final var logicalType = schema.getElementType().getLogicalType();
+    final var conversion = getConversionFor(logicalType);
+    final var optimalValueType = optimalValueType(schema, logicalType,
+        conversion == null ? null : conversion.getConvertedType());
+
+    if (old != null) {
+      if (old instanceof GenericData.Array<?>) {
+        ((GenericData.Array<?>) old).reset();
+        return old;
+      } else if (old instanceof PrimitiveArray) {
+        var primitiveOld = (PrimitiveArray<?>) old;
+        if (primitiveOld.valueType() == optimalValueType) {
+          primitiveOld.reset();
+          return old;
+        }
+      } else if (old instanceof Collection) {
+        ((Collection<?>) old).clear();
+        return old;
       }
-      return new GenericData.Array<Object>(size, schema);
     }
+    // we can't reuse the old array, so we create a new one
+    return PrimitivesArrays.createOptimizedArray(size, schema, 
optimalValueType);
   }
 
+  /**
+   * Determine the optimal value type for an array. The value type is 
determined
+   * form the convertedElementType if supplied, otherwise the underlying type 
from
+   * the schema
+   *
+   * @param schema               the schema of the array
+   * @param convertedElementType the converted elements value type. This may 
not
+   *                             be the same and the schema if for instance 
there
+   *                             is a logical type, and a convertor is use
+   * @return an indicator for the type of the array, useful for
+   *         {@link PrimitivesArrays#createOptimizedArray(int, Schema, 
Schema.Type)}.
+   *         May be null if the type is not optimised
+   */
+  public static Schema.Type optimalValueType(Schema schema, LogicalType 
logicalType, Class<?> convertedElementType) {
+    if (logicalType == null)
+      // if there are no logical types- use the schema type
+      return schema.getElementType().getType();
+    else if (convertedElementType == null)
+      // if there is no convertor
+      return null;
+    else
+      // use the converted type
+      return PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS.get(convertedElementType);
+  }
+
+  private final static Map<Class<?>, Schema.Type> 
PRIMITIVE_TYPES_WITH_SPECIALISED_ARRAYS = Map.of(//
+      Long.TYPE, Schema.Type.LONG, //
+      Integer.TYPE, Schema.Type.INT, //
+      Float.TYPE, Schema.Type.FLOAT, //
+      Double.TYPE, Schema.Type.DOUBLE, //
+      Boolean.TYPE, Schema.Type.BOOLEAN);
+
   /**
    * Called to create new array instances. Subclasses may override to use a
    * different map implementation. By default, this returns a {@link HashMap}.
diff --git 
a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java 
b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
index d34ce0f5bc..595cc1576e 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/PrimitivesArrays.java
@@ -17,7 +17,6 @@
  */
 package org.apache.avro.generic;
 
-import org.apache.avro.AvroRuntimeException;
 import org.apache.avro.Schema;
 
 import java.util.Arrays;
@@ -25,15 +24,55 @@ import java.util.Collection;
 
 public class PrimitivesArrays {
 
-  public static class IntArray extends GenericData.AbstractArray<Integer> {
+  /**
+   * Create a primitive array if the value type is has an associated optimised
+   * implementation, otherwise a generic array is returned. The value type is
+   * determined form the convertedElementType if supplied, otherwise the
+   * underlying type from the schema
+   *
+   * @param size      the size of the array to create
+   * @param schema    the schema of the array
+   * @param valueType the converted elements value type. This may not be the 
same
+   *                  and the schema if for instance there is a logical type, 
and
+   *                  a convertor is use
+   * @return an instance of a primitive array or a Generic array if the value 
type
+   *         is does not have an associated optimised implementation.
+   */
+  public static GenericData.AbstractArray<?> createOptimizedArray(int size, 
Schema schema, Schema.Type valueType) {
+
+    if (valueType != null)
+      switch (valueType) {
+      case INT:
+        return new PrimitivesArrays.IntArray(size, schema);
+      case BOOLEAN:
+        return new PrimitivesArrays.BooleanArray(size, schema);
+      case LONG:
+        return new PrimitivesArrays.LongArray(size, schema);
+      case FLOAT:
+        return new PrimitivesArrays.FloatArray(size, schema);
+      case DOUBLE:
+        return new PrimitivesArrays.DoubleArray(size, schema);
+      default:
+        break;
+      }
+    return new GenericData.Array<>(size, schema);
+  }
+
+  public abstract static class PrimitiveArray<T> extends 
GenericData.AbstractArray<T> {
+    PrimitiveArray(Schema schema) {
+      super(schema);
+    }
+
+    public abstract Schema.Type valueType();
+  }
+
+  public static class IntArray extends PrimitiveArray<Integer> {
     private static final int[] EMPTY = new int[0];
 
     private int[] elements = EMPTY;
 
     public IntArray(int capacity, Schema schema) {
       super(schema);
-      if (!Schema.Type.INT.equals(schema.getElementType().getType()))
-        throw new AvroRuntimeException("Not a int array schema: " + schema);
       if (capacity != 0)
         elements = new int[capacity];
     }
@@ -127,17 +166,20 @@ public class PrimitivesArrays {
       elements[index1] = elements[index2];
       elements[index2] = tmp;
     }
+
+    @Override
+    public Schema.Type valueType() {
+      return Schema.Type.INT;
+    }
   }
 
-  public static class LongArray extends GenericData.AbstractArray<Long> {
+  public static class LongArray extends PrimitiveArray<Long> {
     private static final long[] EMPTY = new long[0];
 
     private long[] elements = EMPTY;
 
     public LongArray(int capacity, Schema schema) {
       super(schema);
-      if (!Schema.Type.LONG.equals(schema.getElementType().getType()))
-        throw new AvroRuntimeException("Not a long array schema: " + schema);
       if (capacity != 0)
         elements = new long[capacity];
     }
@@ -231,17 +273,20 @@ public class PrimitivesArrays {
       elements[index1] = elements[index2];
       elements[index2] = tmp;
     }
+
+    @Override
+    public Schema.Type valueType() {
+      return Schema.Type.LONG;
+    }
   }
 
-  public static class BooleanArray extends GenericData.AbstractArray<Boolean> {
+  public static class BooleanArray extends PrimitiveArray<Boolean> {
     private static final byte[] EMPTY = new byte[0];
 
     private byte[] elements = EMPTY;
 
     public BooleanArray(int capacity, Schema schema) {
       super(schema);
-      if (!Schema.Type.BOOLEAN.equals(schema.getElementType().getType()))
-        throw new AvroRuntimeException("Not a boolean array schema: " + 
schema);
       if (capacity != 0)
         elements = new byte[1 + (capacity / Byte.SIZE)];
     }
@@ -396,17 +441,20 @@ public class PrimitivesArrays {
       this.set(index1, this.get(index2));
       this.set(index2, tmp);
     }
+
+    @Override
+    public Schema.Type valueType() {
+      return Schema.Type.BOOLEAN;
+    }
   }
 
-  public static class FloatArray extends GenericData.AbstractArray<Float> {
+  public static class FloatArray extends PrimitiveArray<Float> {
     private static final float[] EMPTY = new float[0];
 
     private float[] elements = EMPTY;
 
     public FloatArray(int capacity, Schema schema) {
       super(schema);
-      if (!Schema.Type.FLOAT.equals(schema.getElementType().getType()))
-        throw new AvroRuntimeException("Not a float array schema: " + schema);
       if (capacity != 0)
         elements = new float[capacity];
     }
@@ -500,17 +548,20 @@ public class PrimitivesArrays {
       this.set(index1, this.get(index2));
       this.set(index2, tmp);
     }
+
+    @Override
+    public Schema.Type valueType() {
+      return Schema.Type.FLOAT;
+    }
   }
 
-  public static class DoubleArray extends GenericData.AbstractArray<Double> {
+  public static class DoubleArray extends PrimitiveArray<Double> {
     private static final double[] EMPTY = new double[0];
 
     private double[] elements = EMPTY;
 
     public DoubleArray(int capacity, Schema schema) {
       super(schema);
-      if (!Schema.Type.DOUBLE.equals(schema.getElementType().getType()))
-        throw new AvroRuntimeException("Not a double array schema: " + schema);
       if (capacity != 0)
         elements = new double[capacity];
     }
@@ -604,6 +655,11 @@ public class PrimitivesArrays {
       this.set(index1, this.get(index2));
       this.set(index2, tmp);
     }
+
+    @Override
+    public Schema.Type valueType() {
+      return Schema.Type.DOUBLE;
+    }
   }
 
 }
diff --git 
a/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java 
b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java
new file mode 100644
index 0000000000..040a71e2ea
--- /dev/null
+++ b/lang/java/avro/src/test/java/org/apache/avro/generic/GenericDataTest.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.avro.generic;
+
+import org.apache.avro.Conversion;
+import org.apache.avro.LogicalType;
+import org.apache.avro.Schema;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Map;
+
+import java.util.stream.Stream;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class GenericDataTest {
+
+  static Schema createSchema(Schema.Type type) {
+    switch (type) {
+    case FIXED:
+      return Schema.createFixed("foo", null, null, 4);
+    case UNION:
+      return Schema.createUnion(Schema.create(Schema.Type.FLOAT), 
Schema.create(Schema.Type.STRING));
+    case MAP:
+      return Schema.createMap(Schema.create(Schema.Type.FLOAT));
+    case ARRAY:
+      return Schema.createArray(Schema.create(Schema.Type.STRING));
+    case RECORD:
+      return Schema.createRecord("record", null, null, false);
+    case ENUM:
+      return Schema.createEnum("myEnum", null, null, Collections.emptyList());
+    default:
+      return Schema.create(type);
+    }
+  }
+
+  static Object sampleValue(Schema schema) {
+    if (schema.getLogicalType() != null) {
+      return new Object();
+    }
+    switch (schema.getElementType().getType()) {
+    case BOOLEAN:
+      return true;
+    case INT:
+      return Integer.MAX_VALUE;
+    case LONG:
+      return Long.MAX_VALUE;
+    case FLOAT:
+      return Float.MAX_VALUE;
+    case DOUBLE:
+      return Double.MAX_VALUE;
+    default:
+      return "foo";
+    }
+  }
+
+  static Schema createArraySchema(Schema.Type type) {
+    return Schema.createArray(createSchema(type));
+  }
+
+  static Schema createArraySchemaWithLogicalType(Schema.Type type) {
+    final LogicalType logicalType = new LogicalType("Mike");
+    Schema schema = logicalType.addToSchema(createSchema(type));
+    return Schema.createArray(schema);
+  }
+
+  static Map<Schema.Type, GenericData.AbstractArray<?>> validMappings = new 
EnumMap<>(Schema.Type.class);
+  static {
+    for (Schema.Type type : Schema.Type.values()) {
+      switch (type) {
+      case INT:
+        validMappings.put(type, new PrimitivesArrays.IntArray(0, 
createArraySchema(type)));
+        break;
+      case LONG:
+        validMappings.put(type, new PrimitivesArrays.LongArray(0, 
createArraySchema(type)));
+        break;
+      case DOUBLE:
+        validMappings.put(type, new PrimitivesArrays.DoubleArray(0, 
createArraySchema(type)));
+        break;
+      case FLOAT:
+        validMappings.put(type, new PrimitivesArrays.FloatArray(0, 
createArraySchema(type)));
+        break;
+      case BOOLEAN:
+        validMappings.put(type, new PrimitivesArrays.BooleanArray(0, 
createArraySchema(type)));
+        break;
+      default:
+        validMappings.put(type, new GenericData.Array<>(0, 
createArraySchema(type)));
+        break;
+      }
+    }
+  }
+
+  public static Stream<Arguments> testNewArrayData() {
+
+    List<Arguments> data = new ArrayList<>();
+
+    validMappings.forEach((validKey, optimalValue) -> {
+      Class<?> optimalValueType = optimalValue.getClass();
+      // cant reuse null, or a string
+      final Schema arraySchema = createArraySchema(validKey);
+
+      data.add(Arguments.of("null input, " + validKey, arraySchema, 
Collections.emptyList(), null, optimalValueType));
+      data.add(
+          Arguments.of("String input, " + validKey, arraySchema, 
Collections.emptyList(), "foo", optimalValueType));
+      // should reuse arraylist & generic array
+      data.add(Arguments.of("ArrayList input, " + validKey, arraySchema, 
Collections.emptyList(), new ArrayList<>(),
+          ArrayList.class));
+      data.add(Arguments.of("Generic input, " + validKey, arraySchema, 
Collections.emptyList(),
+          new GenericData.Array<Object>(0, arraySchema), 
GenericData.Array.class));
+      // with logical type
+      if (validKey != Schema.Type.UNION) {
+        data.add(Arguments.of("null (with logical type) input, " + validKey, 
createArraySchemaWithLogicalType(validKey),
+            Collections.emptyList(), null, GenericData.Array.class));
+        data.add(Arguments.of("String (with logical type) input, " + validKey,
+            createArraySchemaWithLogicalType(validKey), 
Collections.emptyList(), "foo", GenericData.Array.class));
+        data.add(Arguments.of("ArrayList (with logical type) input, " + 
validKey, arraySchema, Collections.emptyList(),
+            new ArrayList<>(), ArrayList.class));
+        data.add(Arguments.of("Generic (with logical type) input, " + 
validKey, arraySchema, Collections.emptyList(),
+            new GenericData.Array<Object>(0, arraySchema), 
GenericData.Array.class));
+//         with logical type and conversion
+
+        validMappings.forEach((targetKey, targetType) -> {
+          if (targetKey != Schema.Type.UNION) {
+            data.add(Arguments.of("null (with logical type) input, " + 
validKey + " convert to " + targetType,
+                createArraySchemaWithLogicalType(targetKey), 
singleConversion(targetKey), null, targetType.getClass()));
+            data.add(Arguments.of("String (with logical type) input, " + 
validKey + " convert to " + targetType,
+                createArraySchemaWithLogicalType(targetKey), 
singleConversion(targetKey), "foo",
+                targetType.getClass()));
+            data.add(Arguments.of("ArrayList (with logical type) input, " + 
validKey + " convert to " + targetType,
+                createArraySchemaWithLogicalType(targetKey), 
singleConversion(targetKey), new ArrayList<>(),
+                ArrayList.class));
+            data.add(Arguments.of("Generic (with logical type) input, " + 
validKey, arraySchema,
+                Collections.emptyList(), new GenericData.Array<Object>(0, 
arraySchema), GenericData.Array.class));
+          }
+        });
+
+      }
+
+      validMappings.forEach((suppliedValueType, suppliedValue) -> {
+        data.add(Arguments.of(suppliedValueType + " input " + validKey, 
arraySchema, Collections.emptyList(),
+            suppliedValue, optimalValueType));
+        if (validKey != Schema.Type.UNION)
+          data.add(Arguments.of(suppliedValueType + " (with logical type) 
input " + validKey,
+              createArraySchemaWithLogicalType(validKey), 
Collections.emptyList(), suppliedValue,
+              GenericData.Array.class));
+      });
+    });
+    return data.stream();
+  }
+
+  private static <T> List<Conversion<?>> singleConversion(Schema.Type 
targetKey) {
+    return Collections.singletonList(new Conversion<T>() {
+
+      @Override
+      public Class<T> getConvertedType() {
+        switch (targetKey) {
+        case INT:
+          return (Class<T>) Integer.TYPE;
+        case LONG:
+          return (Class<T>) Long.TYPE;
+        case DOUBLE:
+          return (Class<T>) Double.TYPE;
+        case FLOAT:
+          return (Class<T>) Float.TYPE;
+        case BOOLEAN:
+          return (Class<T>) Boolean.TYPE;
+        default:
+          return (Class<T>) Object.class;
+        }
+
+      }
+
+      @Override
+      public String getLogicalTypeName() {
+        return "Mike";
+      }
+
+    });
+  }
+
+  @ParameterizedTest
+  @MethodSource("testNewArrayData")
+  void testNewArray(String description, Schema schema, List<Conversion<?>> 
convertions, Object initial,
+      Class<? extends Collection<?>> expectedType) {
+    GenericData underTest = new GenericData();
+    convertions.forEach(underTest::addLogicalTypeConversion);
+
+    Object result = underTest.newArray(initial, 10, schema);
+    // never null
+    assertNotNull(result, description);
+    // should always be the best fit type, or a generic array
+    assertTrue(expectedType.isInstance(result) || result instanceof 
GenericData.Array,
+        result.getClass() + " when expected generic or " + 
expectedType.getName() + " - " + description);
+
+    // must be a collection from the above list
+    Collection<Object> resultCollection = (Collection<Object>) result;
+
+    // the result should be empty
+    assertEquals(0, resultCollection.size(), "not empty - " + description);
+
+    // is the supplied type matched the return type, then we should not have
+    // allocated a new object
+    if (initial != null && initial.getClass() == result.getClass()) {
+      // if the result type is the same as the initial type, it should be 
reused, so
+      // we should not have allocated a new object
+      assertSame(initial, result, "not reused - " + description);
+    }
+
+    // is the supplied type matched the return type, then we should not have
+    // allocated a new object
+    if (initial == null) {
+      // if we did allocate a not object, we should have allocated the optimal 
type
+      assertSame(expectedType, result.getClass(), "not optimal - " + 
description);
+    }
+    // check the schema was set correctly
+    if (result instanceof GenericContainer && result != initial) {
+      GenericContainer resultArray = (GenericContainer) result;
+      assertEquals(schema.getElementType(), 
resultArray.getSchema().getElementType(),
+          "wrong element type - " + description);
+    }
+
+    // for primitive arrays, we should not have a logical type, and the 
underlying
+    // array should be the correct type
+    if (result instanceof PrimitivesArrays.PrimitiveArray) {
+      assertSame(expectedType, resultCollection.getClass(), "wrong type for 
primitive - " + description);
+    }
+
+    final Object sample = sampleValue(schema);
+    resultCollection.add(sample);
+    assertEquals(1, resultCollection.size(), "not added - " + description);
+    assertEquals(sample, resultCollection.iterator().next(), "wrong value - " 
+ description);
+    assertEquals(1, resultCollection.size(), "disappeared - " + description);
+
+    Object result2 = underTest.newArray(resultCollection, 10, schema);
+    assertSame(result, result2, "not reused - " + description);
+
+    assertEquals(0, resultCollection.size(), "not reset - " + description);
+  }
+
+}

Reply via email to