rdblue commented on code in PR #12512:
URL: https://github.com/apache/iceberg/pull/12512#discussion_r2059371622


##########
parquet/src/test/java/org/apache/iceberg/parquet/TestVariantReaders.java:
##########
@@ -879,6 +896,405 @@ public void testMixedRecords() throws IOException {
     VariantTestUtil.assertEqual(expectedThree, actualThreeVariant.value());
   }
 
+  @Test
+  public void testSimpleArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        elements(elementType, List.of(Variants.of("comedy"), 
Variants.of("drama")));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNullArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(element(shreddedType)));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of(
+                "metadata",
+                VariantTestUtil.emptyMetadata(),
+                "value",
+                serialize(Variants.ofNull())));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(Variants.ofNull(), actualVariant.value());
+  }
+
+  @Test
+  public void testEmptyArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(element(shreddedType)));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr = List.of();
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(0);
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+  }
+
+  @Test
+  public void testArrayWithNull() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        elements(
+            elementType,
+            Lists.newArrayList(
+                List.of(Variants.of("comedy"), Variants.ofNull(), 
Variants.of("drama"))));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.ofNull());
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(3);
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNestedArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType outerElementType = element(list(elementType));
+    GroupType variantType = variant("var", 2, list(outerElementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    ValueArray inner1 = Variants.array();
+    inner1.add(Variants.of("comedy"));
+    inner1.add(Variants.of("drama"));
+
+    List<GenericRecord> outer1 = elements(outerElementType, List.of(inner1, 
Variants.array()));
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of("metadata", VariantTestUtil.emptyMetadata(), "typed_value", 
outer1));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    ValueArray expectedInner1 = Variants.array();
+    expectedInner1.add(Variants.of("comedy"));
+    expectedInner1.add(Variants.of("drama"));
+    ValueArray expectedInner2 = Variants.array();
+    expectedArray.add(expectedInner1);
+    expectedArray.add(expectedInner2);
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    // Verify
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testArrayWithNestedObject() throws IOException {

Review Comment:
   This is pretty complicated for array with nested object. I'd expect just a 
top-level array with 2 or 3 nested objects mixed with unshredded variant values.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to