rdblue commented on code in PR #12512:
URL: https://github.com/apache/iceberg/pull/12512#discussion_r2006404901


##########
parquet/src/test/java/org/apache/iceberg/parquet/TestVariantReaders.java:
##########
@@ -879,6 +891,270 @@ public void testMixedRecords() throws IOException {
     VariantTestUtil.assertEqual(expectedThree, actualThreeVariant.value());
   }
 
+  @Test
+  public void testSimpleArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(shreddedType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr = elements(shreddedType, List.of("comedy", 
"drama"));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.of("drama"));
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNullArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(shreddedType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of(
+                "metadata",
+                VariantTestUtil.emptyMetadata(),
+                "value",
+                serialize(Variants.ofNull())));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(Variants.ofNull(), actualVariant.value());
+  }
+
+  @Test
+  public void testEmptyArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(shreddedType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr = List.of();
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(0);
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+  }
+
+  @Test
+  public void testArrayWithNull() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(shreddedType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr = elements(shreddedType, 
Lists.newArrayList("comedy", null, "drama"));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(3);
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.ofNull());
+    expectedArray.add(Variants.of("drama"));
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNestedArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType innerListType = list(shreddedType);
+    GroupType variantType = variant("var", 2, list(innerListType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> inner1 = elements(shreddedType, List.of("comedy", 
"drama"));
+    List<GenericRecord> inner2 = elements(shreddedType, List.of());
+    List<GenericRecord> outer1 = elements(innerListType, List.of(inner1, 
inner2));
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of("metadata", VariantTestUtil.emptyMetadata(), "typed_value", 
outer1));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    // Verify
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    ValueArray expectedArray = Variants.array();
+    ValueArray expectedInner1 = Variants.array();
+    expectedInner1.add(Variants.of("comedy"));
+    expectedInner1.add(Variants.of("drama"));
+    ValueArray expectedInner2 = Variants.array();
+    expectedArray.add(expectedInner1);
+    expectedArray.add(expectedInner2);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testArrayWithNestedObject() throws IOException {
+    GroupType fieldA = field("a", shreddedPrimitive(PrimitiveTypeName.INT32));
+    GroupType fieldB = field("b", shreddedPrimitive(PrimitiveTypeName.BINARY, 
STRING));
+    GroupType shreddedFields = objectFields(fieldA, fieldB);
+    GroupType listType = list(shreddedFields);
+    GroupType fieldC = field("c", listType);
+    GroupType objectFields = objectFields(fieldC);
+    GroupType variantType = variant("var", 2, objectFields);
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    // Row 1
+    GenericRecord a1 = record(fieldA, Map.of("typed_value", 1));
+    GenericRecord b1 = record(fieldB, Map.of("typed_value", "comedy"));
+    GenericRecord shredded1 = record(shreddedFields, Map.of("a", a1, "b", b1));
+    GenericRecord a2 = record(fieldA, Map.of("typed_value", 2));
+    GenericRecord b2 = record(fieldB, Map.of("typed_value", "drama"));
+    GenericRecord shredded2 = record(shreddedFields, Map.of("a", a2, "b", b2));
+    List<GenericRecord> arr1 = elements(shreddedFields, List.of(shredded1, 
shredded2));
+    GenericRecord element1 = record(fieldC, Map.of("typed_value", arr1));
+    GenericRecord c1 = record(objectFields, Map.of("c", element1));
+    GenericRecord var1 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, 
"typed_value", c1));
+    GenericRecord row1 = record(parquetSchema, Map.of("id", 1, "var", var1));
+
+    // Row 2
+    GenericRecord a3 = record(fieldA, Map.of("typed_value", 3));
+    GenericRecord b3 = record(fieldB, Map.of("typed_value", "action"));
+    GenericRecord shredded3 = record(shreddedFields, Map.of("a", a3, "b", b3));
+    GenericRecord a4 = record(fieldA, Map.of("typed_value", 4));
+    GenericRecord b4 = record(fieldB, Map.of("typed_value", "horror"));
+    GenericRecord shredded4 = record(shreddedFields, Map.of("a", a4, "b", b4));
+    List<GenericRecord> arr2 = elements(shreddedFields, List.of(shredded3, 
shredded4));
+    GenericRecord element2 = record(fieldC, Map.of("typed_value", arr2));
+    GenericRecord c2 = record(objectFields, Map.of("c", element2));
+    GenericRecord var2 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, 
"typed_value", c2));
+    GenericRecord row2 = record(parquetSchema, Map.of("id", 2, "var", var2));
+
+    // verify
+    List<Record> actual = writeAndRead(parquetSchema, List.of(row1, row2));
+    Record actual1 = actual.get(0);
+    assertThat(actual1.getField("id")).isEqualTo(1);
+    assertThat(actual1.getField("var")).isInstanceOf(Variant.class);
+
+    ShreddedObject expected1 = Variants.object(TEST_METADATA);
+    ValueArray expectedArray1 = Variants.array();
+    ShreddedObject expectedElement1 = Variants.object(TEST_METADATA);
+    expectedElement1.put("a", Variants.of(1));
+    expectedElement1.put("b", Variants.of("comedy"));
+    expectedArray1.add(expectedElement1);
+    ShreddedObject expectedElement2 = Variants.object(TEST_METADATA);
+    expectedElement2.put("a", Variants.of(2));
+    expectedElement2.put("b", Variants.of("drama"));
+    expectedArray1.add(expectedElement2);
+    expected1.put("c", expectedArray1);
+
+    Variant actualVariant1 = (Variant) actual1.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant1.metadata());
+    VariantTestUtil.assertEqual(expected1, actualVariant1.value());
+
+    Record actual2 = actual.get(1);
+    assertThat(actual2.getField("id")).isEqualTo(2);
+    assertThat(actual2.getField("var")).isInstanceOf(Variant.class);
+
+    ShreddedObject expected2 = Variants.object(TEST_METADATA);
+    ValueArray expectedArray2 = Variants.array();
+    ShreddedObject expectedElement3 = Variants.object(TEST_METADATA);
+    expectedElement3.put("a", Variants.of(3));
+    expectedElement3.put("b", Variants.of("action"));
+    expectedArray2.add(expectedElement3);
+    ShreddedObject expectedElement4 = Variants.object(TEST_METADATA);
+    expectedElement4.put("a", Variants.of(4));
+    expectedElement4.put("b", Variants.of("horror"));
+    expectedArray2.add(expectedElement4);
+    expected2.put("c", expectedArray2);
+
+    Variant actualVariant2 = (Variant) actual2.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant2.metadata());
+    VariantTestUtil.assertEqual(expected2, actualVariant2.value());
+  }
+
+  @Test
+  public void testArrayWithNonArray() throws IOException {

Review Comment:
   This tests the behavior with different combinations of the top-level `value` 
and `typed_value`, which is good. But I think there should be a couple other 
tests as well. First, I'd like to see another array at the end of this test to 
verify the behavior of consuming nulls from the array columns (for the 
non-array variant values) is correct. That would uncover the case where a 
non-null `value` causes the reader to misbehave and isn't currently tested 
because there is only one shredded array at the beginning.
   
   Second, I'd like to see a conflict test cases, one for array elements and 
for the array itself, where both `value` and `typed_value` are non-null. See 
`testValueAndTypedValueConflict` for an example.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to