aihuaxu commented on code in PR #12512:
URL: https://github.com/apache/iceberg/pull/12512#discussion_r2060868714


##########
parquet/src/test/java/org/apache/iceberg/parquet/TestVariantReaders.java:
##########
@@ -885,6 +897,460 @@ public void testMixedRecords() throws IOException {
     VariantTestUtil.assertEqual(expectedThree, actualThreeVariant.value());
   }
 
+  @Test
+  public void testSimpleArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("typed_value", "drama")));
+
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNullArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(element(shreddedType)));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of(
+                "metadata",
+                VariantTestUtil.emptyMetadata(),
+                "value",
+                serialize(Variants.ofNull())));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(Variants.ofNull(), actualVariant.value());
+  }
+
+  @Test
+  public void testEmptyArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType variantType = variant("var", 2, list(element(shreddedType)));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr = List.of();
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    Record actual = writeAndRead(parquetSchema, row);
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(0);
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+  }
+
+  @Test
+  public void testArrayWithNull() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("value", serialize(Variants.ofNull()))),
+            record(elementType, Map.of("typed_value", "drama")));
+
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.ofNull());
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    assertThat(actualVariant.value().type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualVariant.value().asArray().numElements()).isEqualTo(3);
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testNestedArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType outerElementType = element(list(elementType));
+    GroupType variantType = variant("var", 2, list(outerElementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> inner1 =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("typed_value", "drama")));
+    List<GenericRecord> outer1 =
+        List.of(
+            record(outerElementType, Map.of("typed_value", inner1)),
+            record(outerElementType, Map.of("typed_value", List.of())));
+    GenericRecord var =
+        record(
+            variantType,
+            Map.of("metadata", VariantTestUtil.emptyMetadata(), "typed_value", 
outer1));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    ValueArray expectedInner1 = Variants.array();
+    expectedInner1.add(Variants.of("comedy"));
+    expectedInner1.add(Variants.of("drama"));
+    ValueArray expectedInner2 = Variants.array();
+    expectedArray.add(expectedInner1);
+    expectedArray.add(expectedInner2);
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    // Verify
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testArrayWithNestedObject() throws IOException {
+    GroupType fieldA = field("a", shreddedPrimitive(PrimitiveTypeName.INT32));
+    GroupType fieldB = field("b", shreddedPrimitive(PrimitiveTypeName.BINARY, 
STRING));
+    GroupType shreddedFields = objectFields(fieldA, fieldB);
+    GroupType elementType = element(shreddedFields);
+    GroupType listType = list(elementType);
+    GroupType variantType = variant("var", 2, listType);
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    // Row 1 with nested fully shredded object
+    GenericRecord shredded1 =
+        record(
+            shreddedFields,
+            Map.of(
+                "a",
+                record(fieldA, Map.of("typed_value", 1)),
+                "b",
+                record(fieldB, Map.of("typed_value", "comedy"))));
+    GenericRecord shredded2 =
+        record(
+            shreddedFields,
+            Map.of(
+                "a",
+                record(fieldA, Map.of("typed_value", 2)),
+                "b",
+                record(fieldB, Map.of("typed_value", "drama"))));
+    List<GenericRecord> arr1 =
+        List.of(
+            record(elementType, Map.of("typed_value", shredded1)),
+            record(elementType, Map.of("typed_value", shredded2)));
+    GenericRecord var1 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, 
"typed_value", arr1));
+    GenericRecord row1 = record(parquetSchema, Map.of("id", 1, "var", var1));
+
+    ValueArray expected1 = Variants.array();
+    ShreddedObject expectedElement1 = Variants.object(TEST_METADATA);
+    expectedElement1.put("a", Variants.of(1));
+    expectedElement1.put("b", Variants.of("comedy"));
+    expected1.add(expectedElement1);
+    ShreddedObject expectedElement2 = Variants.object(TEST_METADATA);
+    expectedElement2.put("a", Variants.of(2));
+    expectedElement2.put("b", Variants.of("drama"));
+    expected1.add(expectedElement2);
+
+    // Row 2 with nested partially shredded object
+    GenericRecord shredded3 =
+        record(
+            shreddedFields,
+            Map.of(
+                "a",
+                record(fieldA, Map.of("typed_value", 3)),
+                "b",
+                record(fieldB, Map.of("typed_value", "action"))));
+    ShreddedObject baseObject3 = Variants.object(TEST_METADATA);
+    baseObject3.put("c", Variants.of("str"));
+
+    GenericRecord shredded4 =
+        record(
+            shreddedFields,
+            Map.of(
+                "a",
+                record(fieldA, Map.of("typed_value", 4)),
+                "b",
+                record(fieldB, Map.of("typed_value", "horror"))));
+    ShreddedObject baseObject4 = Variants.object(TEST_METADATA);
+    baseObject4.put("d", Variants.ofIsoDate("2024-01-30"));
+
+    List<GenericRecord> arr2 =
+        List.of(
+            record(elementType, Map.of("value", serialize(baseObject3), 
"typed_value", shredded3)),
+            record(elementType, Map.of("value", serialize(baseObject4), 
"typed_value", shredded4)));
+    GenericRecord var2 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, 
"typed_value", arr2));
+    GenericRecord row2 = record(parquetSchema, Map.of("id", 2, "var", var2));
+
+    ValueArray expected2 = Variants.array();
+    ShreddedObject expectedElement3 = Variants.object(TEST_METADATA);
+    expectedElement3.put("a", Variants.of(3));
+    expectedElement3.put("b", Variants.of("action"));
+    expectedElement3.put("c", Variants.of("str"));
+    expected2.add(expectedElement3);
+    ShreddedObject expectedElement4 = Variants.object(TEST_METADATA);
+    expectedElement4.put("a", Variants.of(4));
+    expectedElement4.put("b", Variants.of("horror"));
+    expectedElement4.put("d", Variants.ofIsoDate("2024-01-30"));
+    expected2.add(expectedElement4);
+
+    // verify
+    List<Record> actual = writeAndRead(parquetSchema, List.of(row1, row2));
+    Record actual1 = actual.get(0);
+    assertThat(actual1.getField("id")).isEqualTo(1);
+    assertThat(actual1.getField("var")).isInstanceOf(Variant.class);
+
+    Variant actualVariant1 = (Variant) actual1.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant1.metadata());
+    VariantTestUtil.assertEqual(expected1, actualVariant1.value());
+
+    Record actual2 = actual.get(1);
+    assertThat(actual2.getField("id")).isEqualTo(2);
+    assertThat(actual2.getField("var")).isInstanceOf(Variant.class);
+
+    Variant actualVariant2 = (Variant) actual2.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant2.metadata());
+    VariantTestUtil.assertEqual(expected2, actualVariant2.value());
+  }
+
+  @Test
+  public void testArrayWithNonArray() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr1 =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("typed_value", "drama")));
+    GenericRecord var1 =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr1));
+    GenericRecord row1 = record(parquetSchema, Map.of("id", 1, "var", var1));
+
+    ValueArray expectedArray1 = Variants.array();
+    expectedArray1.add(Variants.of("comedy"));
+    expectedArray1.add(Variants.of("drama"));
+
+    GenericRecord var2 =
+        record(
+            variantType,
+            Map.of(
+                "metadata", VariantTestUtil.emptyMetadata(), "value", 
serialize(Variants.of(34))));
+    GenericRecord row2 = record(parquetSchema, Map.of("id", 2, "var", var2));
+
+    VariantValue expectedValue2 = Variants.of(PhysicalType.INT32, 34);
+
+    GenericRecord var3 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, "value", 
TEST_OBJECT_BUFFER));
+    GenericRecord row3 = record(parquetSchema, Map.of("id", 3, "var", var3));
+
+    ShreddedObject expectedObject3 = Variants.object(TEST_METADATA);
+    expectedObject3.put("a", Variants.ofNull());
+    expectedObject3.put("d", Variants.of("iceberg"));
+
+    // Test array is read properly after a non-array
+    List<GenericRecord> arr4 =
+        List.of(
+            record(elementType, Map.of("typed_value", "action")),
+            record(elementType, Map.of("typed_value", "horror")));
+    GenericRecord var4 =
+        record(variantType, Map.of("metadata", TEST_METADATA_BUFFER, 
"typed_value", arr4));
+    GenericRecord row4 = record(parquetSchema, Map.of("id", 4, "var", var4));
+
+    ValueArray expectedArray4 = Variants.array();
+    expectedArray4.add(Variants.of("action"));
+    expectedArray4.add(Variants.of("horror"));
+
+    List<Record> actual = writeAndRead(parquetSchema, List.of(row1, row2, 
row3, row4));
+
+    // Verify
+    Record actual1 = actual.get(0);
+    assertThat(actual1.getField("id")).isEqualTo(1);
+    assertThat(actual1.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant1 = (Variant) actual1.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant1.metadata());
+    VariantTestUtil.assertEqual(expectedArray1, actualVariant1.value());
+
+    Record actual2 = actual.get(1);
+    assertThat(actual2.getField("id")).isEqualTo(2);
+    assertThat(actual2.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant2 = (Variant) actual2.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant2.metadata());
+    VariantTestUtil.assertEqual(expectedValue2, actualVariant2.value());
+
+    Record actual3 = actual.get(2);
+    assertThat(actual3.getField("id")).isEqualTo(3);
+    assertThat(actual3.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant3 = (Variant) actual3.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant3.metadata());
+    VariantTestUtil.assertEqual(expectedObject3, actualVariant3.value());
+
+    Record actual4 = actual.get(3);
+    assertThat(actual4.getField("id")).isEqualTo(4);
+    assertThat(actual4.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant4 = (Variant) actual4.getField("var");
+    VariantTestUtil.assertEqual(TEST_METADATA, actualVariant4.metadata());
+    VariantTestUtil.assertEqual(expectedArray4, actualVariant4.value());
+  }
+
+  @Test
+  public void testArrayMissingValueColumn() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType =
+        Types.buildGroup(Type.Repetition.OPTIONAL)
+            .id(2)
+            .required(PrimitiveTypeName.BINARY)
+            .named("metadata")
+            .addField(list(elementType))
+            .named("var");
+
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("typed_value", "drama")));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testArrayMissingElementValueColumn() throws IOException {
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType =
+        
Types.buildGroup(Type.Repetition.REQUIRED).addField(shreddedType).named("element");
+
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    List<GenericRecord> arr =
+        List.of(
+            record(elementType, Map.of("typed_value", "comedy")),
+            record(elementType, Map.of("typed_value", "drama")));
+    GenericRecord var =
+        record(
+            variantType, Map.of("metadata", VariantTestUtil.emptyMetadata(), 
"typed_value", arr));
+    GenericRecord row = record(parquetSchema, Map.of("id", 1, "var", var));
+
+    ValueArray expectedArray = Variants.array();
+    expectedArray.add(Variants.of("comedy"));
+    expectedArray.add(Variants.of("drama"));
+
+    Record actual = writeAndRead(parquetSchema, row);
+
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantTestUtil.assertEqual(expectedArray, actualVariant.value());
+  }
+
+  @Test
+  public void testArrayWithElementNullValueAndNullTypedValue() throws 
IOException {
+    // Test the invalid case that both value and typed_value of an element are 
null
+    Type shreddedType = shreddedPrimitive(PrimitiveTypeName.BINARY, STRING);
+    GroupType elementType = element(shreddedType);
+    GroupType variantType = variant("var", 2, list(elementType));
+    MessageType parquetSchema = parquetSchema(variantType);
+
+    GenericRecord element = record(elementType, Map.of());
+    GenericRecord variant =
+        record(
+            variantType,
+            Map.of("metadata", VariantTestUtil.emptyMetadata(), "typed_value", 
List.of(element)));
+    GenericRecord record = record(parquetSchema, Map.of("id", 1, "var", 
variant));
+
+    Record actual = writeAndRead(parquetSchema, record);
+    assertThat(actual.getField("id")).isEqualTo(1);
+    assertThat(actual.getField("var")).isInstanceOf(Variant.class);
+
+    Variant actualVariant = (Variant) actual.getField("var");
+    VariantTestUtil.assertEqual(EMPTY_METADATA, actualVariant.metadata());
+    VariantValue actualValue = actualVariant.value();
+    assertThat(actualValue.type()).isEqualTo(PhysicalType.ARRAY);
+    assertThat(actualValue.asArray().numElements()).isEqualTo(1);
+    assertThat(actualValue.asArray().get(0)).isNull();

Review Comment:
   You are right. We do need to return Variant null.
   
   ```If a Variant is missing in a context where a value is required, readers 
must return a Variant null (00): basic type 0 (primitive) and physical type 0 
(null). For example, if a Variant is required (like measurement above) and both 
value and typed_value are null, the returned value must be 00 (Variant null).```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to