emkornfield commented on code in PR #11785: URL: https://github.com/apache/iceberg/pull/11785#discussion_r1886202200
########## data/src/test/java/org/apache/iceberg/data/parquet/TestGenericData.java: ########## @@ -131,14 +136,143 @@ public void testTwoLevelList() throws IOException { .reuseContainers() .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)) .build()) { - CloseableIterator it = reader.iterator(); - assertThat(it).hasNext(); - while (it.hasNext()) { - GenericRecord actualRecord = (GenericRecord) it.next(); + for (Record actualRecord : reader) { assertThat(actualRecord.get(0, ArrayList.class)).first().isEqualTo(expectedBinary); assertThat(actualRecord.get(1, ByteBuffer.class)).isEqualTo(expectedBinary); - assertThat(it).isExhausted(); } + + assertThat(Lists.newArrayList(reader).size()).isEqualTo(1); } } + + @Test + public void testMissingRequiredWithoutDefault() { + Schema writeSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .withDoc("Should not produce default value") + .build()); + + Schema expectedSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .build(), + NestedField.required("missing_str") + .withId(6) + .ofType(Types.StringType.get()) + .withDoc("Missing required field with no default") + .build()); + + assertThatThrownBy(() -> writeAndValidate(writeSchema, expectedSchema)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Missing required field: missing_str"); + } + + @Test + public void testDefaultValues() throws IOException { + Schema writeSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .withDoc("Should not produce default value") + .build()); + + Schema expectedSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .build(), + NestedField.required("missing_str") + .withId(6) + .ofType(Types.StringType.get()) + .withInitialDefault("orange") + .build(), + NestedField.optional("missing_int") + .withId(7) + .ofType(Types.IntegerType.get()) + .withInitialDefault(34) + .build()); + + writeAndValidate(writeSchema, expectedSchema); + } + + @Test + public void testNullDefaultValue() throws IOException { + Schema writeSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .withDoc("Should not produce default value") + .build()); + + Schema expectedSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .build(), + NestedField.optional("missing_date").withId(3).ofType(Types.DateType.get()).build()); + + writeAndValidate(writeSchema, expectedSchema); + } + + @Test + public void testNestedDefaultValue() throws IOException { + Schema writeSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .withDoc("Should not produce default value") + .build(), + NestedField.optional("nested") + .withId(3) + .ofType(Types.StructType.of(required(4, "inner", Types.StringType.get()))) + .withDoc("Used to test nested field defaults") + .build()); + + Schema expectedSchema = + new Schema( + required(1, "id", Types.LongType.get()), + NestedField.optional("data") + .withId(2) + .ofType(Types.StringType.get()) + .withInitialDefault("wrong!") + .build(), + NestedField.optional("nested") + .withId(3) + .ofType( + Types.StructType.of( + required(4, "inner", Types.StringType.get()), + NestedField.optional("missing_inner_float") + .withId(5) + .ofType(Types.FloatType.get()) + .withInitialDefault(-0.0F) Review Comment: In addition to this, I'm not sure if this is the right place to test it but it would be good to make sure there test coverage that covers. The following spec requirements: * When a new field is added to a struct with a default value, updating the struct's default is optional * If a field value is missing from a struct's initial-default, the field's initial-default must be used for the field -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org