gustavoatt commented on code in PR #7352:
URL: https://github.com/apache/iceberg/pull/7352#discussion_r1168993116
##########
data/src/test/java/org/apache/iceberg/data/TestLocalScan.java:
##########
@@ -515,13 +518,117 @@ public void testAsOfTimeOlderThanFirstSnapshot() {
         "Cannot find a snapshot older than " +
             DateTimeUtil.formatTimestampMillis(timestamp));
   }
+  @Test
+  public void testLoadPartitionsTable() throws IOException {
+    Schema schema =
+        new Schema(
+            optional(1, "id", Types.IntegerType.get()), optional(2, "ds", Types.StringType.get()));
+    PartitionSpec spec = PartitionSpec.builderFor(schema).identity("ds").build();
+
+    File location = temp.newFolder("partitions_table_test_" + format.name());
+    Table table =
+        TABLES.create(
+            schema,
+            spec,
+            ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name()),
+            location.getAbsolutePath());
+
+    GenericRecord record = GenericRecord.create(schema);
+
+    // Create two files with different partitions ds=2021-01-01 and ds=2021-01-02.
+    List<Record> firstFileRecords =
+        Lists.newArrayList(
+            record.copy(ImmutableMap.of("id", 1, "ds", "2021-01-01")),
+            record.copy(ImmutableMap.of("id", 2, "ds", "2021-01-01")));
+    List<Record> secondFileRecords =
+        Lists.newArrayList(
+            record.copy(ImmutableMap.of("id", 3, "ds", "2021-01-02")),
+            record.copy(ImmutableMap.of("id", 4, "ds", "2021-01-02")));
+
+    PartitionKey firstPartitionKey = new PartitionKey(spec, schema);
+    firstPartitionKey.partition(firstFileRecords.get(0));
+    PartitionKey secondPartitionKey = new PartitionKey(spec, schema);
+    secondPartitionKey.partition(secondFileRecords.get(0));
+
+    DataFile df1 =
+        writeFile(
+            location.getAbsolutePath(),
+            format.addExtension("first"),
+            schema,
+            spec,
+            firstPartitionKey,
+            firstFileRecords);
+    DataFile df2 =
+        writeFile(
+            location.getAbsolutePath(),
+            format.addExtension("second"),
+            schema,
+            spec,
+            secondPartitionKey,
+            secondFileRecords);
+    table.newAppend().appendFile(df1).appendFile(df2).commit();
+
+    Table partitionsTable = TABLES.load(table.name() + "#partitions");
+
+    // Verify that we can read the partitions table correctly, and we can get both partitions.
+    Set<Record> actualRecords =
+        Sets.newHashSet(IcebergGenerics.read(partitionsTable).select("partition").build());
Review Comment:
@edgarRd I tried doing that, but I still get a record with schema `struct partition<ds: string>`. I believe the column projection keeps only the columns that are needed, but it preserves the full schema path, i.e. it discards unnecessary columns while the projected column still has to be fully addressed as `partition.ds` (see https://github.com/apache/iceberg/blob/master/core/src/main/java/org/apache/iceberg/BaseScan.java#L239). So unfortunately it does not make the test simpler.
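
To illustrate the point, here is a rough, hypothetical snippet (not part of the PR) of what reading with the suggested `select("partition.ds")` would look like: the projected records still carry the nested `partition` struct, so the value has to be read through that struct rather than as a flat `ds` column. It assumes the same test context as the diff above (`TABLES`, `table`) plus `org.apache.iceberg.io.CloseableIterable`.

```java
// Hypothetical sketch, reusing TABLES and table from the test above.
Table partitionsTable = TABLES.load(table.name() + "#partitions");

try (CloseableIterable<Record> rows =
    IcebergGenerics.read(partitionsTable).select("partition.ds").build()) {
  for (Record row : rows) {
    // Even with the narrower projection, each top-level record still exposes
    // a single "partition" struct field...
    Record partition = (Record) row.getField("partition");
    // ...so the selected column has to be read through that struct.
    String ds = (String) partition.getField("ds");
    System.out.println(ds);
  }
}
```

So the expected records in the test would still have to be built against `struct<partition: struct<ds: string>>`, which is why the diff above keeps the plain `select("partition")`.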