szehon-ho commented on issue #6670: URL: https://github.com/apache/iceberg/issues/6670#issuecomment-1405863632
A test that demonstrates this (a bit longer, that uses ManifestReader/Writer). It demonstrates that trying to use values read from ManifestReader in a StructLikeMap does not work: ``` @Test public void testBug() throws IOException { Schema schema = new Schema( required(1, "a", Types.IntegerType.get()), required(2, "b", Types.IntegerType.get())); PartitionSpec spec = PartitionSpec.builderFor(schema).identity("a").identity("b").build(); StructLikeMap map = StructLikeMap.create(spec.partitionType()); PartitionData data1 = new PartitionData(spec.partitionType()); data1.set(0, 20220726); data1.set(1, 20220801); PartitionData data2 = new PartitionData(spec.partitionType()); data2.set(0, 20220728); data2.set(1, 20220719); DataFile file1 = DataFiles.builder(spec) .withPath("/path/to/data-1.parquet") .withFileSizeInBytes(10) .withPartition(data1) .withRecordCount(1) .build(); DataFile file2 = DataFiles.builder(spec) .withPath("/path/to/data-2.parquet") .withFileSizeInBytes(10) .withPartition(data2) .withRecordCount(1) .build(); ManifestFile manifest = writeManifestTest( 1000L, spec, "foo.avro", manifestEntry(Status.ADDED, null, file1), manifestEntry(Status.ADDED, null, file2)); AtomicInteger integer = new AtomicInteger(); try (ManifestReader<DataFile> reader = ManifestFiles.read(manifest, new HadoopFileIO(new Configuration()))) { try (CloseableIterable<ManifestEntry<DataFile>> entries = reader.entries()) { try (CloseableIterator<ManifestEntry<DataFile>> iterator = entries.iterator()) { while (iterator.hasNext()) { ManifestEntry<DataFile> next = iterator.next(); map.computeIfAbsent(next.file().partition(), a -> integer.incrementAndGet()); } } } } Assert.assertEquals(2, map.get(data2)); // fails, returns 1 Assert.assertEquals(1, map.get(data1)); } private <F extends ContentFile<F>> ManifestFile writeManifestTest( Long snapshotId, PartitionSpec spec, String fileName, ManifestEntry<?>... 
entries) throws IOException { File manifestFile = temp.newFile(fileName); Assert.assertTrue(manifestFile.delete()); OutputFile outputFile = HadoopOutputFile.fromPath(new Path(manifestFile.getCanonicalPath()), FileSystem.get(new Configuration())); ManifestWriter<F> writer; writer = (ManifestWriter<F>) ManifestFiles.write(1, spec, outputFile, snapshotId); try { for (ManifestEntry<?> entry : entries) { writer.addEntry((ManifestEntry<F>) entry); } } finally { writer.close(); } return writer.toManifestFile(); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org