szehon-ho commented on issue #6670:
URL: https://github.com/apache/iceberg/issues/6670#issuecomment-1405863632

   A test that demonstrates this (a bit longer, using 
ManifestReader/Writer).  It shows that trying to use values read from 
ManifestReader as keys in a StructLikeMap does not work:
   
   ```
     @Test
     public void testBug() throws IOException {
       Schema schema = new Schema(
               required(1, "a", Types.IntegerType.get()), required(2, "b", 
Types.IntegerType.get()));
   
       PartitionSpec spec =
           PartitionSpec.builderFor(schema).identity("a").identity("b").build();
   
       StructLikeMap map = StructLikeMap.create(spec.partitionType());
   
       PartitionData data1 = new PartitionData(spec.partitionType());
       data1.set(0, 20220726);
       data1.set(1, 20220801);
   
       PartitionData data2 = new PartitionData(spec.partitionType());
       data2.set(0, 20220728);
       data2.set(1, 20220719);
   
       DataFile file1 =
           DataFiles.builder(spec)
               .withPath("/path/to/data-1.parquet")
               .withFileSizeInBytes(10)
               .withPartition(data1)
               .withRecordCount(1)
               .build();
   
       DataFile file2 =
           DataFiles.builder(spec)
               .withPath("/path/to/data-2.parquet")
               .withFileSizeInBytes(10)
               .withPartition(data2)
               .withRecordCount(1)
               .build();
   
       ManifestFile manifest =
           writeManifestTest(
               1000L,
               spec,
               "foo.avro",
               manifestEntry(Status.ADDED, null, file1),
               manifestEntry(Status.ADDED, null, file2));
   
       AtomicInteger integer = new AtomicInteger();
       try (ManifestReader<DataFile> reader =
                ManifestFiles.read(manifest, new HadoopFileIO(new 
Configuration()))) {
         try (CloseableIterable<ManifestEntry<DataFile>> entries = 
reader.entries()) {
           try (CloseableIterator<ManifestEntry<DataFile>> iterator = 
entries.iterator()) {
             while (iterator.hasNext()) {
               ManifestEntry<DataFile> next = iterator.next();
               map.computeIfAbsent(next.file().partition(), a -> 
integer.incrementAndGet());
             }
           }
         }
       }
   
       Assert.assertEquals(2, map.get(data2));  // fails, returns 1
       Assert.assertEquals(1, map.get(data1));
     }
   
     private <F extends ContentFile<F>> ManifestFile writeManifestTest(
         Long snapshotId, PartitionSpec spec, String fileName, 
ManifestEntry<?>... entries) throws IOException {
       File manifestFile = temp.newFile(fileName);
       Assert.assertTrue(manifestFile.delete());
       OutputFile outputFile = HadoopOutputFile.fromPath(new 
Path(manifestFile.getCanonicalPath()), FileSystem.get(new Configuration()));
   
       ManifestWriter<F> writer;
       writer =
             (ManifestWriter<F>)
                 ManifestFiles.write(1, spec, outputFile, snapshotId);
   
       try {
         for (ManifestEntry<?> entry : entries) {
           writer.addEntry((ManifestEntry<F>) entry);
         }
       } finally {
         writer.close();
       }
   
       return writer.toManifestFile();
     }
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@iceberg.apache.org
For additional commands, e-mail: issues-help@iceberg.apache.org

Reply via email to