nastra commented on code in PR #13859:
URL: https://github.com/apache/iceberg/pull/13859#discussion_r2354445663
##########
spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/data/vectorized/parquet/TestParquetVectorizedReads.java:
##########
@@ -439,4 +467,83 @@ protected void assertNoLeak(String testName,
Consumer<BufferAllocator> testFunct
allocator.close();
}
}
+
+ private void assertIdenticalFileContents(
+ File actual, File expected, Schema schema, boolean vectorized) throws
IOException {
+ try (CloseableIterable<Record> expectedIterator =
+ Parquet.read(Files.localInput(expected))
+ .project(schema)
+ .createReaderFunc(msgType ->
GenericParquetReaders.buildReader(schema, msgType))
+ .build()) {
+ List<Record> expectedRecords = Lists.newArrayList(expectedIterator);
+ if (vectorized) {
+ assertRecordsMatch(
+ schema, expectedRecords.size(), expectedRecords, actual, false,
BATCH_SIZE);
+ } else {
+ try (CloseableIterable<InternalRow> actualIterator =
+ Parquet.read(Files.localInput(actual))
+ .project(schema)
+ .createReaderFunc(msgType ->
SparkParquetReaders.buildReader(schema, msgType))
+ .build()) {
+ List<InternalRow> actualRecords = Lists.newArrayList(actualIterator);
+ assertThat(actualRecords).hasSameSizeAs(expectedRecords);
+ for (int i = 0; i < actualRecords.size(); i++) {
+ GenericsHelpers.assertEqualsUnsafe(
+ schema.asStruct(), expectedRecords.get(i),
actualRecords.get(i));
+ }
+ }
+ }
+ }
+ }
+
+ static Stream<Arguments> goldenFilesAndEncodings() {
+ return GOLDEN_FILE_ENCODINGS.stream()
+ .flatMap(
+ encoding ->
+ GOLDEN_FILE_TYPES.entrySet().stream()
+ .flatMap(
+ e ->
+ Stream.of(true, false)
+ .map(
+ vectorized ->
+ Arguments.of(
+ encoding, e.getKey(),
e.getValue(), vectorized))));
+ }
+
+ private File resourceUrlToLocalFile(URL url) throws IOException,
URISyntaxException {
+ if ("file".equals(url.getProtocol())) {
+ return Paths.get(url.toURI()).toFile();
+ }
+
+ String name = Paths.get(url.getPath()).getFileName().toString(); // e.g.,
string.parquet
+ String suffix = name.contains(".") ? name.substring(name.lastIndexOf('.'))
: "";
+ File tmp = File.createTempFile("golden-", suffix, temp.toFile());
+ try (InputStream in = url.openStream()) {
+ java.nio.file.Files.copy(in, tmp.toPath(), REPLACE_EXISTING);
+ }
+ return tmp;
+ }
+
+ @ParameterizedTest
+ @MethodSource("goldenFilesAndEncodings")
+ public void testGoldenFiles(
+ String encoding, String typeName, PrimitiveType primitiveType, boolean
vectorized)
+ throws Exception {
+ Path goldenResourcePath = Paths.get("encodings", encoding, typeName +
".parquet");
+ URL goldenFileUrl =
getClass().getClassLoader().getResource(goldenResourcePath.toString());
+ assumeThat(goldenFileUrl).isNotNull().as("type/encoding pair exists");
Review Comment:
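`.as()` needs to come before the final assertion (here `isNotNull()`), as
otherwise it's going to be ignored, i.e. `assumeThat(goldenFileUrl).as("type/encoding pair exists").isNotNull();`.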
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]