xndai commented on code in PR #16343:
URL: https://github.com/apache/iceberg/pull/16343#discussion_r3262732043
##########
arrow/src/test/java/org/apache/iceberg/arrow/vectorized/TestArrowReader.java:
##########
@@ -436,6 +436,157 @@ public void testUnsignedSmallIntegerColumnRoundtrips(int
unsignedBitWidth, int v
assertThat(totalRows).isEqualTo(1);
}
+ /**
+ * Tests that the vectorized reader correctly handles int-to-long type promotion when the Parquet
+ * file has an INT(32, true) logical type annotation. This reproduces a bug where reading a file
+ * written with INT(32) logical type after an ALTER TABLE promoting the column from int to long
+ * causes a ClassCastException (BigIntVector cannot be cast to IntVector).
+ *
+ * <p>The vector remains an IntVector (matching the physical storage), and the accessor handles
+ * widening to long on read.
+ */
+ @Test
+ public void testIntToLongPromotionWithLogicalType() throws Exception {
+ tables = new HadoopTables();
+ Schema schema = new Schema(Types.NestedField.required(1, "col",
Types.IntegerType.get()));
+ Table table = tables.create(schema, tempDir.toURI() +
"/int-promotion-logical");
+
+ // Write a Parquet file with INT(32, signed) logical type annotation.
+ // This is what non-Iceberg writers (PyArrow, Spark native, etc.) typically produce.
+ MessageType parquetSchema =
+ new MessageType(
+ "test",
+ primitive(PrimitiveType.PrimitiveTypeName.INT32,
Type.Repetition.REQUIRED)
+ .as(LogicalTypeAnnotation.intType(32, true))
+ .id(1)
+ .named("col"));
+
+ File testFile = new File(tempDir, "int-logical-type-promotion.parquet");
+ List<Integer> values = ImmutableList.of(1, 2, 3, Integer.MAX_VALUE);
+ try (ParquetWriter<Group> writer =
+ ExampleParquetWriter.builder(new
Path(testFile.toURI())).withType(parquetSchema).build()) {
+ SimpleGroupFactory factory = new SimpleGroupFactory(parquetSchema);
+ for (int val : values) {
+ Group group = factory.newGroup();
+ group.add("col", val);
+ writer.write(group);
+ }
+ }
+
+ DataFile dataFile =
+ DataFiles.builder(PartitionSpec.unpartitioned())
+ .withPath(testFile.getAbsolutePath())
+ .withFileSizeInBytes(testFile.length())
+ .withFormat(FileFormat.PARQUET)
+ .withRecordCount(values.size())
+ .build();
+ table.newAppend().appendFile(dataFile).commit();
+
+ // Promote the column type from int to long (simulates ALTER TABLE)
+ table.updateSchema().updateColumn("col", Types.LongType.get()).commit();
+ table = tables.load(tempDir.toURI() + "/int-promotion-logical");
+
+ // Read with the vectorized reader — the underlying vector is IntVector (physical type),
+ // but the accessor correctly widens to long when getLong() is called.
+ int totalRows = 0;
+ int rowIndex = 0;
Review Comment:
fixed.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]