RussellSpitzer commented on code in PR #12212: URL: https://github.com/apache/iceberg/pull/12212#discussion_r2072201554
########## spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestIcebergSourceTablesBase.java: ########## @@ -2170,6 +2173,99 @@ public void testTableWithInt96Timestamp() throws IOException { } } + @Test + public void testSparkTableWithMissingFilesFailure() throws IOException { + TableIdentifier tableIdentifier = TableIdentifier.of("db", "missing_files_test"); + Table table = createTable(tableIdentifier, SCHEMA, SPEC); + + File parquetTablePath = temp.newFolder("table_missing_files"); + String parquetTableLocation = parquetTablePath.toURI().toString(); + spark.sql( + String.format( + "CREATE TABLE parquet_table (data string, id int) " + + "USING parquet PARTITIONED BY (id) LOCATION '%s'", + parquetTableLocation)); + + List<SimpleRecord> records = + Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b")); + + Dataset<Row> inputDF = spark.createDataFrame(records, SimpleRecord.class); + inputDF.select("data", "id").write().mode("overwrite").insertInto("parquet_table"); + + // Add a Spark partition of which location is missing + spark.sql("ALTER TABLE parquet_table ADD PARTITION (id = 1234)"); + Path partitionLocationPath = parquetTablePath.toPath().resolve("id=1234"); + java.nio.file.Files.delete(partitionLocationPath); + + NameMapping mapping = MappingUtil.create(table.schema()); + String mappingJson = NameMappingParser.toJson(mapping); + + table.updateProperties().set(TableProperties.DEFAULT_NAME_MAPPING, mappingJson).commit(); + + String stagingLocation = table.location() + "/metadata"; + + assertThatThrownBy( + () -> + SparkTableUtil.importSparkTable( + spark, + new org.apache.spark.sql.catalyst.TableIdentifier("parquet_table"), + table, + stagingLocation)) + .hasMessageContaining("Unable to list files in partition") Review Comment: We can also check that the message string contains partitionLocationPath. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org