Re: [PR] Ignore schema merge updates from long -> int [iceberg]

via GitHub Tue, 12 Nov 2024 10:23:38 -0800


rocco408 commented on code in PR #11419:
URL: https://github.com/apache/iceberg/pull/11419#discussion_r1838575824



##########
spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestDataFrameWriterV2.java:
##########
@@ -244,4 +247,132 @@ public void testMergeSchemaSparkConfiguration() throws Exception {
             row(4L, "d", 140000.56F)),
         sql("select * from %s order by id", tableName));
   }
+
+  @TestTemplate
+  public void testMergeSchemaIgnoreCastingLongToInt() throws Exception {
+    sql(
+        "ALTER TABLE %s SET TBLPROPERTIES ('%s'='true')",
+        tableName, TableProperties.SPARK_WRITE_ACCEPT_ANY_SCHEMA);
+
+    Dataset<Row> bigintDF =
+        jsonToDF(
+            "id bigint, data string",
+            "{ \"id\": 1, \"data\": \"a\" }",
+            "{ \"id\": 2, \"data\": \"b\" }");
+
+    bigintDF.writeTo(tableName).append();
+
+    assertEquals(
+        "Should have initial rows with long column",
+        ImmutableList.of(row(1L, "a"), row(2L, "b")),
+        sql("select * from %s order by id", tableName));
+
+    Dataset<Row> intDF =
+        jsonToDF(
+            "id int, data string",
+            "{ \"id\": 3, \"data\": \"c\" }",
+            "{ \"id\": 4, \"data\": \"d\" }");
+
+    assertThatCode(() -> intDF.writeTo(tableName).option("merge-schema", "true").append())
+        .doesNotThrowAnyException();
+
+    assertEquals(
+        "Should include new rows with unchanged long column type",
+        ImmutableList.of(row(1L, "a"), row(2L, "b"), row(3L, "c"), row(4L, "d")),
+        sql("select * from %s order by id", tableName));
+
+    // verify the column type did not change
+    Types.NestedField idField =
+        Spark3Util.loadIcebergTable(spark, tableName).schema().findField("id");
+    assertThat(idField.type().typeId().equals(Type.TypeID.LONG));
+  }
+
+  @TestTemplate
+  public void testMergeSchemaIgnoreCastingDoubleToFloat() throws Exception {
+    removeTables();
+    sql("CREATE TABLE %s (id double, data string) USING iceberg", tableName);
+    sql(
+        "ALTER TABLE %s SET TBLPROPERTIES ('%s'='true')",
+        tableName, TableProperties.SPARK_WRITE_ACCEPT_ANY_SCHEMA);
+
+    Dataset<Row> doubleDF =
+        jsonToDF(
+            "id double, data string",
+            "{ \"id\": 1.0, \"data\": \"a\" }",
+            "{ \"id\": 2.0, \"data\": \"b\" }");
+
+    doubleDF.writeTo(tableName).append();
+
+    assertEquals(
+        "Should have initial rows with double column",
+        ImmutableList.of(row(1.0, "a"), row(2.0, "b")),
+        sql("select * from %s order by id", tableName));
+
+    Dataset<Row> floatDF =
+        jsonToDF(
+            "id float, data string",
+            "{ \"id\": 3.0, \"data\": \"c\" }",
+            "{ \"id\": 4.0, \"data\": \"d\" }");
+
+    assertThatCode(() -> floatDF.writeTo(tableName).option("merge-schema", "true").append())
+        .doesNotThrowAnyException();
+
+    assertEquals(
+        "Should include new rows with unchanged double column type",
+        ImmutableList.of(row(1.0, "a"), row(2.0, "b"), row(3.0, "c"), row(4.0, "d")),
+        sql("select * from %s order by id", tableName));
+
+    // verify the column type did not change
+    Types.NestedField idField =
+        Spark3Util.loadIcebergTable(spark, tableName).schema().findField("id");
+    assertThat(idField.type().typeId().equals(Type.TypeID.DOUBLE));
+  }
+
+  @TestTemplate
+  public void testMergeSchemaIgnoreCastingDecimalToDecimalWithNarrowerPrecision() throws Exception {

Review Comment:
   I'm not in love with the length of this test, but it's clear what it does 😅



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Re: [PR] Ignore schema merge updates from long -> int [iceberg]

Reply via email to