This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new b035bb177c08 [SPARK-46370][SQL] Fix bug when querying from table after changing column defaults
b035bb177c08 is described below
commit b035bb177c0875cfb7edb6d8672d4d2ac2813d1b
Author: Daniel Tenedorio <[email protected]>
AuthorDate: Tue Dec 12 14:44:27 2023 -0800
[SPARK-46370][SQL] Fix bug when querying from table after changing column defaults
### What changes were proposed in this pull request?
This PR fixes a bug when querying from a table after changing its column defaults:
```
drop table if exists t;
create table t(i int, s string default 'def') using parquet;
insert into t select 1, default;
alter table t alter column s drop default;
insert into t select 2, default;
select * from t; -- Removing this line changes the following results!
alter table t alter column s set default 'mno';
insert into t select 3, default;
select * from t;
```
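For reference, the correct results of the final SELECT (the same rows the new regression test below asserts) are:
```
i  s
-  ----
1  def
2  NULL
3  mno
```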
The bug is related to the table relation cache: the SELECT populates the cache, and the subsequent ALTER TABLE ... ALTER COLUMN command did not invalidate that entry, so later INSERT statements could resolve DEFAULT against a stale schema. The fix adds a manual refresh of the cache in the ALTER command so that DML commands resolve column default values against the current table schema.
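As a minimal sketch of the same invalidation done manually from user code (the table name `t` is illustrative, reusing the repro above; `spark.catalog.refreshTable` is Spark's public API for dropping a table's cached relation and metadata):
```scala
// Workaround sketch, assuming the table `t` from the repro above:
// manually refresh the table so the next statement re-resolves its
// metadata, including column default values, instead of reusing the
// stale cached relation.
spark.sql("ALTER TABLE t ALTER COLUMN s SET DEFAULT 'mno'")
spark.catalog.refreshTable("t")
spark.sql("INSERT INTO t SELECT 3, DEFAULT")
```
With this fix, AlterTableChangeColumnCommand performs the refresh itself, so no manual call is needed.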
### Why are the changes needed?
This PR fixes a correctness bug.
### Does this PR introduce _any_ user-facing change?
Yes, see above.
### How was this patch tested?
This PR adds a regression test to `InsertSuite`.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #44302 from dtenedor/fix-default-bug.
Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../apache/spark/sql/execution/command/ddl.scala | 3 +++
.../org/apache/spark/sql/sources/InsertSuite.scala | 24 ++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
index 7e001803592f..dc1c5b3fd580 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala
@@ -374,6 +374,9 @@ case class AlterTableChangeColumnCommand(
   // TODO: support change column name/dataType/metadata/position.
   override def run(sparkSession: SparkSession): Seq[Row] = {
     val catalog = sparkSession.sessionState.catalog
+    // This command may change column default values, so we need to refresh the table relation cache
+    // here so that DML commands can resolve these default values correctly.
+    catalog.refreshTable(tableName)
     val table = catalog.getTableRawMetadata(tableName)
     val resolver = sparkSession.sessionState.conf.resolver
     DDLUtils.verifyAlterTableType(catalog, table, isView = false)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 94535bc84a4c..76073a108a3c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -2608,6 +2608,30 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
     }
   }

+  test("SPARK-46370: Querying a table should not invalidate the column defaults") {
+    withTable("t") {
+      // Create a table and insert some rows into it, changing the default value of a column
+      // throughout.
+      spark.sql("CREATE TABLE t(i INT, s STRING DEFAULT 'def') USING CSV")
+      spark.sql("INSERT INTO t SELECT 1, DEFAULT")
+      spark.sql("ALTER TABLE t ALTER COLUMN s DROP DEFAULT")
+      spark.sql("INSERT INTO t SELECT 2, DEFAULT")
+      // Run a query to trigger the table relation cache.
+      val results = spark.table("t").collect()
+      assert(results.length == 2)
+      // Change the column default value and insert another row. Then query the table's contents
+      // and the results should be correct.
+      spark.sql("ALTER TABLE t ALTER COLUMN s SET DEFAULT 'mno'")
+      spark.sql("INSERT INTO t SELECT 3, DEFAULT").collect()
+      checkAnswer(
+        spark.table("t"),
+        Seq(
+          Row(1, "def"),
+          Row(2, null),
+          Row(3, "mno")))
+    }
+  }
+
   test("UNSUPPORTED_OVERWRITE.TABLE: Can't overwrite a table that is also being read from") {
     val tableName = "t1"
     withTable(tableName) {