This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new ad2f611e40b7 [SPARK-51840][SQL] Restore Partition columns in HiveExternalCatalog#alterTable
ad2f611e40b7 is described below

commit ad2f611e40b7fe0338a7abb6a3337e5204fc71f6
Author: Kent Yao <y...@apache.org>
AuthorDate: Sun Apr 20 20:24:08 2025 +0900

[SPARK-51840][SQL] Restore Partition columns in HiveExternalCatalog#alterTable

### What changes were proposed in this pull request?

This PR restores the partition column information in the HiveExternalCatalog#alterTable API. Without it, many of the commands that go through this API fail on Spark-specific metastore schemas, such as CTAS and MSCK.

### Why are the changes needed?

This is an improvement for partitioned, non-Hive-compatible tables, which otherwise fail with:

```
[info] Cause: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. partition keys can not be changed.
[info] at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:634)
[info] at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:612)
[info] at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[info] at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
[info] at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[info] at java.base/java.lang.reflect.Method.invoke(Method.java:569)
```
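As a side note, here is a minimal standalone reproduction of the failure, as a sketch mirroring the new test further below. It assumes a Hive-enabled SparkSession; the TIMESTAMP_NTZ column has no Hive counterpart, so the table is stored with a Spark-specific schema in its table properties rather than a Hive-compatible one:

```scala
import org.apache.spark.sql.SparkSession

// Sketch only: a Hive-enabled session is required to go through HiveExternalCatalog.
val spark = SparkSession.builder()
  .appName("SPARK-51840-repro")
  .enableHiveSupport()
  .getOrCreate()

// TIMESTAMP_NTZ cannot be mapped to a Hive type, so the real schema lives in
// Spark table properties and the Hive-side schema is only a placeholder.
spark.sql(
  """CREATE TABLE t USING json
    |PARTITIONED BY (A) AS
    |SELECT 'APACHE' A, TIMESTAMP_NTZ '2018-11-17 13:33:33' B
    |""".stripMargin)

// Repairing the table ends up in HiveExternalCatalog#alterTable; without this
// fix, the altered table definition lost its partition columns and Hive
// rejected it with "partition keys can not be changed".
spark.sql("MSCK REPAIR TABLE t")
```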
### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

A new test.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50623 from yaooqinn/partitions.

Authored-by: Kent Yao <y...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/spark/sql/hive/HiveExternalCatalog.scala    | 11 +++++++++--
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 99d09395fc31..01be6ff2116f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -666,12 +666,19 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       val newTableProps = propsFromOldTable ++ tableDefinition.properties +
         partitionProviderProp ++ newFormatIfExists
 
+      val (newSchema, partitionColumnNames) = if (oldTableDef.schema == EMPTY_DATA_SCHEMA) {
+        val restoredOldTable = restoreTableMetadata(oldTableDef)
+        (StructType(EMPTY_DATA_SCHEMA ++ restoredOldTable.partitionSchema),
+          restoredOldTable.partitionColumnNames)
+      } else {
+        (oldTableDef.schema, oldTableDef.partitionColumnNames)
+      }
       // Add old table's owner if we need to restore
       val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
       val newDef = tableDefinition.copy(
         storage = newStorage,
-        schema = oldTableDef.schema,
-        partitionColumnNames = oldTableDef.partitionColumnNames,
+        schema = newSchema,
+        partitionColumnNames = partitionColumnNames,
         bucketSpec = oldTableDef.bucketSpec,
         properties = newTableProps,
         owner = owner)

diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index a47efaf1cc28..acd2f13de1eb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution
 
 import java.io.File
 import java.net.URI
+import java.time.LocalDateTime
 import java.util.Locale
 
 import org.apache.hadoop.fs.Path
@@ -610,6 +611,19 @@ class HiveDDLSuite
     )
   }
 
+  test("SPARK-51840: Restore Partition columns in HiveExternalCatalog#alterTable") {
+    withTable("t") {
+      sql(
+        """
+          |CREATE TABLE t USING json
+          | PARTITIONED BY (A) AS
+          | SELECT 'APACHE' A, TIMESTAMP_NTZ '2018-11-17 13:33:33' B
+          |""".stripMargin)
+      sql("MSCK REPAIR TABLE t")
+      checkAnswer(spark.table("t"), Row(LocalDateTime.of(2018, 11, 17, 13, 33, 33), "APACHE"))
+    }
+  }
+
   test("add/drop partitions - external table") {
     val catalog = spark.sessionState.catalog
     withTempDir { tmpDir =>
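For completeness, a hypothetical follow-up check, not part of this commit: other commands that rewrite table metadata through HiveExternalCatalog#alterTable should also work on such tables after this change. A sketch, assuming the table `t` created as in the reproduction above:

```scala
// Hypothetical verification, assuming table `t` exists as created above.
// SET TBLPROPERTIES also rewrites the table definition via the external
// catalog's alterTable, the code path this commit fixes.
spark.sql("ALTER TABLE t SET TBLPROPERTIES ('note' = 'partitions restored')")

// The partition column A should still be reported afterwards.
spark.sql("DESCRIBE TABLE t").show(truncate = false)
```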