This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new ad2f611e40b7 [SPARK-51840][SQL] Restore Partition columns in HiveExternalCatalog#alterTable
ad2f611e40b7 is described below

commit ad2f611e40b7fe0338a7abb6a3337e5204fc71f6
Author: Kent Yao <y...@apache.org>
AuthorDate: Sun Apr 20 20:24:08 2025 +0900

    [SPARK-51840][SQL] Restore Partition columns in HiveExternalCatalog#alterTable
    
    ### What changes were proposed in this pull request?
    
    This PR restores the partition column information in the
    HiveExternalCatalog#alterTable API. Without it, many commands that go
    through this API fail, such as CTAS and MSCK REPAIR on Spark-specific
    metastore schemas.
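
    For context, Spark stores tables whose schemas are not Hive-compatible
    with a placeholder schema in the metastore and keeps the real schema in
    table properties. A minimal sketch of the restoration step this PR adds
    (illustrative only; the real EMPTY_DATA_SCHEMA constant and
    restoreTableMetadata helper live in HiveExternalCatalog):

    ```
    import org.apache.spark.sql.types.StructType

    // Sketch: when the stored schema is the placeholder, rebuild it with the
    // restored partition columns appended, so Hive does not see the table's
    // partition keys being dropped by the alterTable call.
    def schemaForAlter(
        storedSchema: StructType,
        placeholder: StructType, // EMPTY_DATA_SCHEMA in HiveExternalCatalog
        restoredPartitionSchema: StructType): StructType = {
      if (storedSchema == placeholder) {
        StructType(placeholder ++ restoredPartitionSchema)
      } else {
        storedSchema
      }
    }
    ```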
    
    ### Why are the changes needed?
    Improvement for partitioned non-Hive-compatible tables, which previously
    failed with:
    
    ```
    [info]   Cause: org.apache.hadoop.hive.ql.metadata.HiveException: Unable to alter table. partition keys can not be changed.
    [info]   at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:634)
    [info]   at org.apache.hadoop.hive.ql.metadata.Hive.alterTable(Hive.java:612)
    [info]   at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    [info]   at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77)
    [info]   at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    [info]   at java.base/java.lang.reflect.Method.invoke(Method.java:569)
    ```
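
    A minimal reproduction sketch, mirroring the new test added below
    (assumes a Hive-enabled SparkSession; the table and column names are the
    ones used in the test):

    ```
    // TIMESTAMP_NTZ makes the table non-Hive-compatible, so it is stored
    // with the placeholder schema and takes the restore path on alterTable.
    spark.sql(
      """CREATE TABLE t USING json
        |  PARTITIONED BY (A) AS
        |    SELECT 'APACHE' A, TIMESTAMP_NTZ '2018-11-17 13:33:33' B
        |""".stripMargin)

    // Before this fix, the repair below failed with the HiveException above.
    spark.sql("MSCK REPAIR TABLE t")
    ```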
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    new test
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #50623 from yaooqinn/partitions.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
---
 .../org/apache/spark/sql/hive/HiveExternalCatalog.scala    | 11 +++++++++--
 .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index 99d09395fc31..01be6ff2116f 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -666,12 +666,19 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       val newTableProps =
         propsFromOldTable ++ tableDefinition.properties + partitionProviderProp ++ newFormatIfExists
 
+      val (newSchema, partitionColumnNames) = if (oldTableDef.schema == EMPTY_DATA_SCHEMA) {
+        val restoredOldTable = restoreTableMetadata(oldTableDef)
+        (StructType(EMPTY_DATA_SCHEMA ++ restoredOldTable.partitionSchema),
+          restoredOldTable.partitionColumnNames)
+      } else {
+        (oldTableDef.schema, oldTableDef.partitionColumnNames)
+      }
      // Add old table's owner if we need to restore
       val owner = Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
       val newDef = tableDefinition.copy(
         storage = newStorage,
-        schema = oldTableDef.schema,
-        partitionColumnNames = oldTableDef.partitionColumnNames,
+        schema = newSchema,
+        partitionColumnNames = partitionColumnNames,
         bucketSpec = oldTableDef.bucketSpec,
         properties = newTableProps,
         owner = owner)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index a47efaf1cc28..acd2f13de1eb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.hive.execution
 
 import java.io.File
 import java.net.URI
+import java.time.LocalDateTime
 import java.util.Locale
 
 import org.apache.hadoop.fs.Path
@@ -610,6 +611,19 @@ class HiveDDLSuite
     )
   }
 
+  test("SPARK-51840: Restore Partition columns in 
HiveExternalCatalog#alterTable") {
+    withTable("t") {
+      sql(
+        """
+          |CREATE TABLE t USING json
+          |  PARTITIONED BY (A) AS
+          |    SELECT 'APACHE' A, TIMESTAMP_NTZ '2018-11-17 13:33:33' B
+          |""".stripMargin)
+      sql("MSCK REPAIR TABLE t")
+      checkAnswer(spark.table("t"), Row(LocalDateTime.of(2018, 11, 17, 13, 33, 33), "APACHE"))
+    }
+  }
+
   test("add/drop partitions - external table") {
     val catalog = spark.sessionState.catalog
     withTempDir { tmpDir =>
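
A note on the expected row in the new test: Spark appends partition columns
after the data columns in a table's schema, so the data column B comes back
before the partition column A. An illustrative check (hypothetical
spark-shell output, assuming the table t from the test):

```
// Partition column A sits after data column B in the resolved schema.
spark.table("t").printSchema()
// root
//  |-- B: timestamp_ntz (nullable = true)
//  |-- A: string (nullable = true)
```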

