This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new 35a8e1e [SPARK-31061][SQL] Provide ability to alter the provider of a
table
35a8e1e is described below
commit 35a8e1ed7bb2f9493742d7401e32eab4621fd2d0
Author: Burak Yavuz <[email protected]>
AuthorDate: Fri Mar 6 15:40:44 2020 +0800
[SPARK-31061][SQL] Provide ability to alter the provider of a table
This PR adds functionality to HiveExternalCatalog to be able to change the
provider of a table.
This is useful for catalogs in Spark 3.0 to be able to use alterTable to
change the provider of a table as part of an atomic REPLACE TABLE function.
No
Unit tests
Closes #27822 from brkyvz/externalCat.
Authored-by: Burak Yavuz <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 2e3adadc6a53fe044b286a6a59529a94e7eeda6c)
Signed-off-by: Wenchen Fan <[email protected]>
---
.../spark/sql/hive/HiveExternalCatalog.scala | 10 +++++-
.../spark/sql/hive/HiveExternalCatalogSuite.scala | 40 ++++++++++++++++++++++
2 files changed, 49 insertions(+), 1 deletion(-)
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index ca292f6..be6d824 100644
---
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -634,7 +634,15 @@ private[spark] class HiveExternalCatalog(conf: SparkConf,
hadoopConf: Configurat
k.startsWith(DATASOURCE_PREFIX) || k.startsWith(STATISTICS_PREFIX) ||
k.startsWith(CREATED_SPARK_VERSION)
}
- val newTableProps = propsFromOldTable ++ tableDefinition.properties +
partitionProviderProp
+ val newFormatIfExists = tableDefinition.provider.flatMap { p =>
+ if (DDLUtils.isDatasourceTable(tableDefinition)) {
+ Some(DATASOURCE_PROVIDER -> p)
+ } else {
+ None
+ }
+ }
+ val newTableProps =
+ propsFromOldTable ++ tableDefinition.properties +
partitionProviderProp ++ newFormatIfExists
+ // Add old table's owner if we need to restore
val owner =
Option(tableDefinition.owner).filter(_.nonEmpty).getOrElse(oldTableDef.owner)
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
index 0a88898..473a93b 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala
@@ -17,6 +17,8 @@
package org.apache.spark.sql.hive
+import java.net.URI
+
import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf
@@ -178,4 +180,42 @@ class HiveExternalCatalogSuite extends
ExternalCatalogSuite {
assertThrows[QueryExecutionException](client.runSqlHive(
"INSERT overwrite directory \"fs://localhost/tmp\" select 1 as a"))
}
+
+ test("SPARK-31061: alterTable should be able to change table provider") {
+ val catalog = newBasicCatalog()
+ val parquetTable = CatalogTable(
+ identifier = TableIdentifier("parq_tbl", Some("db1")),
+ tableType = CatalogTableType.MANAGED,
+ storage = storageFormat.copy(locationUri = Some(new
URI("file:/some/path"))),
+ schema = new StructType().add("col1", "int").add("col2", "string"),
+ provider = Some("parquet"))
+ catalog.createTable(parquetTable, ignoreIfExists = false)
+
+ val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+ assert(rawTable.provider === Some("parquet"))
+
+ val fooTable = parquetTable.copy(provider = Some("foo"))
+ catalog.alterTable(fooTable)
+ val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+ assert(alteredTable.provider === Some("foo"))
+ }
+
+ test("SPARK-31061: alterTable should be able to change table provider from
hive") {
+ val catalog = newBasicCatalog()
+ val hiveTable = CatalogTable(
+ identifier = TableIdentifier("parq_tbl", Some("db1")),
+ tableType = CatalogTableType.MANAGED,
+ storage = storageFormat,
+ schema = new StructType().add("col1", "int").add("col2", "string"),
+ provider = Some("hive"))
+ catalog.createTable(hiveTable, ignoreIfExists = false)
+
+ val rawTable = externalCatalog.getTable("db1", "parq_tbl")
+ assert(rawTable.provider === Some("hive"))
+
+ val fooTable = rawTable.copy(provider = Some("foo"))
+ catalog.alterTable(fooTable)
+ val alteredTable = externalCatalog.getTable("db1", "parq_tbl")
+ assert(alteredTable.provider === Some("foo"))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]