This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new caab724 [SPARK-34359][SQL][3.1] Add a legacy config to restore the
output schema of SHOW DATABASES
caab724 is described below
commit caab724801e75259e043a79bf996b6d9e0ac2a7b
Author: Wenchen Fan <[email protected]>
AuthorDate: Fri Feb 5 20:43:48 2021 +0800
[SPARK-34359][SQL][3.1] Add a legacy config to restore the output schema of
SHOW DATABASES
This backports https://github.com/apache/spark/pull/31474 to 3.1/3.0
This is a followup of https://github.com/apache/spark/pull/26006
In #26006, we merged the v1 and v2 SHOW DATABASES/NAMESPACES commands, but
we missed a behavior change: the output schema of SHOW DATABASES became
different.
This PR adds a legacy config to restore the old schema, with a migration
guide item to mention this behavior change.
Improve backward compatibility
No (the legacy config is false by default)
a new test
Closes #31486 from cloud-fan/command-schema.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
(cherry picked from commit 7c87b48029e12ed0ce0b1b37f436ffb3d85ee83c)
Signed-off-by: Wenchen Fan <[email protected]>
---
docs/sql-migration-guide.md | 2 ++
.../org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala | 9 ++++++---
.../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 9 +++++++++
.../spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 8 ++++++++
.../sql/execution/datasources/v2/DataSourceV2Strategy.scala | 4 ++--
.../scala/org/apache/spark/sql/execution/command/DDLSuite.scala | 6 ++++++
6 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index c9b02da..742a05b 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -32,6 +32,8 @@ license: |
- In Spark 3.0.2, `PARTITION(col=null)` is always parsed as a null literal
in the partition spec. In Spark 3.0.1 or earlier, it is parsed as a string
literal of its text representation, e.g., string "null", if the partition
column is string type. To restore the legacy behavior, you can set
`spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true.
+ - In Spark 3.0.0, the output schema of `SHOW DATABASES` becomes `namespace:
string`. In Spark version 2.4 and earlier, the schema was `databaseName:
string`. Since Spark 3.0.2, you can restore the old schema by setting
`spark.sql.legacy.keepCommandOutputSchema` to `true`.
+
## Upgrading from Spark SQL 3.0 to 3.0.1
- In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer
TimestampType from string values if they match to the pattern defined by the
JSON option `timestampFormat`. Since version 3.0.1, the timestamp type
inference is disabled by default. Set the JSON option `inferTimestamp` to
`true` to enable such type inference.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
index 9077f7a..3f4a893 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala
@@ -300,11 +300,14 @@ case class AlterNamespaceSetLocation(
*/
case class ShowNamespaces(
namespace: LogicalPlan,
- pattern: Option[String]) extends Command {
+ pattern: Option[String],
+ override val output: Seq[Attribute] = ShowNamespaces.OUTPUT) extends
Command {
override def children: Seq[LogicalPlan] = Seq(namespace)
+ override def producedAttributes: AttributeSet = outputSet
+}
- override val output: Seq[Attribute] = Seq(
- AttributeReference("namespace", StringType, nullable = false)())
+object ShowNamespaces {
+ val OUTPUT = Seq(AttributeReference("namespace", StringType, nullable =
false)())
}
/**
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index f4c36dd..f55546f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2611,6 +2611,15 @@ object SQLConf {
.checkValue(_ > 0, "The timeout value must be positive")
.createWithDefault(10L)
+ val LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA =
+ buildConf("spark.sql.legacy.keepCommandOutputSchema")
+ .internal()
+ .doc("When true, Spark will keep the output schema of commands such as
SHOW DATABASES " +
+ "unchanged, for v1 catalog and/or table.")
+ .version("3.0.2")
+ .booleanConf
+ .createWithDefault(false)
+
/**
* Holds information about keys that have been deprecated.
*
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
index 9a8d2f0..007193f 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala
@@ -225,6 +225,14 @@ class ResolveSessionCatalog(
}
AlterDatabaseSetLocationCommand(ns.head, location)
+ case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if
isSessionCatalog(cata) =>
+ if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
+ assert(output.length == 1)
+ s.copy(output = Seq(output.head.withName("databaseName")))
+ } else {
+ s
+ }
+
// v1 RENAME TABLE supports temp view.
case RenameTableStatement(TempViewOrV1Table(oldName), newName, isView) =>
AlterTableRenameCommand(oldName.asTableIdentifier,
newName.asTableIdentifier, isView)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
index 877aea1..4624e98 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala
@@ -273,8 +273,8 @@ class DataSourceV2Strategy(session: SparkSession) extends
Strategy with Predicat
case DropNamespace(ResolvedNamespace(catalog, ns), ifExists, cascade) =>
DropNamespaceExec(catalog, ns, ifExists, cascade) :: Nil
- case r @ ShowNamespaces(ResolvedNamespace(catalog, ns), pattern) =>
- ShowNamespacesExec(r.output, catalog.asNamespaceCatalog, ns, pattern) ::
Nil
+ case ShowNamespaces(ResolvedNamespace(catalog, ns), pattern, output) =>
+ ShowNamespacesExec(output, catalog.asNamespaceCatalog, ns, pattern) ::
Nil
case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) =>
ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index 847bc66..21d7acf 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1325,6 +1325,12 @@ abstract class DDLSuite extends QueryTest with
SQLTestUtils {
Nil)
}
+ test("SPARK-34359: keep the legacy output schema") {
+ withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
+ assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq ==
Seq("databaseName"))
+ }
+ }
+
test("drop view - temporary view") {
val catalog = spark.sessionState.catalog
sql(
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]