This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 02d8ae3  [SPARK-26661][SQL] Show actual class name of the writing command in CTAS explain
02d8ae3 is described below

commit 02d8ae3d598f201c8f614c8af5f0d94470e98e98
Author: Kris Mok <kris....@databricks.com>
AuthorDate: Tue Jan 22 13:55:41 2019 -0800

[SPARK-26661][SQL] Show actual class name of the writing command in CTAS explain

## What changes were proposed in this pull request?

The explain output of the Hive CTAS command always shows `InsertIntoHiveTable` as the writing command, because that name is hardcoded, regardless of whether the command actually writes via Hive's SerDe or has been converted to use Spark's data source. e.g.
```
Execute OptimizedCreateHiveTableAsSelectCommand [Database:default, TableName: foo, InsertIntoHiveTable]
```
This CTAS was converted to use Spark's data source, yet the explain output still says `InsertIntoHiveTable`. It is better to show the actual class name of the writing command being used. For the example above, that would be:
```
Execute OptimizedCreateHiveTableAsSelectCommand [Database: default, TableName: foo, InsertIntoHadoopFsRelationCommand]
```

## How was this patch tested?

Added a test case in `HiveExplainSuite`.

Closes #23582 from rednaxelafx/fix-explain-1.

Authored-by: Kris Mok <kris....@databricks.com>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
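[Editor's note] Before the diff, here is a minimal, self-contained sketch of the pattern the patch introduces: the base trait exposes an abstract `writingCommandClassName` that each concrete CTAS command overrides, so `argString` reports the real writing command instead of a hardcoded string. The class and member names mirror the patch, but everything else (`DataWritingCommand`, `CatalogTable`, Spark's real `Utils`) is stubbed out here, so treat this as an illustration, not the actual Spark code:

```scala
// Stand-ins for the two writing commands named in the explain output.
// In Spark these are real physical commands; here they are empty stubs.
class InsertIntoHiveTable
class InsertIntoHadoopFsRelationCommand

// Stub for org.apache.spark.util.Utils.getSimpleName, which Spark uses
// instead of calling Class.getSimpleName directly to avoid "Malformed
// class name" errors on some nested classes under Java 8.
object Utils {
  def getSimpleName(cls: Class[_]): String = cls.getSimpleName
}

// The base trait no longer hardcodes "InsertIntoHiveTable"; each concrete
// CTAS command reports the class name of the command it writes with.
trait CreateHiveTableAsSelectBase {
  def database: String
  def table: String
  def writingCommandClassName: String

  def argString: String =
    s"[Database: $database, TableName: $table, $writingCommandClassName]"
}

// CTAS that writes through Hive's SerDe.
case class CreateHiveTableAsSelectCommand(database: String, table: String)
  extends CreateHiveTableAsSelectBase {
  override def writingCommandClassName: String =
    Utils.getSimpleName(classOf[InsertIntoHiveTable])
}

// CTAS converted to write through Spark's native data source path.
case class OptimizedCreateHiveTableAsSelectCommand(database: String, table: String)
  extends CreateHiveTableAsSelectBase {
  override def writingCommandClassName: String =
    Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand])
}

object Demo extends App {
  // Prints: [Database: default, TableName: foo, InsertIntoHiveTable]
  println(CreateHiveTableAsSelectCommand("default", "foo").argString)
  // Prints: [Database: default, TableName: foo, InsertIntoHadoopFsRelationCommand]
  println(OptimizedCreateHiveTableAsSelectCommand("default", "foo").argString)
}
```

Run as a plain Scala script, this prints the corrected explain fragments from the commit message; in the real patch the same `writingCommandClassName` hook is simply threaded into `argString(maxFields: Int)`.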
---
 .../execution/CreateHiveTableAsSelectCommand.scala | 15 +++++++++++--
 .../sql/hive/execution/HiveExplainSuite.scala      | 25 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
index 7249eac..9f79997 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.command.{DataWritingCommand, DDLUtils}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation}
 import org.apache.spark.sql.hive.HiveSessionCatalog
+import org.apache.spark.util.Utils
 
 trait CreateHiveTableAsSelectBase extends DataWritingCommand {
   val tableDesc: CatalogTable
@@ -83,10 +84,14 @@ trait CreateHiveTableAsSelectBase extends DataWritingCommand {
       tableDesc: CatalogTable,
       tableExists: Boolean): DataWritingCommand
 
+  // A subclass should override this with the Class name of the concrete type expected to be
+  // returned from `getWritingCommand`.
+  def writingCommandClassName: String
+
   override def argString(maxFields: Int): String = {
-    s"[Database:${tableDesc.database}, " +
+    s"[Database: ${tableDesc.database}, " +
     s"TableName: ${tableDesc.identifier.table}, " +
-    s"InsertIntoHiveTable]"
+    s"${writingCommandClassName}]"
   }
 }
@@ -118,6 +123,9 @@ case class CreateHiveTableAsSelectCommand(
       ifPartitionNotExists = false,
       outputColumnNames = outputColumnNames)
   }
+
+  override def writingCommandClassName: String =
+    Utils.getSimpleName(classOf[InsertIntoHiveTable])
 }
 
 /**
@@ -162,4 +170,7 @@ case class OptimizedCreateHiveTableAsSelectCommand(
       Some(hadoopRelation.location),
       query.output.map(_.name))
   }
+
+  override def writingCommandClassName: String =
+    Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand])
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index c349a32..d413dfb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -20,9 +20,13 @@ package org.apache.spark.sql.hive.execution
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
+import org.apache.spark.sql.hive.HiveUtils
+import org.apache.spark.sql.hive.execution._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.util.Utils
 
 /**
  * A set of tests that validates support for Hive Explain command.
@@ -182,4 +186,25 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
       assert(output.toString.contains(s"Scan hive default.$tableName"))
     }
   }
+
+  test("SPARK-26661: Show actual class name of the writing command in CTAS explain") {
+    Seq(true, false).foreach { convertCTAS =>
+      withSQLConf(
+          HiveUtils.CONVERT_METASTORE_CTAS.key -> convertCTAS.toString,
+          HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertCTAS.toString) {
+
+        val df = sql(s"EXPLAIN CREATE TABLE tab1 STORED AS PARQUET AS SELECT * FROM range(2)")
+        val keywords = if (convertCTAS) {
+          Seq(
+            s"Execute ${Utils.getSimpleName(classOf[OptimizedCreateHiveTableAsSelectCommand])}",
+            Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand]))
+        } else {
+          Seq(
+            s"Execute ${Utils.getSimpleName(classOf[CreateHiveTableAsSelectCommand])}",
+            Utils.getSimpleName(classOf[InsertIntoHiveTable]))
+        }
+        checkKeywordsExist(df, keywords: _*)
+      }
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org