This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 02d8ae3  [SPARK-26661][SQL] Show actual class name of the writing command in CTAS explain
02d8ae3 is described below

commit 02d8ae3d598f201c8f614c8af5f0d94470e98e98
Author: Kris Mok <kris....@databricks.com>
AuthorDate: Tue Jan 22 13:55:41 2019 -0800

[SPARK-26661][SQL] Show actual class name of the writing command in CTAS explain

## What changes were proposed in this pull request?

The explain output of the Hive CTAS command always shows `InsertIntoHiveTable` as the writing command, because that name is hardcoded, regardless of whether the command actually writes via Hive's SerDe or has been converted to use Spark's data source. e.g.
```
Execute OptimizedCreateHiveTableAsSelectCommand [Database:default, TableName: foo, InsertIntoHiveTable]
```
This CTAS was converted to use Spark's data source, yet the explain output still says `InsertIntoHiveTable`. It is better to show the actual class name of the writing command being used. For the example above, that would be:
```
Execute OptimizedCreateHiveTableAsSelectCommand [Database: default, TableName: foo, InsertIntoHadoopFsRelationCommand]
```

## How was this patch tested?

Added a test case in `HiveExplainSuite`.

Closes #23582 from rednaxelafx/fix-explain-1.

Authored-by: Kris Mok <kris....@databricks.com>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>
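[Editor's note] Before the diff, here is a minimal, self-contained sketch of the pattern the patch introduces: the base trait exposes an abstract `writingCommandClassName` that each concrete CTAS command overrides, so `argString` reports the real writing command instead of a hardcoded string. The class and member names mirror the patch, but everything else (`DataWritingCommand`, `CatalogTable`, Spark's real `Utils`) is stubbed out here, so treat this as an illustration, not the actual Spark code:

```scala
// Stand-ins for the two writing commands named in the explain output.
// In Spark these are real physical commands; here they are empty stubs.
class InsertIntoHiveTable
class InsertIntoHadoopFsRelationCommand

// Stub for org.apache.spark.util.Utils.getSimpleName, which Spark uses
// instead of calling Class.getSimpleName directly to avoid "Malformed
// class name" errors on some nested classes under Java 8.
object Utils {
  def getSimpleName(cls: Class[_]): String = cls.getSimpleName
}

// The base trait no longer hardcodes "InsertIntoHiveTable"; each concrete
// CTAS command reports the class name of the command it writes with.
trait CreateHiveTableAsSelectBase {
  def database: String
  def table: String
  def writingCommandClassName: String

  def argString: String =
    s"[Database: $database, TableName: $table, $writingCommandClassName]"
}

// CTAS that writes through Hive's SerDe.
case class CreateHiveTableAsSelectCommand(database: String, table: String)
  extends CreateHiveTableAsSelectBase {
  override def writingCommandClassName: String =
    Utils.getSimpleName(classOf[InsertIntoHiveTable])
}

// CTAS converted to write through Spark's native data source path.
case class OptimizedCreateHiveTableAsSelectCommand(database: String, table: String)
  extends CreateHiveTableAsSelectBase {
  override def writingCommandClassName: String =
    Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand])
}

object Demo extends App {
  // Prints: [Database: default, TableName: foo, InsertIntoHiveTable]
  println(CreateHiveTableAsSelectCommand("default", "foo").argString)
  // Prints: [Database: default, TableName: foo, InsertIntoHadoopFsRelationCommand]
  println(OptimizedCreateHiveTableAsSelectCommand("default", "foo").argString)
}
```

Run as a plain Scala script, this prints the corrected explain fragments from the commit message; in the real patch the same `writingCommandClassName` hook is simply threaded into `argString(maxFields: Int)`.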
---
 .../execution/CreateHiveTableAsSelectCommand.scala | 15 +++++++++++--
 .../sql/hive/execution/HiveExplainSuite.scala      | 25 ++++++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
index 7249eac..9f79997 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.execution.SparkPlan
 import org.apache.spark.sql.execution.command.{DataWritingCommand, DDLUtils}
 import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation}
 import org.apache.spark.sql.hive.HiveSessionCatalog
+import org.apache.spark.util.Utils
 
 trait CreateHiveTableAsSelectBase extends DataWritingCommand {
   val tableDesc: CatalogTable
@@ -83,10 +84,14 @@ trait CreateHiveTableAsSelectBase extends DataWritingCommand {
       tableDesc: CatalogTable,
       tableExists: Boolean): DataWritingCommand
 
+  // A subclass should override this with the Class name of the concrete type expected to be
+  // returned from `getWritingCommand`.
+  def writingCommandClassName: String
+
   override def argString(maxFields: Int): String = {
-    s"[Database:${tableDesc.database}, " +
+    s"[Database: ${tableDesc.database}, " +
     s"TableName: ${tableDesc.identifier.table}, " +
-    s"InsertIntoHiveTable]"
+    s"${writingCommandClassName}]"
   }
 }
@@ -118,6 +123,9 @@ case class CreateHiveTableAsSelectCommand(
       ifPartitionNotExists = false,
       outputColumnNames = outputColumnNames)
   }
+
+  override def writingCommandClassName: String =
+    Utils.getSimpleName(classOf[InsertIntoHiveTable])
 }
 
 /**
@@ -162,4 +170,7 @@ case class OptimizedCreateHiveTableAsSelectCommand(
       Some(hadoopRelation.location),
       query.output.map(_.name))
   }
+
+  override def writingCommandClassName: String =
+    Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand])
 }
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index c349a32..d413dfb 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -20,9 +20,13 @@ package org.apache.spark.sql.hive.execution
 import org.apache.spark.sql.QueryTest
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
+import org.apache.spark.sql.hive.HiveUtils
+import org.apache.spark.sql.hive.execution._
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SQLTestUtils
+import org.apache.spark.util.Utils
 
 /**
  * A set of tests that validates support for Hive Explain command.
@@ -182,4 +186,25 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils with TestHiveSingleto
       assert(output.toString.contains(s"Scan hive default.$tableName"))
     }
   }
+
+  test("SPARK-26661: Show actual class name of the writing command in CTAS explain") {
+    Seq(true, false).foreach { convertCTAS =>
+      withSQLConf(
+          HiveUtils.CONVERT_METASTORE_CTAS.key -> convertCTAS.toString,
+          HiveUtils.CONVERT_METASTORE_PARQUET.key -> convertCTAS.toString) {
+
+        val df = sql(s"EXPLAIN CREATE TABLE tab1 STORED AS PARQUET AS SELECT * FROM range(2)")
+        val keywords = if (convertCTAS) {
+          Seq(
+            s"Execute ${Utils.getSimpleName(classOf[OptimizedCreateHiveTableAsSelectCommand])}",
+            Utils.getSimpleName(classOf[InsertIntoHadoopFsRelationCommand]))
+        } else {
+          Seq(
+            s"Execute ${Utils.getSimpleName(classOf[CreateHiveTableAsSelectCommand])}",
+            Utils.getSimpleName(classOf[InsertIntoHiveTable]))
+        }
+        checkKeywordsExist(df, keywords: _*)
+      }
+    }
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org