spark git commit: [SPARK-14251][SQL] Add SQL command for printing out generated code for debugging

rxin Fri, 01 Apr 2016 22:46:59 -0700

Repository: spark
Updated Branches:
  refs/heads/master 877dc712e -> fa1af0aff



[SPARK-14251][SQL] Add SQL command for printing out generated code for debugging

## What changes were proposed in this pull request?

This PR implements the `EXPLAIN CODEGEN` SQL command, which returns the generated 
code in the same way as `debugCodegen`. In `spark-shell`, we no longer need to 
import the `debug` module. In `spark-sql`, we can now use this SQL command.

**Before**
```
scala> import org.apache.spark.sql.execution.debug._
scala> sql("select 'a' as a group by 1").debugCodegen()
Found 2 WholeStageCodegen subtrees.
== Subtree 1 / 2 ==
...

Generated code:
...

== Subtree 2 / 2 ==
...

Generated code:
...
```

**After**
```
scala> sql("explain extended codegen select 'a' as a group by 1").collect().foreach(println)
[Found 2 WholeStageCodegen subtrees.]
[== Subtree 1 / 2 ==]
...
[]
[Generated code:]
...
[]
[== Subtree 2 / 2 ==]
...
[]
[Generated code:]
...
```

## How was this patch tested?

Passes the Jenkins tests (including the new test cases).

Author: Dongjoon Hyun <[email protected]>

Closes #12099 from dongjoon-hyun/SPARK-14251.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa1af0af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa1af0af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa1af0af

Branch: refs/heads/master
Commit: fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a
Parents: 877dc71
Author: Dongjoon Hyun <[email protected]>
Authored: Fri Apr 1 22:45:52 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Fri Apr 1 22:45:52 2016 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  5 ++-
 .../spark/sql/execution/SparkSqlParser.scala    |  3 +-
 .../spark/sql/execution/command/commands.scala  | 15 +++++--
 .../spark/sql/execution/debug/package.scala     | 43 ++++++++++----------
 .../sql/execution/debug/DebuggingSuite.scala    |  2 +-
 .../spark/sql/hive/execution/commands.scala     |  1 -
 .../sql/hive/execution/HiveExplainSuite.scala   | 29 +++++++++++++
 7 files changed, 67 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d1747b9..f34bb06 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -584,7 +584,7 @@ frameBound
 
 
 explainOption
-    : LOGICAL | FORMATTED | EXTENDED
+    : LOGICAL | FORMATTED | EXTENDED | CODEGEN
     ;
 
 transactionMode
@@ -633,7 +633,7 @@ nonReserved
     | DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED | 
LINES | SEPARATED
     | EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS
     | GROUPING | CUBE | ROLLUP
-    | EXPLAIN | FORMAT | LOGICAL | FORMATTED
+    | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN
     | TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
     | SET
     | VIEW | REPLACE
@@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE';
 EXPLAIN: 'EXPLAIN';
 FORMAT: 'FORMAT';
 LOGICAL: 'LOGICAL';
+CODEGEN: 'CODEGEN';
 CAST: 'CAST';
 SHOW: 'SHOW';
 TABLES: 'TABLES';

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7efe98d..ff3ab77 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder {
     // Create the explain comment.
     val statement = plan(ctx.statement)
     if (isExplainableStatement(statement)) {
-      ExplainCommand(statement, extended = options.exists(_.EXTENDED != null))
+      ExplainCommand(statement, extended = options.exists(_.EXTENDED != null),
+        codegen = options.exists(_.CODEGEN != null))
     } else {
       ExplainCommand(OneRowRelation)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index f90d871..4bc62cd 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -28,10 +28,10 @@ import 
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
 import org.apache.spark.sql.catalyst.plans.logical
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.debug._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
-
 /**
  * A logical command that is executed for its side-effects.  
`RunnableCommand`s are
  * wrapped in `ExecutedCommand` during execution.
@@ -237,15 +237,22 @@ case class ExplainCommand(
     logicalPlan: LogicalPlan,
     override val output: Seq[Attribute] =
       Seq(AttributeReference("plan", StringType, nullable = true)()),
-    extended: Boolean = false)
+    extended: Boolean = false,
+    codegen: Boolean = false)
   extends RunnableCommand {
 
   // Run through the optimizer to generate the physical plan.
   override def run(sqlContext: SQLContext): Seq[Row] = try {
     // TODO in Hive, the "extended" ExplainCommand prints the AST as well, and 
detailed properties.
     val queryExecution = sqlContext.executePlan(logicalPlan)
-    val outputString = if (extended) queryExecution.toString else 
queryExecution.simpleString
-
+    val outputString =
+      if (codegen) {
+        codegenString(queryExecution.executedPlan)
+      } else if (extended) {
+        queryExecution.toString
+      } else {
+        queryExecution.simpleString
+      }
     outputString.split("\n").map(Row(_))
   } catch { case cause: TreeNodeException[_] =>
     ("Error occurred during query planning: \n" + 
cause.getMessage).split("\n").map(Row(_))

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index 3a174ed..7b0c8eb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -48,6 +48,25 @@ package object debug {
     // scalastyle:on println
   }
 
+  def codegenString(plan: SparkPlan): String = {
+    val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
+    plan transform {
+      case s: WholeStageCodegen =>
+        codegenSubtrees += s
+        s
+      case s => s
+    }
+    var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
+    for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
+      output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
+      output += s
+      output += "\nGenerated code:\n"
+      val (_, source) = s.doCodeGen()
+      output += s"${CodeFormatter.format(source)}\n"
+    }
+    output
+  }
+
   /**
    * Augments [[SQLContext]] with debug methods.
    */
@@ -81,28 +100,7 @@ package object debug {
      * WholeStageCodegen subtree).
      */
     def debugCodegen(): Unit = {
-      debugPrint(debugCodegenString())
-    }
-
-    /** Visible for testing. */
-    def debugCodegenString(): String = {
-      val plan = query.queryExecution.executedPlan
-      val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
-      plan transform {
-        case s: WholeStageCodegen =>
-          codegenSubtrees += s
-          s
-        case s => s
-      }
-      var output = s"Found ${codegenSubtrees.size} WholeStageCodegen 
subtrees.\n"
-      for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
-        output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
-        output += s
-        output += "\nGenerated code:\n"
-        val (_, source) = s.doCodeGen()
-        output += s"${CodeFormatter.format(source)}\n"
-      }
-      output
+      debugPrint(codegenString(query.queryExecution.executedPlan))
     }
   }
 
@@ -123,6 +121,7 @@ package object debug {
 
     /**
      * A collection of metrics for each column of output.
+     *
      * @param elementTypes the actual runtime types for the output.  Useful 
when there are bugs
      *                     causing the wrong data to be projected.
      */

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 979265e..c0fce4b 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with 
SharedSQLContext {
   }
 
   test("debugCodegen") {
-    val res = sqlContext.range(10).groupBy("id").count().debugCodegenString()
+    val res = 
codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan)
     assert(res.contains("Subtree 1 / 2"))
     assert(res.contains("Subtree 2 / 2"))
     assert(res.contains("Object[]"))

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index cd26a68..64d1341 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.execution.command.RunnableCommand
 import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource, 
LogicalRelation}
 import org.apache.spark.sql.hive.HiveContext

http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index b7ef5d1..c45d49d 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils 
with TestHiveSingleto
         "Physical Plan should not contain Subquery since it's eliminated by 
optimizer")
     }
   }
+
+  test("EXPLAIN CODEGEN command") {
+    checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), true,
+      "WholeStageCodegen",
+      "Generated code:",
+      "/* 001 */ public Object generate(Object[] references) {",
+      "/* 002 */   return new GeneratedIterator(references);",
+      "/* 003 */ }"
+    )
+
+    checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), false,
+      "== Physical Plan =="
+    )
+
+    checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), true,
+      "WholeStageCodegen",
+      "Generated code:",
+      "/* 001 */ public Object generate(Object[] references) {",
+      "/* 002 */   return new GeneratedIterator(references);",
+      "/* 003 */ }"
+    )
+
+    checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), false,
+      "== Parsed Logical Plan ==",
+      "== Analyzed Logical Plan ==",
+      "== Optimized Logical Plan ==",
+      "== Physical Plan =="
+    )
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-14251][SQL] Add SQL command for printing out generated code for debugging

Reply via email to