Repository: spark
Updated Branches:
refs/heads/master 877dc712e -> fa1af0aff
[SPARK-14251][SQL] Add SQL command for printing out generated code for debugging
## What changes were proposed in this pull request?
This PR implements the `EXPLAIN CODEGEN` SQL command, which returns the generated code
in the same way as `debugCodegen`. In `spark-shell`, we no longer need to import the `debug` module.
In `spark-sql`, we can now use this SQL command.
**Before**
```
scala> import org.apache.spark.sql.execution.debug._
scala> sql("select 'a' as a group by 1").debugCodegen()
Found 2 WholeStageCodegen subtrees.
== Subtree 1 / 2 ==
...
Generated code:
...
== Subtree 2 / 2 ==
...
Generated code:
...
```
**After**
```
scala> sql("explain extended codegen select 'a' as a group by 1").collect().foreach(println)
[Found 2 WholeStageCodegen subtrees.]
[== Subtree 1 / 2 ==]
...
[]
[Generated code:]
...
[]
[== Subtree 2 / 2 ==]
...
[]
[Generated code:]
...
```
## How was this patch tested?
Passes the Jenkins tests (including the new test cases).
Author: Dongjoon Hyun <[email protected]>
Closes #12099 from dongjoon-hyun/SPARK-14251.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fa1af0af
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fa1af0af
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fa1af0af
Branch: refs/heads/master
Commit: fa1af0aff7bde9bbf7bfa6a3ac74699734c2fd8a
Parents: 877dc71
Author: Dongjoon Hyun <[email protected]>
Authored: Fri Apr 1 22:45:52 2016 -0700
Committer: Reynold Xin <[email protected]>
Committed: Fri Apr 1 22:45:52 2016 -0700
----------------------------------------------------------------------
.../apache/spark/sql/catalyst/parser/SqlBase.g4 | 5 ++-
.../spark/sql/execution/SparkSqlParser.scala | 3 +-
.../spark/sql/execution/command/commands.scala | 15 +++++--
.../spark/sql/execution/debug/package.scala | 43 ++++++++++----------
.../sql/execution/debug/DebuggingSuite.scala | 2 +-
.../spark/sql/hive/execution/commands.scala | 1 -
.../sql/hive/execution/HiveExplainSuite.scala | 29 +++++++++++++
7 files changed, 67 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index d1747b9..f34bb06 100644
---
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -584,7 +584,7 @@ frameBound
explainOption
- : LOGICAL | FORMATTED | EXTENDED
+ : LOGICAL | FORMATTED | EXTENDED | CODEGEN
;
transactionMode
@@ -633,7 +633,7 @@ nonReserved
| DELIMITED | FIELDS | TERMINATED | COLLECTION | ITEMS | KEYS | ESCAPED |
LINES | SEPARATED
| EXTENDED | REFRESH | CLEAR | CACHE | UNCACHE | LAZY | TEMPORARY | OPTIONS
| GROUPING | CUBE | ROLLUP
- | EXPLAIN | FORMAT | LOGICAL | FORMATTED
+ | EXPLAIN | FORMAT | LOGICAL | FORMATTED | CODEGEN
| TABLESAMPLE | USE | TO | BUCKET | PERCENTLIT | OUT | OF
| SET
| VIEW | REPLACE
@@ -724,6 +724,7 @@ DESCRIBE: 'DESCRIBE';
EXPLAIN: 'EXPLAIN';
FORMAT: 'FORMAT';
LOGICAL: 'LOGICAL';
+CODEGEN: 'CODEGEN';
CAST: 'CAST';
SHOW: 'SHOW';
TABLES: 'TABLES';
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index 7efe98d..ff3ab77 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -136,7 +136,8 @@ class SparkSqlAstBuilder extends AstBuilder {
// Create the explain comment.
val statement = plan(ctx.statement)
if (isExplainableStatement(statement)) {
- ExplainCommand(statement, extended = options.exists(_.EXTENDED != null))
+ ExplainCommand(statement, extended = options.exists(_.EXTENDED != null),
+ codegen = options.exists(_.CODEGEN != null))
} else {
ExplainCommand(OneRowRelation)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
index f90d871..4bc62cd 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala
@@ -28,10 +28,10 @@ import
org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.execution.debug._
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
-
/**
* A logical command that is executed for its side-effects.
`RunnableCommand`s are
* wrapped in `ExecutedCommand` during execution.
@@ -237,15 +237,22 @@ case class ExplainCommand(
logicalPlan: LogicalPlan,
override val output: Seq[Attribute] =
Seq(AttributeReference("plan", StringType, nullable = true)()),
- extended: Boolean = false)
+ extended: Boolean = false,
+ codegen: Boolean = false)
extends RunnableCommand {
// Run through the optimizer to generate the physical plan.
override def run(sqlContext: SQLContext): Seq[Row] = try {
// TODO in Hive, the "extended" ExplainCommand prints the AST as well, and
detailed properties.
val queryExecution = sqlContext.executePlan(logicalPlan)
- val outputString = if (extended) queryExecution.toString else
queryExecution.simpleString
-
+ val outputString =
+ if (codegen) {
+ codegenString(queryExecution.executedPlan)
+ } else if (extended) {
+ queryExecution.toString
+ } else {
+ queryExecution.simpleString
+ }
outputString.split("\n").map(Row(_))
} catch { case cause: TreeNodeException[_] =>
("Error occurred during query planning: \n" +
cause.getMessage).split("\n").map(Row(_))
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
index 3a174ed..7b0c8eb 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/debug/package.scala
@@ -48,6 +48,25 @@ package object debug {
// scalastyle:on println
}
+ def codegenString(plan: SparkPlan): String = {
+ val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
+ plan transform {
+ case s: WholeStageCodegen =>
+ codegenSubtrees += s
+ s
+ case s => s
+ }
+ var output = s"Found ${codegenSubtrees.size} WholeStageCodegen subtrees.\n"
+ for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
+ output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
+ output += s
+ output += "\nGenerated code:\n"
+ val (_, source) = s.doCodeGen()
+ output += s"${CodeFormatter.format(source)}\n"
+ }
+ output
+ }
+
/**
* Augments [[SQLContext]] with debug methods.
*/
@@ -81,28 +100,7 @@ package object debug {
* WholeStageCodegen subtree).
*/
def debugCodegen(): Unit = {
- debugPrint(debugCodegenString())
- }
-
- /** Visible for testing. */
- def debugCodegenString(): String = {
- val plan = query.queryExecution.executedPlan
- val codegenSubtrees = new collection.mutable.HashSet[WholeStageCodegen]()
- plan transform {
- case s: WholeStageCodegen =>
- codegenSubtrees += s
- s
- case s => s
- }
- var output = s"Found ${codegenSubtrees.size} WholeStageCodegen
subtrees.\n"
- for ((s, i) <- codegenSubtrees.toSeq.zipWithIndex) {
- output += s"== Subtree ${i + 1} / ${codegenSubtrees.size} ==\n"
- output += s
- output += "\nGenerated code:\n"
- val (_, source) = s.doCodeGen()
- output += s"${CodeFormatter.format(source)}\n"
- }
- output
+ debugPrint(codegenString(query.queryExecution.executedPlan))
}
}
@@ -123,6 +121,7 @@ package object debug {
/**
* A collection of metrics for each column of output.
+ *
* @param elementTypes the actual runtime types for the output. Useful
when there are bugs
* causing the wrong data to be projected.
*/
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
index 979265e..c0fce4b 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala
@@ -27,7 +27,7 @@ class DebuggingSuite extends SparkFunSuite with
SharedSQLContext {
}
test("debugCodegen") {
- val res = sqlContext.range(10).groupBy("id").count().debugCodegenString()
+ val res =
codegenString(sqlContext.range(10).groupBy("id").count().queryExecution.executedPlan)
assert(res.contains("Subtree 1 / 2"))
assert(res.contains("Subtree 2 / 2"))
assert(res.contains("Object[]"))
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
index cd26a68..64d1341 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/commands.scala
@@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
-import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.{BucketSpec, DataSource,
LogicalRelation}
import org.apache.spark.sql.hive.HiveContext
http://git-wip-us.apache.org/repos/asf/spark/blob/fa1af0af/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
index b7ef5d1..c45d49d 100644
---
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
+++
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveExplainSuite.scala
@@ -101,4 +101,33 @@ class HiveExplainSuite extends QueryTest with SQLTestUtils
with TestHiveSingleto
"Physical Plan should not contain Subquery since it's eliminated by
optimizer")
}
}
+
+ test("EXPLAIN CODEGEN command") {
+ checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), true,
+ "WholeStageCodegen",
+ "Generated code:",
+ "/* 001 */ public Object generate(Object[] references) {",
+ "/* 002 */ return new GeneratedIterator(references);",
+ "/* 003 */ }"
+ )
+
+ checkExistence(sql("EXPLAIN CODEGEN SELECT 1"), false,
+ "== Physical Plan =="
+ )
+
+ checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), true,
+ "WholeStageCodegen",
+ "Generated code:",
+ "/* 001 */ public Object generate(Object[] references) {",
+ "/* 002 */ return new GeneratedIterator(references);",
+ "/* 003 */ }"
+ )
+
+ checkExistence(sql("EXPLAIN EXTENDED CODEGEN SELECT 1"), false,
+ "== Parsed Logical Plan ==",
+ "== Analyzed Logical Plan ==",
+ "== Optimized Logical Plan ==",
+ "== Physical Plan =="
+ )
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]