Repository: spark
Updated Branches:
refs/heads/master 0e0904a2f -> 5f731d685
[SPARK-15792][SQL] Allows operator to change the verbosity in explain output
## What changes were proposed in this pull request?
This PR allows customization of the verbosity of explain output. After this
change, `dataframe.explain()` and `dataframe.explain(true)` produce output with
different levels of verbosity for the physical plan.
Currently, this PR only enables the verbose string for the operators
`HashAggregateExec` and `SortAggregateExec`. We will gradually enable verbose
strings for more operators in the future.
**Less verbose mode:** dataframe.explain(extended = false)
`output=[count(a)#85L]` is **NOT** displayed for HashAggregate.
```
scala> Seq((1,2,3)).toDF("a", "b", "c").createTempView("df2")
scala> spark.sql("select count(a) from df2").explain()
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)])
      +- LocalTableScan
```
**Verbose mode:** dataframe.explain(extended = true)
`output=[count(a)#85L]` is displayed for HashAggregate.
```
scala> spark.sql("select count(a) from df2").explain(true) // "output=[count(a)#85L]" is added
...
== Physical Plan ==
*HashAggregate(key=[], functions=[count(1)], output=[count(a)#85L])
+- Exchange SinglePartition
   +- *HashAggregate(key=[], functions=[partial_count(1)], output=[count#87L])
      +- LocalTableScan
```
## How was this patch tested?
Manual test.
Author: Sean Zhong <[email protected]>
Closes #13535 from clockfly/verbose_breakdown_2.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f731d68
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f731d68
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f731d68
Branch: refs/heads/master
Commit: 5f731d6859c4516941e5f90c99c966ef76268864
Parents: 0e0904a
Author: Sean Zhong <[email protected]>
Authored: Mon Jun 6 22:59:25 2016 -0700
Committer: Cheng Lian <[email protected]>
Committed: Mon Jun 6 22:59:25 2016 -0700
----------------------------------------------------------------------
.../sql/catalyst/expressions/Expression.scala | 4 ++++
.../spark/sql/catalyst/plans/QueryPlan.scala | 2 ++
.../spark/sql/catalyst/trees/TreeNode.scala | 23 +++++++++++++++-----
.../spark/sql/execution/QueryExecution.scala | 14 +++++++-----
.../sql/execution/WholeStageCodegenExec.scala | 6 +++--
.../execution/aggregate/HashAggregateExec.scala | 12 ++++++++--
.../execution/aggregate/SortAggregateExec.scala | 12 ++++++++--
7 files changed, 55 insertions(+), 18 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index 2ec4621..efe592d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -190,6 +190,10 @@ abstract class Expression extends TreeNode[Expression] {
case single => single :: Nil
}
+ // Marks this as final, Expression.verboseString should never be called, and
thus shouldn't be
+ // overridden by concrete classes.
+ final override def verboseString: String = simpleString
+
override def simpleString: String = toString
override def toString: String = prettyName + flatArguments.mkString("(", ",
", ")")
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 19a66cf..cf34f4b 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -257,6 +257,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]]
extends TreeNode[PlanT
override def simpleString: String = statePrefix + super.simpleString
+ override def verboseString: String = simpleString
+
/**
* All the subqueries of current plan.
*/
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
----------------------------------------------------------------------
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
index 22d82c6..c67366d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala
@@ -462,10 +462,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
extends Product {
/** ONE line description of this node. */
def simpleString: String = s"$nodeName $argString".trim
+ /** ONE line description of this node with more information */
+ def verboseString: String
+
override def toString: String = treeString
/** Returns a string representation of the nodes in this tree */
- def treeString: String = generateTreeString(0, Nil, new
StringBuilder).toString
+ def treeString: String = treeString(verbose = true)
+
+ def treeString(verbose: Boolean): String = {
+ generateTreeString(0, Nil, new StringBuilder, verbose).toString
+ }
/**
* Returns a string representation of the nodes in this tree, where each
operator is numbered.
@@ -508,6 +515,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
extends Product {
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
if (depth > 0) {
lastChildren.init.foreach { isLast =>
@@ -520,18 +528,21 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]]
extends Product {
}
builder.append(prefix)
- builder.append(simpleString)
+ val headline = if (verbose) verboseString else simpleString
+ builder.append(headline)
builder.append("\n")
if (innerChildren.nonEmpty) {
innerChildren.init.foreach(_.generateTreeString(
- depth + 2, lastChildren :+ false :+ false, builder))
- innerChildren.last.generateTreeString(depth + 2, lastChildren :+ false
:+ true, builder)
+ depth + 2, lastChildren :+ false :+ false, builder, verbose))
+ innerChildren.last.generateTreeString(
+ depth + 2, lastChildren :+ false :+ true, builder, verbose)
}
if (children.nonEmpty) {
- children.init.foreach(_.generateTreeString(depth + 1, lastChildren :+
false, builder, prefix))
- children.last.generateTreeString(depth + 1, lastChildren :+ true,
builder, prefix)
+ children.init.foreach(
+ _.generateTreeString(depth + 1, lastChildren :+ false, builder,
verbose, prefix))
+ children.last.generateTreeString(depth + 1, lastChildren :+ true,
builder, verbose, prefix)
}
builder
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
index 330459c..560214a 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala
@@ -201,24 +201,26 @@ class QueryExecution(val sparkSession: SparkSession, val
logical: LogicalPlan) {
def simpleString: String = {
s"""== Physical Plan ==
- |${stringOrError(executedPlan)}
+ |${stringOrError(executedPlan.treeString(verbose = false))}
""".stripMargin.trim
}
override def toString: String = {
def output =
analyzed.output.map(o => s"${o.name}:
${o.dataType.simpleString}").mkString(", ")
- val analyzedPlan =
- Seq(stringOrError(output),
stringOrError(analyzed)).filter(_.nonEmpty).mkString("\n")
+ val analyzedPlan = Seq(
+ stringOrError(output),
+ stringOrError(analyzed.treeString(verbose = true))
+ ).filter(_.nonEmpty).mkString("\n")
s"""== Parsed Logical Plan ==
- |${stringOrError(logical)}
+ |${stringOrError(logical.treeString(verbose = true))}
|== Analyzed Logical Plan ==
|$analyzedPlan
|== Optimized Logical Plan ==
- |${stringOrError(optimizedPlan)}
+ |${stringOrError(optimizedPlan.treeString(verbose = true))}
|== Physical Plan ==
- |${stringOrError(executedPlan)}
+ |${stringOrError(executedPlan.treeString(verbose = true))}
""".stripMargin.trim
}
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index d3e8d4e..e0d8e35 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -250,8 +250,9 @@ case class InputAdapter(child: SparkPlan) extends
UnaryExecNode with CodegenSupp
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
- child.generateTreeString(depth, lastChildren, builder, "")
+ child.generateTreeString(depth, lastChildren, builder, verbose, "")
}
}
@@ -407,8 +408,9 @@ case class WholeStageCodegenExec(child: SparkPlan) extends
UnaryExecNode with Co
depth: Int,
lastChildren: Seq[Boolean],
builder: StringBuilder,
+ verbose: Boolean,
prefix: String = ""): StringBuilder = {
- child.generateTreeString(depth, lastChildren, builder, "*")
+ child.generateTreeString(depth, lastChildren, builder, verbose, "*")
}
}
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index f270ca0..b617e26 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -764,7 +764,11 @@ case class HashAggregateExec(
"""
}
- override def simpleString: String = {
+ override def verboseString: String = toString(verbose = true)
+
+ override def simpleString: String = toString(verbose = false)
+
+ private def toString(verbose: Boolean): String = {
val allAggregateExpressions = aggregateExpressions
testFallbackStartsAt match {
@@ -772,7 +776,11 @@ case class HashAggregateExec(
val keyString = groupingExpressions.mkString("[", ",", "]")
val functionString = allAggregateExpressions.mkString("[", ",", "]")
val outputString = output.mkString("[", ",", "]")
- s"HashAggregate(key=$keyString, functions=$functionString,
output=$outputString)"
+ if (verbose) {
+ s"HashAggregate(key=$keyString, functions=$functionString,
output=$outputString)"
+ } else {
+ s"HashAggregate(key=$keyString, functions=$functionString)"
+ }
case Some(fallbackStartsAt) =>
s"HashAggregateWithControlledFallback $groupingExpressions " +
s"$allAggregateExpressions $resultExpressions
fallbackStartsAt=$fallbackStartsAt"
http://git-wip-us.apache.org/repos/asf/spark/blob/5f731d68/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
index 9e48ff8..41ba9f5 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
@@ -103,12 +103,20 @@ case class SortAggregateExec(
}
}
- override def simpleString: String = {
+ override def simpleString: String = toString(verbose = false)
+
+ override def verboseString: String = toString(verbose = true)
+
+ private def toString(verbose: Boolean): String = {
val allAggregateExpressions = aggregateExpressions
val keyString = groupingExpressions.mkString("[", ",", "]")
val functionString = allAggregateExpressions.mkString("[", ",", "]")
val outputString = output.mkString("[", ",", "]")
- s"SortAggregate(key=$keyString, functions=$functionString,
output=$outputString)"
+ if (verbose) {
+ s"SortAggregate(key=$keyString, functions=$functionString,
output=$outputString)"
+ } else {
+ s"SortAggregate(key=$keyString, functions=$functionString)"
+ }
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]