This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6abc4a1a58ef [SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext`
6abc4a1a58ef is described below
commit 6abc4a1a58ef4e5d896717b10b2314dae2af78af
Author: Max Gekk <[email protected]>
AuthorDate: Wed Nov 8 15:51:50 2023 +0300
[SPARK-45841][SQL] Expose stack trace by `DataFrameQueryContext`
### What changes were proposed in this pull request?
In this PR, I propose to change the case class `DataFrameQueryContext`: add the
stack trace as a new field `stackTrace`, and override `callSite` and `fragment`
to derive their values from that field.
### Why are the changes needed?
By exposing the stack trace, we give users the opportunity to see the full stack
trace needed for debugging.
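For illustration, a hedged sketch (not part of this patch) of how a caller could inspect the newly exposed trace; the `DataFrame` argument and the missing column `a` are assumptions for the example:
```
import org.apache.spark.sql.{AnalysisException, DataFrame}
import org.apache.spark.sql.catalyst.trees.DataFrameQueryContext
import org.apache.spark.sql.functions.col

// Sketch: dump the stack trace that DataFrameQueryContext now carries
// when an analysis error is raised.
def dumpQueryContext(df: DataFrame): Unit =
  try {
    df.select(col("a")).collect() // assumed to fail: column `a` does not exist
  } catch {
    case e: AnalysisException =>
      e.context.foreach {
        case c: DataFrameQueryContext =>
          // Before this change only the `fragment` and `callSite` strings
          // were available; now the full user stack trace is exposed.
          c.stackTrace.foreach(println)
        case _ => ()
      }
  }
```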
### Does this PR introduce _any_ user-facing change?
No, `DataFrameQueryContext` hasn't been released yet.
### How was this patch tested?
By running the modified test suite:
```
$ build/sbt "test:testOnly *DatasetSuite"
```
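To narrow the run to a single test, ScalaTest's `-z` substring filter can be passed through sbt; the filter string below is an assumption about the test name:
```
$ build/sbt "sql/testOnly *DatasetSuite -- -z \"DataFrameQueryContext\""
```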
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43703 from MaxGekk/stack-traces-in-DataFrameQueryContext.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/sql/catalyst/trees/QueryContexts.scala | 33 +++++++++-------------
.../scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++----
2 files changed, 22 insertions(+), 24 deletions(-)
diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
index 8d885d07ca8b..874c834b7558 100644
--- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
+++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala
@@ -134,9 +134,7 @@ case class SQLQueryContext(
   override def callSite: String = throw new UnsupportedOperationException
 }
 
-case class DataFrameQueryContext(
-    override val fragment: String,
-    override val callSite: String) extends QueryContext {
+case class DataFrameQueryContext(stackTrace: Seq[StackTraceElement]) extends QueryContext {
   override val contextType = QueryContextType.DataFrame
 
   override def objectType: String = throw new UnsupportedOperationException
@@ -144,6 +142,19 @@ case class DataFrameQueryContext(
   override def startIndex: Int = throw new UnsupportedOperationException
   override def stopIndex: Int = throw new UnsupportedOperationException
 
+  override val fragment: String = {
+    stackTrace.headOption.map { firstElem =>
+      val methodName = firstElem.getMethodName
+      if (methodName.length > 1 && methodName(0) == '$') {
+        methodName.substring(1)
+      } else {
+        methodName
+      }
+    }.getOrElse("")
+  }
+
+  override val callSite: String = stackTrace.tail.headOption.map(_.toString).getOrElse("")
+
   override lazy val summary: String = {
     val builder = new StringBuilder
     builder ++= "== DataFrame ==\n"
@@ -157,19 +168,3 @@ case class DataFrameQueryContext(
     builder.result()
   }
 }
-
-object DataFrameQueryContext {
-  def apply(elements: Array[StackTraceElement]): DataFrameQueryContext = {
-    val fragment = elements.headOption.map { firstElem =>
-      val methodName = firstElem.getMethodName
-      if (methodName.length > 1 && methodName(0) == '$') {
-        methodName.substring(1)
-      } else {
-        methodName
-      }
-    }.getOrElse("")
-    val callSite = elements.tail.headOption.map(_.toString).getOrElse("")
-
-    DataFrameQueryContext(fragment, callSite)
-  }
-}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 66105d2ac429..dcbd8948120c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -37,6 +37,7 @@ import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncod
 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder
 import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema}
 import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
+import org.apache.spark.sql.catalyst.trees.DataFrameQueryContext
 import org.apache.spark.sql.catalyst.util.sideBySide
 import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution}
 import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
@@ -2668,16 +2669,18 @@ class DatasetSuite extends QueryTest
     withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") {
       val df = Seq(1).toDS()
       var callSitePattern: String = null
+      val exception = intercept[AnalysisException] {
+        callSitePattern = getNextLineCallSitePattern()
+        val c = col("a")
+        df.select(c)
+      }
       checkError(
-        exception = intercept[AnalysisException] {
-          callSitePattern = getNextLineCallSitePattern()
-          val c = col("a")
-          df.select(c)
-        },
+        exception,
         errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION",
         sqlState = "42703",
         parameters = Map("objectName" -> "`a`", "proposal" -> "`value`"),
         context = ExpectedContext(fragment = "col", callSitePattern = callSitePattern))
+
+      assert(exception.context.head.asInstanceOf[DataFrameQueryContext].stackTrace.length == 2)
     }
   }
 }
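As an aside on the QueryContexts.scala hunk above: the `fragment` derivation strips one leading `$` because the Scala compiler may prepend `$` to synthesized method names (for example a forwarder for `col`). A standalone sketch of that derivation; the hand-built stack trace elements and their class/file names are illustrative assumptions:
```
// Standalone sketch of the fragment/callSite derivation. `drop(1)` is used
// here so the sketch is total on an empty Seq; the patch itself uses `.tail`.
def fragmentOf(stackTrace: Seq[StackTraceElement]): String =
  stackTrace.headOption.map { firstElem =>
    val methodName = firstElem.getMethodName
    // Strip the '$' the Scala compiler may prepend to synthesized methods.
    if (methodName.length > 1 && methodName(0) == '$') methodName.substring(1)
    else methodName
  }.getOrElse("")

def callSiteOf(stackTrace: Seq[StackTraceElement]): String =
  stackTrace.drop(1).headOption.map(_.toString).getOrElse("")

// Illustrative elements: the head is the API entry point, the second
// element is the user call site.
val st = Seq(
  new StackTraceElement("org.apache.spark.sql.functions$", "$col", "functions.scala", 100),
  new StackTraceElement("MyApp$", "main", "MyApp.scala", 42))
assert(fragmentOf(st) == "col")
assert(callSiteOf(st) == "MyApp$.main(MyApp.scala:42)")
```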