This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ce714d8 [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
ce714d8 is described below
commit ce714d81894a48e2d06c530674c2190e0483e1b4
Author: Kent Yao <[email protected]>
AuthorDate: Tue May 12 13:37:13 2020 +0000
[SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
### What changes were proposed in this pull request?
While investigating the root cause of SPARK-31675, I noticed that it was very difficult to see what was actually going on, since the CLI printed nothing but
```sql
Error in query: java.lang.IllegalArgumentException: Wrong FS: blablah/.hive-staging_blahbla, expected: hdfs://cluster1
```
It is really hard to find the root cause from such a terse error message without a certain amount of experience.
In this PR, I propose to print the full stack trace when an `AnalysisException` with an underlying root cause occurs; this can be suppressed via the `-S` (silent) option.
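The change amounts to the following handling in the CLI driver. This is a minimal sketch of the pattern in the diff below, not the verbatim patch; `err` and `isSilent` are placeholders for `SparkSQLCLIDriver`'s output stream and session-state flag:

```scala
import java.io.PrintStream

import org.apache.spark.sql.AnalysisException

// Sketch of the proposed behavior; `err` and `isSilent` stand in for the
// CLI driver's output stream and SessionState silent flag.
def reportError(err: PrintStream, e: Throwable, isSilent: Boolean): Unit = e match {
  case ae: AnalysisException => ae.cause match {
    case Some(_) if !isSilent =>
      // A root cause is attached and we are not in silent (-S) mode:
      // print the message followed by the full stack trace.
      err.println(
        s"""Error in query: ${ae.getMessage}
           |${org.apache.hadoop.util.StringUtils.stringifyException(ae)}""".stripMargin)
    case _ =>
      // Silent mode, or no underlying cause: keep the old one-line output.
      err.println(s"Error in query: ${ae.getMessage}")
  }
  case other => err.println(other.getMessage)
}
```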
### Why are the changes needed?
In SPARK-11188,
>For analysis exceptions in the sql-shell, we should only print the error message to the screen. The stacktrace will never have useful information since this error is used to signify an error with the query.
But nowadays, some `AnalysisException`s do carry useful information for debugging, e.g. the `AnalysisException` below may wrap exceptions from the Hive or Hadoop side.
https://github.com/apache/spark/blob/a28ed86a387b286745b30cd4d90b3d558205a5a7/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala#L97-L112
```scala
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
    at org.apache.hadoop.fs.FileSystem.makeQualified(FileSystem.java:468)
    at org.apache.hadoop.hive.common.FileUtils.isSubDir(FileUtils.java:626)
    at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2850)
    at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
    at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1593)
```
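For reference, the linked `withClient` helper wraps client-side failures roughly like this (a simplified sketch of the linked source, not a verbatim copy; `isClientException` is a predicate defined elsewhere in `HiveExternalCatalog`):

```scala
import java.lang.reflect.InvocationTargetException

import scala.util.control.NonFatal

import org.apache.spark.sql.AnalysisException

// Member of HiveExternalCatalog (sketch): client-side Hive/Hadoop failures
// are rethrown as AnalysisException, keeping the original exception in the
// `cause` field that this PR now prints.
private def withClient[T](body: => T): T = synchronized {
  try {
    body
  } catch {
    case NonFatal(exception) if isClientException(exception) =>
      val e = exception match {
        // Calls go through the Hive shim via reflection, so unwrap
        // InvocationTargetException to reach the real failure.
        case i: InvocationTargetException => i.getCause
        case o => o
      }
      throw new AnalysisException(
        e.getClass.getCanonicalName + ": " + e.getMessage, cause = Some(e))
  }
}
```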
### Does this PR introduce _any_ user-facing change?
Yes. `bin/spark-sql` now prints the full stack trace when an `AnalysisException` that carries a root cause occurs; before this fix, only the message was printed.
#### Before
```scala
Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
```
#### After
```scala
Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
org.apache.spark.sql.AnalysisException: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10;
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:109)
    at org.apache.spark.sql.hive.HiveExternalCatalog.loadPartition(HiveExternalCatalog.scala:890)
    at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.loadPartition(ExternalCatalogWithListener.scala:179)
    at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:312)
    at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
    at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3644)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3642)
    at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
    at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602)
    at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:376)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:486)
    at scala.collection.Iterator.foreach(Iterator.scala:941)
    at scala.collection.Iterator.foreach$(Iterator.scala:941)
    at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
    at scala.collection.IterableLike.foreach(IterableLike.scala:74)
    at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
    at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:480)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:282)
    at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
    at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)
    at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165)
    at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
    at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163)
    at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
    at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
    at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)
    at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)
    at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Caused by: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10
    at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:194)
    at org.apache.hadoop.hdfs.DistributedFileSystem.getEZForPath(DistributedFileSystem.java:2093)
    at org.apache.hadoop.hdfs.client.HdfsAdmin.getEncryptionZoneForPath(HdfsAdmin.java:289)
    at org.apache.hadoop.hive.shims.Hadoop23Shims$HdfsEncryptionShim.isPathEncrypted(Hadoop23Shims.java:1221)
    at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2607)
    at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2892)
    at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
    at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1324)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.sql.hive.client.Shim_v0_14.loadPartition(HiveShim.scala:927)
    at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$loadPartition$1(HiveClientImpl.scala:870)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294)
    at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
    at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
    at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
    at org.apache.spark.sql.hive.client.HiveClientImpl.loadPartition(HiveClientImpl.scala:860)
    at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$loadPartition$1(HiveExternalCatalog.scala:911)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
    ... 52 more
```
You can use the `-S` option (e.g. `bin/spark-sql -S`) to restore the old behavior if you find the output too verbose.
### How was this patch tested?
Existing CliSuite test: `SPARK-11188 Analysis error reporting`. Added a new test and verified manually.
Closes #28499 from yaooqinn/SPARK-31678.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/hive/thriftserver/SparkSQLCLIDriver.scala | 16 ++++++++++++----
.../apache/spark/sql/hive/thriftserver/CliSuite.scala | 18 ++++++++++++++++++
2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index bffa24c..50b5eb4 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -380,10 +380,18 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
           ret = rc.getResponseCode
           if (ret != 0) {
-            // For analysis exception, only the error is printed out to the console.
-            rc.getException() match {
-              case e : AnalysisException =>
-                err.println(s"""Error in query: ${e.getMessage}""")
+            rc.getException match {
+              case e: AnalysisException => e.cause match {
+                case Some(_) if !sessionState.getIsSilent =>
+                  err.println(
+                    s"""Error in query: ${e.getMessage}
+                       |${org.apache.hadoop.util.StringUtils.stringifyException(e)}
+                     """.stripMargin)
+                // For analysis exceptions in silent mode or simple ones that only relate to the
+                // query itself, such as `NoSuchDatabaseException`, only the error is printed out
+                // to the console.
+                case _ => err.println(s"""Error in query: ${e.getMessage}""")
+              }
               case _ => err.println(rc.getErrorMessage())
             }
             driver.close()
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 265e777..bcdcd07 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -509,4 +509,22 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
     )
   }
+
+  test("AnalysisException with root cause will be printStacktrace") {
+    // If it is not in silent mode, the stacktrace will be printed
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--hiveconf", "hive.session.silent=false",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("NumberFormatException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs to be an integer."),
+      ("", "NumberFormatException: invalid input syntax for type numeric: 1.2"))
+    // If it is in silent mode, only the error message will be printed
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--conf", "spark.hive.session.silent=true",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("AnalysisException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs to be an integer."))
+  }
 }