This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 2549e38  [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
2549e38 is described below

commit 2549e38690fd461c7d01518f1c5df2452efa66b5
Author: Kent Yao <[email protected]>
AuthorDate: Tue May 12 13:37:13 2020 +0000

    [SPARK-31678][SQL] Print error stack trace for Spark SQL CLI when error occurs
    
    ### What changes were proposed in this pull request?
    
    While investigating the root cause of SPARK-31675, I noticed that it was very difficult to see what was actually going on, since the CLI printed nothing but
    ```sql
    Error in query: java.lang.IllegalArgumentException: Wrong FS: blablah/.hive-staging_blahbla, expected: hdfs://cluster1
    ```
    
    It is hard to find the root cause from such a terse error message without a certain amount of experience.
    
    In this PR, I propose to print the full stack trace when an `AnalysisException` occurs and has an underlying root cause; this can be suppressed via the `-S` (silent) option, as sketched below.
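    
    A minimal sketch of the proposed handling, paraphrasing the diff below (`rc`, `sessionState`, and `err` are names from `SparkSQLCLIDriver`; the actual patch formats the trace with `StringUtils.stringifyException` rather than `printStackTrace`):
    
    ```scala
    rc.getException match {
      // Print the full stack trace only when the AnalysisException wraps a
      // real root cause and the session is not in silent (-S) mode.
      case e: AnalysisException if e.cause.isDefined && !sessionState.getIsSilent =>
        err.println(s"Error in query: ${e.getMessage}")
        e.printStackTrace(err)
      // Otherwise keep the old behavior: print only the error message.
      case e: AnalysisException =>
        err.println(s"Error in query: ${e.getMessage}")
      case _ =>
        err.println(rc.getErrorMessage())
    }
    ```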
    
    ### Why are the changes needed?
    
    In SPARK-11188,
    
    > For analysis exceptions in the sql-shell, we should only print the error message to the screen. The stacktrace will never have useful information since this error is used to signify an error with the query.
    
    But nowadays, some `AnalysisException`s do carry useful information for debugging, e.g. the `AnalysisException` below may wrap exceptions from the Hive or Hadoop side.
    
    
    https://github.com/apache/spark/blob/a28ed86a387b286745b30cd4d90b3d558205a5a7/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala#L97-L112
    
    ```scala
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
        at org.apache.hadoop.fs.FileSystem.makeQualified(FileSystem.java:468)
        at org.apache.hadoop.hive.common.FileUtils.isSubDir(FileUtils.java:626)
        at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2850)
        at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
        at org.apache.hadoop.hive.ql.metadata.Hive.loadDynamicPartitions(Hive.java:1593)
    ```
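    
    For context, a rough sketch of what the linked `withClient` wrapper does (paraphrased from those lines, not a verbatim copy): client-side failures are rethrown as an `AnalysisException` that keeps the original Hive/Hadoop exception as its `cause`, which is exactly what this PR now prints.
    
    ```scala
    import java.lang.reflect.InvocationTargetException
    import scala.util.control.NonFatal
    
    // isClientException is a private helper in HiveExternalCatalog that
    // recognizes Hive/Thrift client exceptions.
    private def withClient[T](body: => T): T = synchronized {
      try {
        body
      } catch {
        case NonFatal(exception) if isClientException(exception) =>
          val e = exception match {
            // Calls that go through the Hive shim surface as
            // InvocationTargetException; unwrap to get the real cause.
            case i: InvocationTargetException => i.getCause
            case o => o
          }
          // The root cause is preserved in `cause`, so the CLI can print it.
          throw new AnalysisException(
            e.getClass.getCanonicalName + ": " + e.getMessage, cause = Some(e))
      }
    }
    ```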
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, `bin/spark-sql` will print the full stack trace when an `AnalysisException` that contains a root cause occurs; before this fix, only the error message was printed.
    
    #### Before
    
    ```scala
    Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
    ```
    
    #### After
    ```scala
    Error in query: java.lang.IllegalArgumentException: Wrong FS: hdfs:..., expected: hdfs://hz-cluster10;
    org.apache.spark.sql.AnalysisException: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10;
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:109)
        at org.apache.spark.sql.hive.HiveExternalCatalog.loadPartition(HiveExternalCatalog.scala:890)
        at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.loadPartition(ExternalCatalogWithListener.scala:179)
        at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:312)
        at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
        at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
        at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
        at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3644)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
        at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
        at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
        at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
        at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3642)
        at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
        at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
        at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
        at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:607)
        at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:764)
        at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:602)
        at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:650)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLDriver.run(SparkSQLDriver.scala:63)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processCmd(SparkSQLCLIDriver.scala:376)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.$anonfun$processLine$1(SparkSQLCLIDriver.scala:486)
        at scala.collection.Iterator.foreach(Iterator.scala:941)
        at scala.collection.Iterator.foreach$(Iterator.scala:941)
        at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
        at scala.collection.IterableLike.foreach(IterableLike.scala:74)
        at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
        at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.processLine(SparkSQLCLIDriver.scala:480)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver$.main(SparkSQLCLIDriver.scala:282)
        at org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
        at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:934)
        at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165)
        at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
        at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163)
        at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
        at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
        at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1013)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1022)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
    Caused by: java.lang.IllegalArgumentException: Wrong FS: ..., expected: hdfs://hz-cluster10
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:649)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getPathName(DistributedFileSystem.java:194)
        at org.apache.hadoop.hdfs.DistributedFileSystem.getEZForPath(DistributedFileSystem.java:2093)
        at org.apache.hadoop.hdfs.client.HdfsAdmin.getEncryptionZoneForPath(HdfsAdmin.java:289)
        at org.apache.hadoop.hive.shims.Hadoop23Shims$HdfsEncryptionShim.isPathEncrypted(Hadoop23Shims.java:1221)
        at org.apache.hadoop.hive.ql.metadata.Hive.moveFile(Hive.java:2607)
        at org.apache.hadoop.hive.ql.metadata.Hive.replaceFiles(Hive.java:2892)
        at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1398)
        at org.apache.hadoop.hive.ql.metadata.Hive.loadPartition(Hive.java:1324)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:498)
        at org.apache.spark.sql.hive.client.Shim_v0_14.loadPartition(HiveShim.scala:927)
        at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$loadPartition$1(HiveClientImpl.scala:870)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:294)
        at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
        at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
        at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:276)
        at org.apache.spark.sql.hive.client.HiveClientImpl.loadPartition(HiveClientImpl.scala:860)
        at org.apache.spark.sql.hive.HiveExternalCatalog.$anonfun$loadPartition$1(HiveExternalCatalog.scala:911)
        at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
        at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99)
        ... 52 more
    ```
    
    You can use the `-S` option to restore the old behavior if you find the error output too verbose.
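    
    For example, using the query from the new CliSuite test below, a hypothetical invocation would look like:
    
    ```
    bin/spark-sql -S -e "select date_sub(date'2011-11-11', '1.2');"
    ```
    
    which prints only `Error in query: The second argument of 'date_sub' function needs to be an integer.` instead of the full stack trace.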
    
    ### How was this patch tested?
    
    Existing CliSuite test:
    `SPARK-11188 Analysis error reporting`
    
    Added a new test and verified manually.
    
    Closes #28499 from yaooqinn/SPARK-31678.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
    (cherry picked from commit ce714d81894a48e2d06c530674c2190e0483e1b4)
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/hive/thriftserver/SparkSQLCLIDriver.scala      | 16 ++++++++++++----
 .../apache/spark/sql/hive/thriftserver/CliSuite.scala  | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index bffa24c..50b5eb4 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -380,10 +380,18 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging {
 
           ret = rc.getResponseCode
           if (ret != 0) {
-            // For analysis exception, only the error is printed out to the console.
-            rc.getException() match {
-              case e : AnalysisException =>
-                err.println(s"""Error in query: ${e.getMessage}""")
+            rc.getException match {
+              case e: AnalysisException => e.cause match {
+                case Some(_) if !sessionState.getIsSilent =>
+                  err.println(
+                    s"""Error in query: ${e.getMessage}
+                       |${org.apache.hadoop.util.StringUtils.stringifyException(e)}
+                     """.stripMargin)
+                // For analysis exceptions in silent mode or simple ones that only related to the
+                // query itself, such as `NoSuchDatabaseException`, only the error is printed out
+                // to the console.
+                case _ => err.println(s"""Error in query: ${e.getMessage}""")
+              }
               case _ => err.println(rc.getErrorMessage())
             }
             driver.close()
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 265e777..bcdcd07 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -509,4 +509,22 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfterE
       "SELECT \"legal 'string b\";select 22222 + 1;".stripMargin -> "22223"
     )
   }
+
+  test("AnalysisException with root cause will be printStacktrace") {
+    // If it is not in silent mode, will print the stacktrace
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--hiveconf", "hive.session.silent=false",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("NumberFormatException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs 
to be an integer."),
+      ("", "NumberFormatException: invalid input syntax for type numeric: 
1.2"))
+    // If it is in silent mode, will print the error message only
+    runCliWithin(
+      1.minute,
+      extraArgs = Seq("--conf", "spark.hive.session.silent=true",
+        "-e", "select date_sub(date'2011-11-11', '1.2');"),
+      errorResponses = Seq("AnalysisException"))(
+      ("", "Error in query: The second argument of 'date_sub' function needs 
to be an integer."))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
