This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 997e5383c680 [SPARK-53677][SQL] Improve debuggability for JDBC data 
source when query contains syntax error
997e5383c680 is described below

commit 997e5383c680a06daa79ac4d02f28ed5d93db90b
Author: Uros Stankovic <[email protected]>
AuthorDate: Wed Sep 24 21:49:07 2025 +0800

    [SPARK-53677][SQL] Improve debuggability for JDBC data source when query 
contains syntax error
    
    ### What changes were proposed in this pull request?
    - Improve the error message for syntax errors in the JDBC data source by 
including the error reported by the remote engine. Without the remote parser's 
hints, users find it hard to locate and fix the problem in their query.
    
    ### Why are the changes needed?
    - Improve UX
    
    ### Does this PR introduce _any_ user-facing change?
    Yes. The JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR message now also includes the 
error message returned by the external engine.
    
    ### How was this patch tested?
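    Modified the existing test "SPARK-52184: Wrap external engine syntax error" 
in SharedJDBCIntegrationSuite to check the error condition and its parameters.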
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No
    
    Closes #52421 from 
urosstan-db/SPARK-53677-improve-jdbc-data-source-external-engine-syntax-error-debugability.
    
    Authored-by: Uros Stankovic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../utils/src/main/resources/error/error-conditions.json  |  2 +-
 .../spark/sql/jdbc/SharedJDBCIntegrationSuite.scala       | 15 ++++++++++++---
 .../spark/sql/execution/datasources/jdbc/JDBCRDD.scala    | 10 ++++++++--
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 60bce107186f..d497586b64c3 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4039,7 +4039,7 @@
   },
   "JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR" : {
     "message" : [
-      "JDBC external engine syntax error. The error was caused by the query 
<jdbcQuery>."
+      "JDBC external engine syntax error. The error was caused by the query 
<jdbcQuery>. <externalEngineError>."
     ],
     "subClass" : {
       "DURING_OUTPUT_SCHEMA_RESOLUTION" : {
diff --git 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
index e42d04ead087..05338e79998c 100644
--- 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
+++ 
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
@@ -55,13 +55,22 @@ abstract class SharedJDBCIntegrationSuite extends 
DockerJDBCIntegrationSuite {
   }
 
   test("SPARK-52184: Wrap external engine syntax error") {
-    val e = intercept[SparkException] {
-
+    val ex = intercept[SparkException] {
       spark.read.format("jdbc")
         .option("url", jdbcUrl)
         .option("query", "THIS IS NOT VALID SQL").load()
     }
-    assert(e.getCondition.startsWith("JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR"))
+
+    // The exception should be detected first in the analysis phase, when we 
resolve the schema
+    // through JDBC by sending a SELECT * FROM (<subquery>) [LIMIT 1][WHERE 1=0] 
query.
+    checkErrorMatchPVals(
+      ex,
+      condition = 
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_OUTPUT_SCHEMA_RESOLUTION",
+      parameters = Map(
+        "jdbcQuery" -> "SELECT \\* FROM \\(.*",
+        "externalEngineError" -> "[\\s\\S]*"
+      )
+    )
   }
 
   test("SPARK-53386: Parameter `query` should work when ending with 
semicolons") {
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 33cb5342fd3d..47f5f180789e 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -67,7 +67,10 @@ object JDBCRDD extends Logging {
       case e: SQLException if dialect.isSyntaxErrorBestEffort(e) =>
         throw new SparkException(
           errorClass = 
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_OUTPUT_SCHEMA_RESOLUTION",
-          messageParameters = Map("jdbcQuery" -> fullQuery),
+          messageParameters = Map(
+            "jdbcQuery" -> fullQuery,
+            "externalEngineError" -> e.getMessage.replaceAll("\\.+$", "")
+          ),
           cause = e)
     }
   }
@@ -335,7 +338,10 @@ class JDBCRDD(
         case e: SQLException if dialect.isSyntaxErrorBestEffort(e) =>
           throw new SparkException(
             errorClass = 
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_QUERY_EXECUTION",
-            messageParameters = Map("jdbcQuery" -> sqlText),
+            messageParameters = Map(
+              "jdbcQuery" -> sqlText,
+              "externalEngineError" -> e.getMessage.replaceAll("\\.+$", "")
+            ),
             cause = e)
       }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to