This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 997e5383c680 [SPARK-53677][SQL] Improve debuggability for JDBC data
source when query contains syntax error
997e5383c680 is described below
commit 997e5383c680a06daa79ac4d02f28ed5d93db90b
Author: Uros Stankovic <[email protected]>
AuthorDate: Wed Sep 24 21:49:07 2025 +0800
[SPARK-53677][SQL] Improve debuggability for JDBC data source when query
contains syntax error
### What changes were proposed in this pull request?
- Improve the error message for syntax errors in the JDBC data source.
Users report that it is currently hard to fix a failing query because the
error does not include any hints from the remote engine's parser.
### Why are the changes needed?
- Improve UX
### Does this PR introduce _any_ user-facing change?
Yes, the JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR message now also includes the
error message returned by the external engine.
### How was this patch tested?
Modified existing test
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #52421 from
urosstan-db/SPARK-53677-improve-jdbc-data-source-external-engine-syntax-error-debugability.
Authored-by: Uros Stankovic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../utils/src/main/resources/error/error-conditions.json | 2 +-
.../spark/sql/jdbc/SharedJDBCIntegrationSuite.scala | 15 ++++++++++++---
.../spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 10 ++++++++--
3 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 60bce107186f..d497586b64c3 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -4039,7 +4039,7 @@
},
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR" : {
"message" : [
- "JDBC external engine syntax error. The error was caused by the query
<jdbcQuery>."
+ "JDBC external engine syntax error. The error was caused by the query
<jdbcQuery>. <externalEngineError>."
],
"subClass" : {
"DURING_OUTPUT_SCHEMA_RESOLUTION" : {
diff --git
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
index e42d04ead087..05338e79998c 100644
---
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
+++
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/SharedJDBCIntegrationSuite.scala
@@ -55,13 +55,22 @@ abstract class SharedJDBCIntegrationSuite extends
DockerJDBCIntegrationSuite {
}
test("SPARK-52184: Wrap external engine syntax error") {
- val e = intercept[SparkException] {
-
+ val ex = intercept[SparkException] {
spark.read.format("jdbc")
.option("url", jdbcUrl)
.option("query", "THIS IS NOT VALID SQL").load()
}
- assert(e.getCondition.startsWith("JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR"))
+
+ // The exception should be detected first in the analysis phase, when we
+ // resolve the schema through JDBC by sending a
+ // SELECT * FROM (<subquery>) [LIMIT 1][WHERE 1=0] query.
+ checkErrorMatchPVals(
+ ex,
+ condition =
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_OUTPUT_SCHEMA_RESOLUTION",
+ parameters = Map(
+ "jdbcQuery" -> "SELECT \\* FROM \\(.*",
+ "externalEngineError" -> "[\\s\\S]*"
+ )
+ )
}
test("SPARK-53386: Parameter `query` should work when ending with
semicolons") {
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 33cb5342fd3d..47f5f180789e 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -67,7 +67,10 @@ object JDBCRDD extends Logging {
case e: SQLException if dialect.isSyntaxErrorBestEffort(e) =>
throw new SparkException(
errorClass =
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_OUTPUT_SCHEMA_RESOLUTION",
- messageParameters = Map("jdbcQuery" -> fullQuery),
+ messageParameters = Map(
+ "jdbcQuery" -> fullQuery,
+ "externalEngineError" -> e.getMessage.replaceAll("\\.+$", "")
+ ),
cause = e)
}
}
@@ -335,7 +338,10 @@ class JDBCRDD(
case e: SQLException if dialect.isSyntaxErrorBestEffort(e) =>
throw new SparkException(
errorClass =
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR.DURING_QUERY_EXECUTION",
- messageParameters = Map("jdbcQuery" -> sqlText),
+ messageParameters = Map(
+ "jdbcQuery" -> sqlText,
+ "externalEngineError" -> e.getMessage.replaceAll("\\.+$", "")
+ ),
cause = e)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]