This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 2d37bb08a979 [SPARK-54178][SQL] Improve error for ResolveSQLOnFile
2d37bb08a979 is described below

commit 2d37bb08a9798d9378d7fc51ce184c0ce167f26c
Author: Vladimir Golubev <[email protected]>
AuthorDate: Wed Nov 5 13:21:23 2025 -0800

    [SPARK-54178][SQL] Improve error for ResolveSQLOnFile
    
    ### What changes were proposed in this pull request?
    
    Improve the error raised by `ResolveSQLOnFile`: a generic failure while building the plan does not mean that the data source is unsupported.
    
    ### Why are the changes needed?
    
    Currently `ResolveSQLOnFile` throws `UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY`
    for a generic failure when discovering files and figuring out the file
    schemas. This is confusing. We need a separate error.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Better error message.
    
    ### How was this patch tested?
    
    Hard to create a test, because the files need to be corrupted.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Claude.
    
    Closes #52875 from 
vladimirg-db/vladimir-golubev_data/better-error-for-resolve-sql-on-file.
    
    Authored-by: Vladimir Golubev <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 common/utils/src/main/resources/error/error-conditions.json       | 6 ++++++
 .../org/apache/spark/sql/errors/QueryCompilationErrors.scala      | 8 ++++++++
 .../scala/org/apache/spark/sql/execution/datasources/rules.scala  | 8 ++------
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index a34ceb9f1145..55f99208e0a1 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -1827,6 +1827,12 @@
     ],
     "sqlState" : "2203G"
   },
+  "FAILED_TO_CREATE_PLAN_FOR_DIRECT_QUERY" : {
+    "message" : [
+      "Failed to create plan for direct query on files: <dataSourceType>"
+    ],
+    "sqlState" : "58030"
+  },
   "FAILED_TO_LOAD_ROUTINE" : {
     "message" : [
       "Failed to load routine <routineName>."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 7d79c5d5d642..9847dbd6b197 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1770,6 +1770,14 @@ private[sql] object QueryCompilationErrors extends 
QueryErrorsBase with Compilat
       messageParameters = Map("provider" -> provider))
   }
 
+  def failedToCreatePlanForDirectQueryError(
+      dataSourceType: String, cause: Throwable): Throwable = {
+    new AnalysisException(
+      errorClass = "FAILED_TO_CREATE_PLAN_FOR_DIRECT_QUERY",
+      messageParameters = Map("dataSourceType" -> dataSourceType),
+      cause = Some(cause))
+  }
+
   def findMultipleDataSourceError(provider: String, sourceNames: Seq[String]): 
Throwable = {
     new AnalysisException(
       errorClass = "_LEGACY_ERROR_TEMP_1141",
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index c8cb5d7ce7c5..060d7fe72c0a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -65,12 +65,8 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends 
Rule[LogicalPlan] {
                 messageParameters = e.getMessageParameters.asScala.toMap)
             case _: ClassNotFoundException => None
             case e: Exception if !e.isInstanceOf[AnalysisException] =>
-              // the provider is valid, but failed to create a logical plan
-              u.failAnalysis(
-                errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY",
-                messageParameters = Map("dataSourceType" -> 
u.multipartIdentifier.head),
-                cause = e
-              )
+              throw 
QueryCompilationErrors.failedToCreatePlanForDirectQueryError(
+                u.multipartIdentifier.head, e)
           }
         case _ =>
           None


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to