This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 9627e29c948d [SPARK-55860][SQL] Use `UNABLE_TO_INFER_SCHEMA` instead
of `UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE`
9627e29c948d is described below
commit 9627e29c948d54818187d9703a83868f93c5e566
Author: ilicmarkodb <[email protected]>
AuthorDate: Fri Mar 6 23:20:13 2026 +0800
[SPARK-55860][SQL] Use `UNABLE_TO_INFER_SCHEMA` instead of
`UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE`
### What changes were proposed in this pull request?
I propose to use `UNABLE_TO_INFER_SCHEMA` instead of
`UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE`. This is because both error messages
are used in the same context, and even though
`UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE` is the more descriptive of the two, we
cannot standardize on it instead: `UNABLE_TO_INFER_SCHEMA` was introduced long
ago, and removing it would be a behavior change.
`UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE`, by contrast, was only added recently,
as part of removing a legacy error.
### Why are the changes needed?
More consistent error messaging.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #54654 from ilicmarkodb/fix_UNABLE_TO_INFER_SCHEMA.
Authored-by: ilicmarkodb <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
common/utils/src/main/resources/error/error-conditions.json | 6 ------
.../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 8 --------
.../org/apache/spark/sql/execution/datasources/DataSource.scala | 3 +--
.../org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala | 8 +++-----
4 files changed, 4 insertions(+), 21 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 8f82244e1987..5db07c4cea52 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -6562,12 +6562,6 @@
],
"sqlState" : "42KD9"
},
- "UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE" : {
- "message" : [
- "Unable to infer schema for <format> at <fileCatalog>. It must be
specified manually."
- ],
- "sqlState" : "42KD9"
- },
"UNBOUND_SQL_PARAMETER" : {
"message" : [
"Found the unbound parameter: <name>. Please, fix `args` and provide a
mapping of the parameter to either a SQL literal or collection constructor
functions such as `map()`, `array()`, `struct()`."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index e00dea31ba73..a784276200c0 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1770,14 +1770,6 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
"actualSchema" -> actualSchema.toDDL))
}
- def dataSchemaNotSpecifiedError(format: String, fileCatalog: String):
Throwable = {
- new AnalysisException(
- errorClass = "UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE",
- messageParameters = Map(
- "format" -> format,
- "fileCatalog" -> fileCatalog))
- }
-
def invalidDataSourceError(className: String): Throwable = {
new AnalysisException(
errorClass = "_LEGACY_ERROR_TEMP_1135",
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index be1f05da308f..9b51d3763abb 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -403,8 +403,7 @@ case class DataSource(
caseInsensitiveOptions - "path",
fileCatalog.allFiles())
}.getOrElse {
- throw QueryCompilationErrors.dataSchemaNotSpecifiedError(
- format.toString, fileCatalog.allFiles().mkString(","))
+ throw
QueryCompilationErrors.dataSchemaNotSpecifiedError(format.toString)
}
HadoopFsRelation(
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index 225d1642bb4a..05e11cc2c301 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -1113,7 +1113,7 @@ class QueryCompilationErrorsSuite
)
}
- test("UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE: empty data source at path") {
+ test("UNABLE_TO_INFER_SCHEMA: empty data source at path") {
withTempDir { dir =>
// Create _spark_metadata with a valid empty log entry (version header
only, no files)
val metadataDir = new java.io.File(dir, "_spark_metadata")
@@ -1125,10 +1125,8 @@ class QueryCompilationErrorsSuite
exception = intercept[AnalysisException] {
spark.read.format("json").load(dir.getCanonicalPath).collect()
},
- condition = "UNABLE_TO_INFER_SCHEMA_FOR_DATA_SOURCE",
- parameters = Map(
- "format" -> "JSON",
- "fileCatalog" -> "")
+ condition = "UNABLE_TO_INFER_SCHEMA",
+ parameters = Map("format" -> "JSON")
)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]