This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 731a104df307 [SPARK-47261][SQL] Assign better name for errors
_LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174
731a104df307 is described below
commit 731a104df30751ca0c333e621129564f34b81994
Author: junyuc25 <[email protected]>
AuthorDate: Fri Aug 9 11:54:27 2024 +0200
[SPARK-47261][SQL] Assign better name for errors _LEGACY_ERROR_TEMP_1172,
_LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174
### What changes were proposed in this pull request?
Assign better names for the errors _LEGACY_ERROR_TEMP_1172,
_LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174.
### Why are the changes needed?
Proper name improves user experience with Spark SQL.
### Does this PR introduce _any_ user-facing change?
Yes.
### How was this patch tested?
Added new tests and ran all the tests in the suite:
```
org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite
org.apache.spark.SparkThrowableSuite
```
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #47421 from junyuc25/SPARK-47261.
Authored-by: junyuc25 <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/resources/error/error-conditions.json | 33 ++++++++++---------
.../spark/sql/errors/QueryCompilationErrors.scala | 6 ++--
...p-field-with-enum-as-logical-annotation.parquet | Bin 0 -> 409 bytes
.../interval-using-fixed-len-byte-array.parquet | Bin 0 -> 369 bytes
.../datasources/parquet/ParquetSchemaSuite.scala | 35 ++++++++++++++++++---
5 files changed, 52 insertions(+), 22 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-conditions.json
b/common/utils/src/main/resources/error/error-conditions.json
index 26bda26fef28..4766c7790915 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3543,6 +3543,24 @@
],
"sqlState" : "42805"
},
+ "PARQUET_TYPE_ILLEGAL" : {
+ "message" : [
+ "Illegal Parquet type: <parquetType>."
+ ],
+ "sqlState" : "42846"
+ },
+ "PARQUET_TYPE_NOT_RECOGNIZED" : {
+ "message" : [
+ "Unrecognized Parquet type: <field>."
+ ],
+ "sqlState" : "42846"
+ },
+ "PARQUET_TYPE_NOT_SUPPORTED" : {
+ "message" : [
+ "Parquet type not yet supported: <parquetType>."
+ ],
+ "sqlState" : "42846"
+ },
"PARSE_EMPTY_STATEMENT" : {
"message" : [
"Syntax error, unexpected empty statement."
@@ -5881,21 +5899,6 @@
"createTableColumnTypes option column <col> not found in schema
<schema>."
]
},
- "_LEGACY_ERROR_TEMP_1172" : {
- "message" : [
- "Parquet type not yet supported: <parquetType>."
- ]
- },
- "_LEGACY_ERROR_TEMP_1173" : {
- "message" : [
- "Illegal Parquet type: <parquetType>."
- ]
- },
- "_LEGACY_ERROR_TEMP_1174" : {
- "message" : [
- "Unrecognized Parquet type: <field>."
- ]
- },
"_LEGACY_ERROR_TEMP_1181" : {
"message" : [
"Stream-stream join without equality predicate is not supported."
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index cf801e3caacb..09dfa6b3b603 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1995,19 +1995,19 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
def parquetTypeUnsupportedYetError(parquetType: String): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1172",
+ errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
messageParameters = Map("parquetType" -> parquetType))
}
def illegalParquetTypeError(parquetType: String): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1173",
+ errorClass = "PARQUET_TYPE_ILLEGAL",
messageParameters = Map("parquetType" -> parquetType))
}
def unrecognizedParquetTypeError(field: String): Throwable = {
new AnalysisException(
- errorClass = "_LEGACY_ERROR_TEMP_1174",
+ errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
messageParameters = Map("field" -> field))
}
diff --git
a/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
new file mode 100644
index 000000000000..d315eb467a02
Binary files /dev/null and
b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
differ
diff --git
a/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
new file mode 100644
index 000000000000..1504c6e4b4c8
Binary files /dev/null and
b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
differ
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 25f6af1cc338..074781da830f 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1111,10 +1111,37 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default
nanosAsLong=false)") {
val testDataPath = testFile("test-data/timestamp-nanos.parquet")
- val e = intercept[AnalysisException] {
- spark.read.parquet(testDataPath).collect()
- }
- assert(e.getMessage.contains("Illegal Parquet type: INT64
(TIMESTAMP(NANOS,true))."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.parquet(testDataPath).collect()
+ },
+ errorClass = "PARQUET_TYPE_ILLEGAL",
+ parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
+ )
+ }
+
+ test("SPARK-47261: parquet file with unsupported type") {
+ val testDataPath =
testFile("test-data/interval-using-fixed-len-byte-array.parquet")
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.parquet(testDataPath).collect()
+ },
+ errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
+ parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)")
+ )
+ }
+
+ test("SPARK-47261: parquet file with unrecognized parquet type") {
+ val testDataPath =
testFile("test-data/group-field-with-enum-as-logical-annotation.parquet")
+ val expectedParameter = "required group my_list (ENUM) {\n repeated group
list {\n" +
+ " optional binary element (STRING);\n }\n}"
+ checkError(
+ exception = intercept[AnalysisException] {
+ spark.read.parquet(testDataPath).collect()
+ },
+ errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
+ parameters = Map("field" -> expectedParameter)
+ )
}
// =======================================================
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]