This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 731a104df307 [SPARK-47261][SQL] Assign better name for errors 
_LEGACY_ERROR_TEMP_1172, _LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174
731a104df307 is described below

commit 731a104df30751ca0c333e621129564f34b81994
Author: junyuc25 <[email protected]>
AuthorDate: Fri Aug 9 11:54:27 2024 +0200

    [SPARK-47261][SQL] Assign better name for errors _LEGACY_ERROR_TEMP_1172, 
_LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174
    
    ### What changes were proposed in this pull request?
    
    Assign better name for error _LEGACY_ERROR_TEMP_1172, 
_LEGACY_ERROR_TEMP_1173, and _LEGACY_ERROR_TEMP_1174.
    
    ### Why are the changes needed?
    
    Proper name improves user experience with Spark SQL.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes.
    
    ### How was this patch tested?
    
    Added new tests and ran all the tests in the suite:
    ```
    org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaSuite
    org.apache.spark.SparkThrowableSuite
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No
    
    Closes #47421 from junyuc25/SPARK-47261.
    
    Authored-by: junyuc25 <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
---
 .../src/main/resources/error/error-conditions.json |  33 ++++++++++---------
 .../spark/sql/errors/QueryCompilationErrors.scala  |   6 ++--
 ...p-field-with-enum-as-logical-annotation.parquet | Bin 0 -> 409 bytes
 .../interval-using-fixed-len-byte-array.parquet    | Bin 0 -> 369 bytes
 .../datasources/parquet/ParquetSchemaSuite.scala   |  35 ++++++++++++++++++---
 5 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-conditions.json 
b/common/utils/src/main/resources/error/error-conditions.json
index 26bda26fef28..4766c7790915 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -3543,6 +3543,24 @@
     ],
     "sqlState" : "42805"
   },
+  "PARQUET_TYPE_ILLEGAL" : {
+    "message" : [
+      "Illegal Parquet type: <parquetType>."
+    ],
+    "sqlState" : "42846"
+  },
+  "PARQUET_TYPE_NOT_RECOGNIZED" : {
+    "message" : [
+      "Unrecognized Parquet type: <field>."
+    ],
+    "sqlState" : "42846"
+  },
+  "PARQUET_TYPE_NOT_SUPPORTED" : {
+    "message" : [
+      "Parquet type not yet supported: <parquetType>."
+    ],
+    "sqlState" : "42846"
+  },
   "PARSE_EMPTY_STATEMENT" : {
     "message" : [
       "Syntax error, unexpected empty statement."
@@ -5881,21 +5899,6 @@
       "createTableColumnTypes option column <col> not found in schema 
<schema>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1172" : {
-    "message" : [
-      "Parquet type not yet supported: <parquetType>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_1173" : {
-    "message" : [
-      "Illegal Parquet type: <parquetType>."
-    ]
-  },
-  "_LEGACY_ERROR_TEMP_1174" : {
-    "message" : [
-      "Unrecognized Parquet type: <field>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1181" : {
     "message" : [
       "Stream-stream join without equality predicate is not supported."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index cf801e3caacb..09dfa6b3b603 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1995,19 +1995,19 @@ private[sql] object QueryCompilationErrors extends 
QueryErrorsBase with Compilat
 
   def parquetTypeUnsupportedYetError(parquetType: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1172",
+      errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
       messageParameters = Map("parquetType" -> parquetType))
   }
 
   def illegalParquetTypeError(parquetType: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1173",
+      errorClass = "PARQUET_TYPE_ILLEGAL",
       messageParameters = Map("parquetType" -> parquetType))
   }
 
   def unrecognizedParquetTypeError(field: String): Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1174",
+      errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
       messageParameters = Map("field" -> field))
   }
 
diff --git 
a/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
 
b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
new file mode 100644
index 000000000000..d315eb467a02
Binary files /dev/null and 
b/sql/core/src/test/resources/test-data/group-field-with-enum-as-logical-annotation.parquet
 differ
diff --git 
a/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
 
b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
new file mode 100644
index 000000000000..1504c6e4b4c8
Binary files /dev/null and 
b/sql/core/src/test/resources/test-data/interval-using-fixed-len-byte-array.parquet
 differ
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
index 25f6af1cc338..074781da830f 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala
@@ -1111,10 +1111,37 @@ class ParquetSchemaSuite extends ParquetSchemaTest {
 
   test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with default 
nanosAsLong=false)") {
     val testDataPath = testFile("test-data/timestamp-nanos.parquet")
-    val e = intercept[AnalysisException] {
-      spark.read.parquet(testDataPath).collect()
-    }
-    assert(e.getMessage.contains("Illegal Parquet type: INT64 
(TIMESTAMP(NANOS,true))."))
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_ILLEGAL",
+      parameters = Map("parquetType" -> "INT64 (TIMESTAMP(NANOS,true))")
+    )
+  }
+
+  test("SPARK-47261: parquet file with unsupported type") {
+    val testDataPath = 
testFile("test-data/interval-using-fixed-len-byte-array.parquet")
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_NOT_SUPPORTED",
+      parameters = Map("parquetType" -> "FIXED_LEN_BYTE_ARRAY (INTERVAL)")
+    )
+  }
+
+  test("SPARK-47261: parquet file with unrecognized parquet type") {
+    val testDataPath = 
testFile("test-data/group-field-with-enum-as-logical-annotation.parquet")
+    val expectedParameter = "required group my_list (ENUM) {\n  repeated group 
list {\n" +
+      "    optional binary element (STRING);\n  }\n}"
+    checkError(
+      exception = intercept[AnalysisException] {
+        spark.read.parquet(testDataPath).collect()
+      },
+      errorClass = "PARQUET_TYPE_NOT_RECOGNIZED",
+      parameters = Map("field" -> expectedParameter)
+    )
   }
 
   // =======================================================


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to