This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0ef7afe0dc3 [SPARK-41931][SQL] Better error message for incomplete
complex type definition
0ef7afe0dc3 is described below
commit 0ef7afe0dc3723b97b750c071a908f363e514a26
Author: Runyao Chen <[email protected]>
AuthorDate: Fri Jan 27 18:06:32 2023 +0300
[SPARK-41931][SQL] Better error message for incomplete complex type
definition
### What changes were proposed in this pull request?
This PR improves error messages for `ARRAY` / `MAP` / `STRUCT` types
without element type specification. A new error class
`INCOMPLETE_TYPE_DEFINITION` with subclasses (`ARRAY`, `MAP`, and `STRUCT`) is
introduced.
**Details**
When a `CAST` expression or a `CREATE TABLE` statement uses a complex type
without specifying its element type,
e.g.
```
CREATE TABLE t (col ARRAY)
```
`[UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"` error would be
thrown, even though the `ARRAY` type is supported and only its element type is
missing.
This PR proposes a better error message like
```
The definition of "ARRAY" type is incomplete. You must provide an element
type. For example: "ARRAY<INT>".
```
### Why are the changes needed?
The previous error message for incomplete complex types was confusing. An
`UNSUPPORTED_DATATYPE` error was thrown even though complex types are
supported; they only require their element types to be specified. A clear
error message with an example is needed in this case.
### Does this PR introduce _any_ user-facing change?
Yes, this PR changes the error message which is user-facing.
Error message before this PR:
```
spark-sql> SELECT CAST(array(1, 2, 3) AS ARRAY);
[UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"(line 1, pos 30)
```
Error message after this PR:
```
[INCOMPLETE_TYPE_DEFINITION.ARRAY] Incomplete complex type: The definition
of "ARRAY" type is incomplete. You must provide an element type. For example:
"ARRAY<INT>".
```
Similarly for MAP and STRUCT types.
### How was this patch tested?
Added unit tests covering CAST and CREATE with ARRAY / STRUCT / MAP types
and their nested combinations.
Closes #39711 from RunyaoChen/better_error_msg_nested_type.
Lead-authored-by: Runyao Chen <[email protected]>
Co-authored-by: RunyaoChen <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
core/src/main/resources/error/error-classes.json | 23 +++++++
.../spark/sql/catalyst/parser/AstBuilder.scala | 2 +
.../spark/sql/errors/QueryParsingErrors.scala | 21 +++++++
.../spark/sql/errors/QueryParsingErrorsSuite.scala | 72 ++++++++++++++++++++++
4 files changed, 118 insertions(+)
diff --git a/core/src/main/resources/error/error-classes.json
b/core/src/main/resources/error/error-classes.json
index e6876751a22..ae766de3e20 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -592,6 +592,29 @@
"Detected an incompatible DataSourceRegister. Please remove the
incompatible library from classpath or upgrade it. Error: <message>"
]
},
+ "INCOMPLETE_TYPE_DEFINITION" : {
+ "message" : [
+ "Incomplete complex type:"
+ ],
+ "subClass" : {
+ "ARRAY" : {
+ "message" : [
+ "The definition of \"ARRAY\" type is incomplete. You must provide an
element type. For example: \"ARRAY<elementType>\"."
+ ]
+ },
+ "MAP" : {
+ "message" : [
+ "The definition of \"MAP\" type is incomplete. You must provide a
key type and a value type. For example: \"MAP<TIMESTAMP, INT>\"."
+ ]
+ },
+ "STRUCT" : {
+ "message" : [
+ "The definition of \"STRUCT\" type is incomplete. You must provide
at least one field type. For example: \"STRUCT<Field1: INT>\"."
+ ]
+ }
+ },
+ "sqlState" : "42K01"
+ },
"INCONSISTENT_BEHAVIOR_CROSS_VERSION" : {
"message" : [
"You may get a different result due to the upgrading to"
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index c6e50f3f514..d2a1cb1eb16 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2889,6 +2889,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef]
with SQLConfHelper wit
case ("interval", Nil) => CalendarIntervalType
case (dt @ ("character" | "char" | "varchar"), Nil) =>
throw QueryParsingErrors.charTypeMissingLengthError(dt, ctx)
+ case (dt @ ("array" | "struct" | "map"), Nil) =>
+ throw QueryParsingErrors.nestedTypeMissingElementTypeError(dt, ctx)
case (dt, params) =>
val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else
dt
throw QueryParsingErrors.dataTypeUnsupportedError(dtStr, ctx)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index 29766251abd..e54bbb9c9d1 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -292,6 +292,27 @@ private[sql] object QueryParsingErrors extends
QueryErrorsBase {
ctx)
}
+ def nestedTypeMissingElementTypeError(
+ dataType: String, ctx: PrimitiveDataTypeContext): Throwable = {
+ dataType match {
+ case "array" =>
+ new ParseException(
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+ messageParameters = Map("elementType" -> "<INT>"),
+ ctx)
+ case "struct" =>
+ new ParseException(
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ messageParameters = Map.empty,
+ ctx)
+ case "map" =>
+ new ParseException(
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+ messageParameters = Map.empty,
+ ctx)
+ }
+ }
+
def partitionTransformNotExpectedError(
name: String, describe: String, ctx: ApplyTransformContext): Throwable =
{
new ParseException(
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
index 71483534d40..b30998b6aa0 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala
@@ -546,4 +546,76 @@ class QueryParsingErrorsSuite extends QueryTest with
SharedSparkSession {
start = 0,
stop = 124))
}
+
+ test("INCOMPLETE_TYPE_DEFINITION: array type definition is incomplete") {
+ // Cast simple array without specifying element type
+ checkError(
+ exception = parseException("SELECT CAST(array(1,2,3) AS ARRAY)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+ sqlState = "42K01",
+ parameters = Map("elementType" -> "<INT>"),
+ context = ExpectedContext(fragment = "ARRAY", start = 28, stop = 32))
+ // Cast array of array without specifying element type for inner array
+ checkError(
+ exception = parseException("SELECT CAST(array(array(3)) AS
ARRAY<ARRAY>)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+ sqlState = "42K01",
+ parameters = Map("elementType" -> "<INT>"),
+ context = ExpectedContext(fragment = "ARRAY", start = 37, stop = 41))
+ // Create column of array type without specifying element type
+ checkError(
+ exception = parseException("CREATE TABLE tbl_120691 (col1 ARRAY)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY",
+ sqlState = "42K01",
+ parameters = Map("elementType" -> "<INT>"),
+ context = ExpectedContext(fragment = "ARRAY", start = 30, stop = 34))
+ }
+
+ test("INCOMPLETE_TYPE_DEFINITION: struct type definition is incomplete") {
+ // Cast simple struct without specifying field type
+ checkError(
+ exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 29, stop = 34))
+ // Cast array of struct without specifying field type in struct
+ checkError(
+ exception = parseException("SELECT CAST(array(struct(1,2)) AS
ARRAY<STRUCT>)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 40, stop = 45))
+ // Create column of struct type without specifying field type
+ checkError(
+ exception = parseException("CREATE TABLE tbl_120691 (col1 STRUCT)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "STRUCT", start = 30, stop = 35))
+ // Invalid syntax `STRUCT<INT>` without field name
+ checkError(
+ exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT<INT>)"),
+ errorClass = "PARSE_SYNTAX_ERROR",
+ sqlState = "42601",
+ parameters = Map("error" -> "'>'", "hint" -> ""))
+ }
+
+ test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") {
+ // Cast simple map without specifying element type
+ checkError(
+ exception = parseException("SELECT CAST(map(1,'2') AS MAP)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "MAP", start = 26, stop = 28))
+ // Create column of map type without specifying key/value types
+ checkError(
+ exception = parseException("CREATE TABLE tbl_120691 (col1 MAP)"),
+ errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP",
+ sqlState = "42K01",
+ context = ExpectedContext(fragment = "MAP", start = 30, stop = 32))
+ // Invalid syntax `MAP<String>` with only key type
+ checkError(
+ exception = parseException("SELECT CAST(map('1',2) AS MAP<STRING>)"),
+ errorClass = "PARSE_SYNTAX_ERROR",
+ sqlState = "42601",
+ parameters = Map("error" -> "'>'", "hint" -> ""))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]