This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 01a6991a338c [SPARK-49138][SQL] Fix CollationTypeCasts of several
expressions
01a6991a338c is described below
commit 01a6991a338cc3f0eac61e97470ef9dbdb170971
Author: Mihailo Milosevic <[email protected]>
AuthorDate: Thu Aug 8 21:55:12 2024 +0800
[SPARK-49138][SQL] Fix CollationTypeCasts of several expressions
### What changes were proposed in this pull request?
Add collation type coercion for the CreateMap and ArrayAppend expressions.
### Why are the changes needed?
When TypeCoercion for collations was added, these two expressions were
missed, so this PR adds them to the rules so that they follow the expected behaviour.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Added test cases in CollationSuite.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47649 from mihailom-db/fixtypecoercion.
Authored-by: Mihailo Milosevic <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
.../sql/catalyst/analysis/CollationTypeCasts.scala | 9 ++++-
.../org/apache/spark/sql/CollationSuite.scala | 38 +++++++++++++++++++++-
2 files changed, 45 insertions(+), 2 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
index 276062ce211d..9c7b5aaecd78 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
@@ -83,10 +83,17 @@ object CollationTypeCasts extends TypeCoercionRule {
val Seq(newInput, newDefault) = collateToSingleType(Seq(input, default))
framelessOffsetWindow.withNewChildren(Seq(newInput, offset, newDefault))
+ case mapCreate : CreateMap if mapCreate.children.size % 2 == 0 =>
+ // Only handle mapCreate when it has an even number of children;
otherwise it should fail
+ // with a wrong-number-of-arguments error
+ val newKeys = collateToSingleType(mapCreate.keys)
+ val newValues = collateToSingleType(mapCreate.values)
+ mapCreate.withNewChildren(newKeys.zip(newValues).flatMap(pair =>
Seq(pair._1, pair._2)))
+
case otherExpr @ (
_: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Concat | _:
Greatest | _: Least |
_: Coalesce | _: ArrayContains | _: ArrayExcept | _: ConcatWs | _: Mask
| _: StringReplace |
- _: StringTranslate | _: StringTrim | _: StringTrimLeft | _:
StringTrimRight |
+ _: StringTranslate | _: StringTrim | _: StringTrimLeft | _:
StringTrimRight | _: ArrayAppend |
_: ArrayIntersect | _: ArrayPosition | _: ArrayRemove | _: ArrayUnion |
_: ArraysOverlap |
_: Contains | _: EndsWith | _: EqualNullSafe | _: EqualTo | _: FindInSet
| _: GreaterThan |
_: GreaterThanOrEqual | _: LessThan | _: LessThanOrEqual | _: StartsWith
| _: StringInstr |
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index e9e3432195a4..dd678ac48c68 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -34,7 +34,7 @@ import
org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAg
import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
import org.apache.spark.sql.execution.joins._
import org.apache.spark.sql.internal.{SqlApiConf, SQLConf}
-import org.apache.spark.sql.types.{MapType, StringType, StructField,
StructType}
+import org.apache.spark.sql.types.{ArrayType, MapType, StringType,
StructField, StructType}
class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
protected val v2Source = classOf[FakeV2ProviderWithCustomSchema].getName
@@ -579,6 +579,42 @@ class CollationSuite extends DatasourceV2SQLBase with
AdaptiveSparkPlanHelper {
}
}
+ test("SPARK-49138: ArrayAppend and CreateMap coercion testing") {
+ val df_array_append = sql(s"SELECT array_append(array('a', 'b'), 'c'
COLLATE UNICODE)")
+ // array_append expression
+ checkAnswer(df_array_append, Seq(Row(Seq("a", "b", "c"))))
+ assert(df_array_append.schema.head.dataType ==
ArrayType(StringType("UNICODE"), true))
+
+ // an odd number of map arguments must still fail, even when collations are specified
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c')")
+ },
+ errorClass = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+ parameters = Map("functionName" -> "`map`", "expectedNum" -> "2n (n >
0)",
+ "actualNum" -> "3", "docroot" ->
"https://spark.apache.org/docs/latest")
+ )
+
+ // conflicting explicit collations on the map keys must raise a collation mismatch error
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c' COLLATE UNICODE,
'c')")
+ },
+ errorClass = "COLLATION_MISMATCH.EXPLICIT",
+ sqlState = "42P21",
+ parameters = Map(
+ "explicitTypes" ->
+ s"`string collate UTF8_LCASE`, `string collate UNICODE`"
+ )
+ )
+
+ // map keys are coerced to the explicit collation; values keep UTF8_BINARY
+ val df_create_map = sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c',
'c')")
+ checkAnswer(df_create_map, Seq(Row(Map("a" -> "b", "c" -> "c"))))
+ assert(df_create_map.schema.head.dataType ==
+ MapType(StringType("UTF8_LCASE"), StringType("UTF8_BINARY"), false))
+ }
+
test("SPARK-47692: Parameter marker with EXECUTE IMMEDIATE implicit
casting") {
sql(s"DECLARE stmtStr1 = 'SELECT collation(:var1 || :var2)';")
sql(s"DECLARE stmtStr2 = 'SELECT collation(:var1 || (\\\'a\\\' COLLATE
UNICODE))';")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]