(spark) branch master updated: [SPARK-49138][SQL] Fix CollationTypeCasts of several expressions

wenchen Thu, 08 Aug 2024 06:55:31 -0700

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 01a6991a338c [SPARK-49138][SQL] Fix CollationTypeCasts of several 
expressions
01a6991a338c is described below

commit 01a6991a338cc3f0eac61e97470ef9dbdb170971
Author: Mihailo Milosevic <[email protected]>
AuthorDate: Thu Aug 8 21:55:12 2024 +0800

    [SPARK-49138][SQL] Fix CollationTypeCasts of several expressions
    
    ### What changes were proposed in this pull request?
    Fix for CreateMap and ArrayAppend expressions.
    
    ### Why are the changes needed?
    While adding TypeCoercion for collations, these two expressions were missed
    out, so this PR adds them to the rules so that they follow the expected behaviour.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Added test cases in CollationSuite.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    No.
    
    Closes #47649 from mihailom-db/fixtypecoercion.
    
    Authored-by: Mihailo Milosevic <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/analysis/CollationTypeCasts.scala |  9 ++++-
 .../org/apache/spark/sql/CollationSuite.scala      | 38 +++++++++++++++++++++-
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
index 276062ce211d..9c7b5aaecd78 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala
@@ -83,10 +83,17 @@ object CollationTypeCasts extends TypeCoercionRule {
       val Seq(newInput, newDefault) = collateToSingleType(Seq(input, default))
       framelessOffsetWindow.withNewChildren(Seq(newInput, offset, newDefault))
 
+    case mapCreate : CreateMap if mapCreate.children.size % 2 == 0 =>
+      // We only take in mapCreate if it has even number of children, as otherwise it should fail
+      // with wrong number of arguments
+      val newKeys = collateToSingleType(mapCreate.keys)
+      val newValues = collateToSingleType(mapCreate.values)
+      mapCreate.withNewChildren(newKeys.zip(newValues).flatMap(pair => 
Seq(pair._1, pair._2)))
+
     case otherExpr @ (
       _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Concat | _: 
Greatest | _: Least |
       _: Coalesce | _: ArrayContains | _: ArrayExcept | _: ConcatWs | _: Mask 
| _: StringReplace |
-      _: StringTranslate | _: StringTrim | _: StringTrimLeft | _: 
StringTrimRight |
+      _: StringTranslate | _: StringTrim | _: StringTrimLeft | _: 
StringTrimRight | _: ArrayAppend |
       _: ArrayIntersect | _: ArrayPosition | _: ArrayRemove | _: ArrayUnion | 
_: ArraysOverlap |
       _: Contains | _: EndsWith | _: EqualNullSafe | _: EqualTo | _: FindInSet 
| _: GreaterThan |
       _: GreaterThanOrEqual | _: LessThan | _: LessThanOrEqual | _: StartsWith 
| _: StringInstr |
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index e9e3432195a4..dd678ac48c68 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -34,7 +34,7 @@ import 
org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAg
 import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
 import org.apache.spark.sql.execution.joins._
 import org.apache.spark.sql.internal.{SqlApiConf, SQLConf}
-import org.apache.spark.sql.types.{MapType, StringType, StructField, 
StructType}
+import org.apache.spark.sql.types.{ArrayType, MapType, StringType, 
StructField, StructType}
 
 class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper {
   protected val v2Source = classOf[FakeV2ProviderWithCustomSchema].getName
@@ -579,6 +579,42 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
     }
   }
 
+  test("SPARK-49138: ArrayAppend and CreateMap coercion testing") {
+    val df_array_append = sql(s"SELECT array_append(array('a', 'b'), 'c' 
COLLATE UNICODE)")
+    // array_append expression
+    checkAnswer(df_array_append, Seq(Row(Seq("a", "b", "c"))))
+    assert(df_array_append.schema.head.dataType == 
ArrayType(StringType("UNICODE"), true))
+
+    // make sure we fail this query even when collations are in
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c')")
+      },
+      errorClass = "WRONG_NUM_ARGS.WITHOUT_SUGGESTION",
+      parameters = Map("functionName" -> "`map`", "expectedNum" -> "2n (n > 0)",
+        "actualNum" -> "3", "docroot" -> "https://spark.apache.org/docs/latest")
+    )
+
+    // make sure we fail this query even when collations are in
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c' COLLATE UNICODE, 
'c')")
+      },
+      errorClass = "COLLATION_MISMATCH.EXPLICIT",
+      sqlState = "42P21",
+      parameters = Map(
+        "explicitTypes" ->
+          s"`string collate UTF8_LCASE`, `string collate UNICODE`"
+      )
+    )
+
+    // map creation keys respects proper collation
+    val df_create_map = sql("select map('a' COLLATE UTF8_LCASE, 'b', 'c', 
'c')")
+    checkAnswer(df_create_map, Seq(Row(Map("a" -> "b", "c" -> "c"))))
+    assert(df_create_map.schema.head.dataType ==
+      MapType(StringType("UTF8_LCASE"), StringType("UTF8_BINARY"), false))
+  }
+
   test("SPARK-47692: Parameter marker with EXECUTE IMMEDIATE implicit 
casting") {
     sql(s"DECLARE stmtStr1 = 'SELECT collation(:var1 || :var2)';")
     sql(s"DECLARE stmtStr2 = 'SELECT collation(:var1 || (\\\'a\\\' COLLATE 
UNICODE))';")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-49138][SQL] Fix CollationTypeCasts of several expressions

Reply via email to