(spark) branch master updated: [SPARK-46949][SQL] Support CHAR/VARCHAR through ResolveDefaultColumns

yao Fri, 02 Feb 2024 01:39:35 -0800

This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 362a4d43159d [SPARK-46949][SQL] Support CHAR/VARCHAR through 
ResolveDefaultColumns
362a4d43159d is described below

commit 362a4d43159d73ef2e4c4f9267b572046220680f
Author: Kent Yao <[email protected]>
AuthorDate: Fri Feb 2 17:39:12 2024 +0800

    [SPARK-46949][SQL] Support CHAR/VARCHAR through ResolveDefaultColumns
    
    ### What changes were proposed in this pull request?
    
    We have issues resolving column definitions with default values, i.e., `c 
CHAR(5) DEFAULT 'foo'`, `v  VARCHAR(10) DEFAULT 'bar'`. The reason is that 
CAHR/VARCHAR types in schemas are not supplanted with STRING type to align with 
the value expression.
    
    This PR fixes these issues in `ResolveDefaultColumns`, which seems to only 
cover the v1 tables. When I applied some related tests to v2 tables, they had 
the same issues. But beyond that, there are some other front-loading needs to 
be addressed. In this case, I'd like to separate v2 from the v1 PR.
    
    ### Why are the changes needed?
    
    bugfix
    
    ```
    spark-sql (default)> CREATE TABLE t( c CHAR(5) DEFAULT 'spark') USING 
parquet;
    [INVALID_DEFAULT_VALUE.DATA_TYPE] Failed to execute CREATE TABLE command 
because the destination table column `c` has a DEFAULT value 'spark', which 
requires "CHAR(5)" type, but the statement provided a value of incompatible 
"STRING" type.
    ```
    ### Does this PR introduce _any_ user-facing change?
    
    no, bugfix
    ### How was this patch tested?
    
    new tests
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #44991 from yaooqinn/SPARK-46949.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../catalyst/util/ResolveDefaultColumnsUtil.scala  | 26 +++++++++-----
 .../spark/sql/ResolveDefaultColumnsSuite.scala     | 41 ++++++++++++++++++++++
 2 files changed, 58 insertions(+), 9 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index e782e017d1ef..da03de73557f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer
 
 import org.apache.spark.{SparkThrowable, SparkUnsupportedOperationException}
 import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
 import org.apache.spark.sql.catalyst.analysis._
 import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, 
InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.expressions._
@@ -42,7 +42,9 @@ import org.apache.spark.util.ArrayImplicits._
 /**
  * This object contains fields to help process DEFAULT columns.
  */
-object ResolveDefaultColumns extends QueryErrorsBase with 
ResolveDefaultColumnsUtils {
+object ResolveDefaultColumns extends QueryErrorsBase
+  with ResolveDefaultColumnsUtils
+  with SQLConfHelper {
   // Name of attributes representing explicit references to the value stored 
in the above
   // CURRENT_DEFAULT_COLUMN_METADATA.
   val CURRENT_DEFAULT_COLUMN_NAME = "DEFAULT"
@@ -307,25 +309,26 @@ object ResolveDefaultColumns extends QueryErrorsBase with 
ResolveDefaultColumnsU
       statementType: String,
       colName: String,
       defaultSQL: String): Expression = {
+    val supplanted = CharVarcharUtils.replaceCharVarcharWithString(dataType)
     // Perform implicit coercion from the provided expression type to the 
required column type.
-    if (dataType == analyzed.dataType) {
+    val ret = if (supplanted == analyzed.dataType) {
       analyzed
-    } else if (Cast.canUpCast(analyzed.dataType, dataType)) {
-      Cast(analyzed, dataType)
+    } else if (Cast.canUpCast(analyzed.dataType, supplanted)) {
+      Cast(analyzed, supplanted)
     } else {
       // If the provided default value is a literal of a wider type than the 
target column, but the
       // literal value fits within the narrower type, just coerce it for 
convenience. Exclude
       // boolean/array/struct/map types from consideration for this type 
coercion to avoid
       // surprising behavior like interpreting "false" as integer zero.
       val result = if (analyzed.isInstanceOf[Literal] &&
-        !Seq(dataType, analyzed.dataType).exists(_ match {
+        !Seq(supplanted, analyzed.dataType).exists(_ match {
           case _: BooleanType | _: ArrayType | _: StructType | _: MapType => 
true
           case _ => false
         })) {
         try {
-          val casted = Cast(analyzed, dataType, evalMode = EvalMode.TRY).eval()
+          val casted = Cast(analyzed, supplanted, evalMode = 
EvalMode.TRY).eval()
           if (casted != null) {
-            Some(Literal(casted, dataType))
+            Some(Literal(casted, supplanted))
           } else {
             None
           }
@@ -339,6 +342,10 @@ object ResolveDefaultColumns extends QueryErrorsBase with 
ResolveDefaultColumnsU
           statementType, colName, defaultSQL, dataType, analyzed.dataType)
       }
     }
+    if (!conf.charVarcharAsString && 
CharVarcharUtils.hasCharVarchar(dataType)) {
+      CharVarcharUtils.stringLengthCheck(ret, dataType).eval(EmptyRow)
+    }
+    ret
   }
 
   /**
@@ -454,7 +461,8 @@ object ResolveDefaultColumns extends QueryErrorsBase with 
ResolveDefaultColumnsU
       throw 
QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions(
         statement, colName, default.originalSQL)
     } else if (default.resolved) {
-      if (!Cast.canUpCast(default.child.dataType, targetType)) {
+      val dataType = CharVarcharUtils.replaceCharVarcharWithString(targetType)
+      if (!Cast.canUpCast(default.child.dataType, dataType)) {
         throw QueryCompilationErrors.defaultValuesDataTypeError(
           statement, colName, default.originalSQL, targetType, 
default.child.dataType)
       }
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
index 00529559a485..0b393f4b11b4 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala
@@ -17,6 +17,7 @@
 
 package org.apache.spark.sql
 
+import org.apache.spark.SparkRuntimeException
 import org.apache.spark.sql.test.SharedSparkSession
 
 class ResolveDefaultColumnsSuite extends QueryTest with SharedSparkSession {
@@ -215,4 +216,44 @@ class ResolveDefaultColumnsSuite extends QueryTest with 
SharedSparkSession {
       }
     }
   }
+
+  test("SPARK-46949: DDL with valid default char/varchar values") {
+    withTable("t") {
+      val ddl =
+        s"""
+           |CREATE TABLE t(
+           |  key int,
+           |  v VARCHAR(6) DEFAULT 'apache',
+           |  c CHAR(5) DEFAULT 'spark')
+           |USING parquet""".stripMargin
+      sql(ddl)
+      sql("INSERT INTO t (key) VALUES(1)")
+      checkAnswer(sql("select * from t"), Row(1, "apache", "spark"))
+    }
+  }
+
+  test("SPARK-46949: DDL with invalid default char/varchar values") {
+    Seq("CHAR", "VARCHAR").foreach { typeName =>
+      checkError(
+        exception = intercept[SparkRuntimeException](
+          sql(s"CREATE TABLE t(c $typeName(3) DEFAULT 'spark') USING 
parquet")),
+        errorClass = "EXCEED_LIMIT_LENGTH",
+        parameters = Map("limit" -> "3"))
+    }
+  }
+
+  test("SPARK-46949: DDL with default char/varchar values need padding") {
+    withTable("t") {
+      val ddl =
+        s"""
+           |CREATE TABLE t(
+           |  key int,
+           |  v VARCHAR(6) DEFAULT 'apache',
+           |  c CHAR(6) DEFAULT 'spark')
+           |USING parquet""".stripMargin
+      sql(ddl)
+      sql("INSERT INTO t (key) VALUES(1)")
+      checkAnswer(sql("select * from t"), Row(1, "apache", "spark "))
+    }
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(spark) branch master updated: [SPARK-46949][SQL] Support CHAR/VARCHAR through ResolveDefaultColumns

Reply via email to