This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new c1e6e14  [SPARK-31091] Revert SPARK-24640 Return `NULL` from 
`size(NULL)` by default
c1e6e14 is described below

commit c1e6e1439d1a79560f197e2627a334fcd0bb8a28
Author: Wenchen Fan <[email protected]>
AuthorDate: Wed Mar 11 09:55:24 2020 -0700

    [SPARK-31091] Revert SPARK-24640 Return `NULL` from `size(NULL)` by default
    
    ### What changes were proposed in this pull request?
    
    This PR reverts https://github.com/apache/spark/pull/26051 and 
https://github.com/apache/spark/pull/26066
    
    ### Why are the changes needed?
    
    There is no standard requiring that `size(null)` must return null, and
returning -1 looks reasonable as well. The original change (SPARK-24640) was
largely cosmetic, and we should avoid such changes when they break existing
queries. This is similar to reverting the TRIM function parameter order change.
    
    ### Does this PR introduce any user-facing change?
    
    Yes, this changes the default behavior of `size(null)` back to match Spark 2.4: it returns `-1` again.
    
    ### How was this patch tested?
    
    N/A
    
    Closes #27834 from cloud-fan/revert.
    
    Authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 8efb71013d0c9e8d81430aa48f88b91929425bff)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 docs/sql-migration-guide.md                                           | 2 --
 .../apache/spark/sql/catalyst/expressions/collectionOperations.scala  | 4 ++--
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala        | 2 +-
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 6c73038..e7ac9f0 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -214,8 +214,6 @@ license: |
     - `now` - current query start time
   For example `SELECT timestamp 'tomorrow';`.
 
-  - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input. 
In Spark version 2.4 and earlier, this function gives `-1` for the same input. 
To restore the behavior before Spark 3.0, you can set 
`spark.sql.legacy.sizeOfNull` to `true`.
-  
   - Since Spark 3.0, when the `array`/`map` function is called without any 
parameters, it returns an empty collection with `NullType` as element type. In 
Spark version 2.4 and earlier, it returns an empty collection with `StringType` 
as element type. To restore the behavior before Spark 3.0, you can set 
`spark.sql.legacy.createEmptyCollectionUsingStringType` to `true`.
 
   - Since Spark 3.0, the interval literal syntax does not allow multiple 
from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2' 
YEAR TO MONTH'` throws parser exception.
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index cfa877b..6d95909 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -79,7 +79,7 @@ trait BinaryArrayExpressionWithImplicitCast extends 
BinaryExpression
     _FUNC_(expr) - Returns the size of an array or a map.
     The function returns -1 if its input is null and 
spark.sql.legacy.sizeOfNull is set to true.
     If spark.sql.legacy.sizeOfNull is set to false, the function returns null 
for null input.
-    By default, the spark.sql.legacy.sizeOfNull parameter is set to false.
+    By default, the spark.sql.legacy.sizeOfNull parameter is set to true.
   """,
   examples = """
     Examples:
@@ -88,7 +88,7 @@ trait BinaryArrayExpressionWithImplicitCast extends 
BinaryExpression
       > SELECT _FUNC_(map('a', 1, 'b', 2));
        2
       > SELECT _FUNC_(NULL);
-       NULL
+       -1
   """)
 case class Size(child: Expression, legacySizeOfNull: Boolean)
   extends UnaryExpression with ExpectsInputTypes {
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index fdaf0ec..644fe89 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1942,7 +1942,7 @@ object SQLConf {
     .doc("If it is set to true, size of null returns -1. This behavior was 
inherited from Hive. " +
       "The size function returns null for null input if the flag is disabled.")
     .booleanConf
-    .createWithDefault(false)
+    .createWithDefault(true)
 
   val LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED =
     buildConf("spark.sql.legacy.replaceDatabricksSparkAvro.enabled")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to