This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new c1e6e14 [SPARK-31091] Revert SPARK-24640 Return `NULL` from
`size(NULL)` by default
c1e6e14 is described below
commit c1e6e1439d1a79560f197e2627a334fcd0bb8a28
Author: Wenchen Fan <[email protected]>
AuthorDate: Wed Mar 11 09:55:24 2020 -0700
[SPARK-31091] Revert SPARK-24640 Return `NULL` from `size(NULL)` by default
### What changes were proposed in this pull request?
This PR reverts https://github.com/apache/spark/pull/26051 and
https://github.com/apache/spark/pull/26066
### Why are the changes needed?
No standard requires `size(null)` to return null, and returning -1 is
reasonable as well. The original change (SPARK-24640) was largely cosmetic, and
we should avoid such changes when they break existing queries. This is similar
to the revert of the TRIM function parameter order change.
### Does this PR introduce any user-facing change?
Yes. This changes the default behavior of `size(null)` back to what it was in Spark 2.4: it returns -1.
### How was this patch tested?
N/A
Closes #27834 from cloud-fan/revert.
Authored-by: Wenchen Fan <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 8efb71013d0c9e8d81430aa48f88b91929425bff)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
docs/sql-migration-guide.md | 2 --
.../apache/spark/sql/catalyst/expressions/collectionOperations.scala | 4 ++--
.../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +-
3 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index 6c73038..e7ac9f0 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -214,8 +214,6 @@ license: |
- `now` - current query start time
For example `SELECT timestamp 'tomorrow';`.
- - Since Spark 3.0, the `size` function returns `NULL` for the `NULL` input.
In Spark version 2.4 and earlier, this function gives `-1` for the same input.
To restore the behavior before Spark 3.0, you can set
`spark.sql.legacy.sizeOfNull` to `true`.
-
- Since Spark 3.0, when the `array`/`map` function is called without any
parameters, it returns an empty collection with `NullType` as element type. In
Spark version 2.4 and earlier, it returns an empty collection with `StringType`
as element type. To restore the behavior before Spark 3.0, you can set
`spark.sql.legacy.createEmptyCollectionUsingStringType` to `true`.
- Since Spark 3.0, the interval literal syntax does not allow multiple
from-to units anymore. For example, `SELECT INTERVAL '1-1' YEAR TO MONTH '2-2'
YEAR TO MONTH'` throws parser exception.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index cfa877b..6d95909 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -79,7 +79,7 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression
_FUNC_(expr) - Returns the size of an array or a map.
The function returns -1 if its input is null and
spark.sql.legacy.sizeOfNull is set to true.
If spark.sql.legacy.sizeOfNull is set to false, the function returns null
for null input.
- By default, the spark.sql.legacy.sizeOfNull parameter is set to false.
+ By default, the spark.sql.legacy.sizeOfNull parameter is set to true.
""",
examples = """
Examples:
@@ -88,7 +88,7 @@ trait BinaryArrayExpressionWithImplicitCast extends BinaryExpression
> SELECT _FUNC_(map('a', 1, 'b', 2));
2
> SELECT _FUNC_(NULL);
- NULL
+ -1
""")
case class Size(child: Expression, legacySizeOfNull: Boolean)
extends UnaryExpression with ExpectsInputTypes {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index fdaf0ec..644fe89 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -1942,7 +1942,7 @@ object SQLConf {
.doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " +
"The size function returns null for null input if the flag is disabled.")
.booleanConf
- .createWithDefault(false)
+ .createWithDefault(true)
val LEGACY_REPLACE_DATABRICKS_SPARK_AVRO_ENABLED =
buildConf("spark.sql.legacy.replaceDatabricksSparkAvro.enabled")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]