Repository: spark Updated Branches: refs/heads/branch-2.4 ffd036a6d -> fb4965a41
[SPARK-25371][SQL] struct() should allow being called with 0 args ## What changes were proposed in this pull request? SPARK-21281 introduced a check for the inputs of `CreateStructLike` to be non-empty. This means that `struct()`, which was previously considered valid, now throws an Exception. This behavior change was introduced in 2.3.0. The change may break users' application on upgrade and it causes `VectorAssembler` to fail when an empty `inputCols` is defined. The PR removes the added check making `struct()` valid again. ## How was this patch tested? added UT Closes #22373 from mgaido91/SPARK-25371. Authored-by: Marco Gaido <marcogaid...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 0736e72a66735664b191fc363f54e3c522697dba) Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb4965a4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb4965a4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb4965a4 Branch: refs/heads/branch-2.4 Commit: fb4965a41941f3a196de77a870a8a1f29c96dac0 Parents: ffd036a Author: Marco Gaido <marcogaid...@gmail.com> Authored: Tue Sep 11 14:16:56 2018 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Tue Sep 11 14:17:49 2018 +0800 ---------------------------------------------------------------------- .../org/apache/spark/ml/feature/VectorAssemblerSuite.scala | 5 +++++ .../spark/sql/catalyst/expressions/complexTypeCreator.scala | 5 +---- .../scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala | 2 -- 3 files changed, 6 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fb4965a4/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala index ed15a1d..a4d388f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorAssemblerSuite.scala @@ -256,4 +256,9 @@ class VectorAssemblerSuite assert(runWithMetadata("keep", additional_filter = "id1 > 2").count() == 4) } + test("SPARK-25371: VectorAssembler with empty inputCols") { + val vectorAssembler = new VectorAssembler().setInputCols(Array()).setOutputCol("a") + val output = vectorAssembler.transform(dfWithNullsAndNaNs) + assert(output.select("a").limit(1).collect().head == Row(Vectors.sparse(0, Seq.empty))) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/fb4965a4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 077a6dc..aba9c6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -379,10 +379,7 @@ trait CreateNamedStructLike extends Expression { } override def checkInputDataTypes(): TypeCheckResult = { - if (children.length < 1) { - TypeCheckResult.TypeCheckFailure( - s"input to function $prettyName requires at least one argument") - } else if (children.size % 2 != 0) { + if (children.size % 2 != 0) { TypeCheckResult.TypeCheckFailure(s"$prettyName expects an even number of arguments.") } else { val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType == StringType) http://git-wip-us.apache.org/repos/asf/spark/blob/fb4965a4/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 4b83e51..121db442 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -2677,8 +2677,6 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext { val funcsMustHaveAtLeastOneArg = ("coalesce", (df: DataFrame) => df.select(coalesce())) :: ("coalesce", (df: DataFrame) => df.selectExpr("coalesce()")) :: - ("named_struct", (df: DataFrame) => df.select(struct())) :: - ("named_struct", (df: DataFrame) => df.selectExpr("named_struct()")) :: ("hash", (df: DataFrame) => df.select(hash())) :: ("hash", (df: DataFrame) => df.selectExpr("hash()")) :: Nil funcsMustHaveAtLeastOneArg.foreach { case (name, func) => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org