This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new cb253b1 [SPARK-31393][SQL] Show the correct alias in schema for expression
cb253b1 is described below
commit cb253b1c5f9b3e6c71adb5d43d8e82514aba00ef
Author: beliefer <[email protected]>
AuthorDate: Tue May 12 10:25:04 2020 +0900
[SPARK-31393][SQL] Show the correct alias in schema for expression
### What changes were proposed in this pull request?
Some expression aliases are not displayed correctly in the schema. This PR
fixes the following (see the sketch after this list):
- `TimeWindow`
- `MaxBy`
- `MinBy`
- `UnaryMinus`
- `BitwiseCount`
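For example, `negative(1)` used to surface the internal representation `(- 1)`
as its schema alias. A minimal sketch of the symptom, assuming a local
`SparkSession` bound to `spark` (the `spark` value is illustrative, not part of
this patch):
```
// Schema alias of negative(1), per the golden-file change in this PR:
spark.sql("SELECT negative(1)").schema.head.name
// before this PR: "(- 1)"
// after this PR:  "negative(1)"
```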
This PR also fixes a typo issue; see
https://github.com/apache/spark/blob/b7cde42b04b21c9bfee6535199cf385855c15853/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala#L142
Note:
1. `MaxBy` and `MinBy` extend `MaxMinBy`, and the latter adds a `funcName`
method that is not needed. We can reuse `prettyName` in place of `funcName`.
2. Some functions in Spark SQL are implemented inelegantly. For example,
`BitwiseCount` overrides the `sql` method as shown below:
`override def sql: String = s"bit_count(${child.sql})"`
This is not elegant, because `Expression` already provides the following
definition:
```
def sql: String = {
  val childrenSQL = children.map(_.sql).mkString(", ")
  s"$prettyName($childrenSQL)"
}
```
Given this definition, `BitwiseCount` should simply override the `prettyName` method.
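To make that contract concrete, here is a self-contained toy model of it; the
names mirror Spark's, but this is a simplified sketch, not the actual Catalyst
classes:
```
// Toy model of the prettyName/sql contract described above.
abstract class Expr {
  def children: Seq[Expr]
  // Default name derived from the class name, similar to Spark's
  // nodeName-based default.
  def prettyName: String = getClass.getSimpleName.toLowerCase
  // The generic definition quoted above: overriding prettyName alone
  // is enough to fix the generated SQL.
  def sql: String = s"$prettyName(${children.map(_.sql).mkString(", ")})"
}

case class Lit(v: Int) extends Expr {
  def children: Seq[Expr] = Nil
  override def sql: String = v.toString
}

case class BitwiseCount(child: Expr) extends Expr {
  def children: Seq[Expr] = Seq(child)
  override def prettyName: String = "bit_count" // sql becomes bit_count(...)
}

object Demo extends App {
  println(BitwiseCount(Lit(7)).sql) // prints: bit_count(7)
}
```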
### Why are the changes needed?
Improve the implementation of some expressions.
### Does this PR introduce any user-facing change?
'Yes'. This PR lets users see the correct alias in the schema.
### How was this patch tested?
Jenkins test.
Closes #28164 from beliefer/elegant-pretty-name-for-function.
Lead-authored-by: beliefer <[email protected]>
Co-authored-by: gengjiaan <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit a89006aba03a623960e5c4c6864ca8c899c81db9)
Signed-off-by: HyukjinKwon <[email protected]>
---
.../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 +-
.../org/apache/spark/sql/catalyst/expressions/TimeWindow.scala | 1 +
.../spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala | 9 +++++----
.../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 7 ++++++-
.../spark/sql/catalyst/expressions/bitwiseExpressions.scala | 4 ++--
.../src/test/resources/sql-functions/sql-expression-schema.md | 8 ++++----
sql/core/src/test/resources/sql-tests/results/operators.sql.out | 2 +-
.../test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +-
8 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index d3b0731..c2c0df5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -253,7 +253,7 @@ object FunctionRegistry {
expression[Log2]("log2"),
expression[Log]("ln"),
expression[Remainder]("mod", true),
- expression[UnaryMinus]("negative"),
+ expression[UnaryMinus]("negative", true),
expression[Pi]("pi"),
expression[Pmod]("pmod"),
expression[UnaryPositive]("positive"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
index caacb71..82d6894 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
@@ -63,6 +63,7 @@ case class TimeWindow(
override def dataType: DataType = new StructType()
.add(StructField("start", TimestampType))
.add(StructField("end", TimestampType))
+ override def prettyName: String = "window"
// This expression is replaced in the analyzer.
override lazy val resolved = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
index 2e20224..6d3d3da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
@@ -31,7 +31,6 @@ abstract class MaxMinBy extends DeclarativeAggregate {
def valueExpr: Expression
def orderingExpr: Expression
- protected def funcName: String
// The predicate compares two ordering values.
protected def predicate(oldExpr: Expression, newExpr: Expression): Expression
// The arithmetic expression returns greatest/least value of all parameters.
@@ -46,7 +45,7 @@ abstract class MaxMinBy extends DeclarativeAggregate {
override def dataType: DataType = valueExpr.dataType
override def checkInputDataTypes(): TypeCheckResult =
- TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $funcName")
+ TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $prettyName")
// The attributes used to keep extremum (max or min) and associated aggregated values.
private lazy val extremumOrdering =
@@ -101,7 +100,8 @@ abstract class MaxMinBy extends DeclarativeAggregate {
group = "agg_funcs",
since = "3.0.0")
case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
- override protected def funcName: String = "max_by"
+
+ override def prettyName: String = "max_by"
override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
oldExpr > newExpr
@@ -120,7 +120,8 @@ case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMin
group = "agg_funcs",
since = "3.0.0")
case class MinBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
- override protected def funcName: String = "min_by"
+
+ override def prettyName: String = "min_by"
override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
oldExpr < newExpr
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 6a64819..354845d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -86,7 +86,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
case _ => numeric.negate(input)
}
- override def sql: String = s"(- ${child.sql})"
+ override def sql: String = {
+ getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("-") match {
+ case "-" => s"(- ${child.sql})"
+ case funcName => s"$funcName(${child.sql})"
+ }
+ }
}
@ExpressionDescription(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
index 72a8f7e..7b819db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
@@ -172,6 +172,8 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI
override def toString: String = s"bit_count($child)"
+ override def prettyName: String = "bit_count"
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
child.dataType match {
case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0")
case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)")
@@ -184,6 +186,4 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI
case IntegerType => java.lang.Long.bitCount(input.asInstanceOf[Int])
case LongType => java.lang.Long.bitCount(input.asInstanceOf[Long])
}
-
- override def sql: String = s"bit_count(${child.sql})"
}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 2091de2..c3ae2a7 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,4 +1,4 @@
-<!-- Automatically generated byExpressionsSchemaSuite -->
+<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 328
- Number of expressions that missing example: 34
@@ -273,7 +273,7 @@
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP)):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> |
| org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
-| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> |
+| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
| org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
@@ -312,9 +312,9 @@
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col, false):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last(col, false):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<maxby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<minby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, CAST(0.3 AS DOUBLE), 1):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(CAST(col AS DOUBLE)):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(CAST(col AS DOUBLE)):double> |
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index cf857cf..a94a123 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -437,7 +437,7 @@ struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double>
-- !query
select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11)
-- !query schema
-struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)>
+struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),negative(CAST(-1.11 AS DOUBLE)):double,negative(-1.11):decimal(3,2)>
-- !query output
-1.11 -1.11 1.11 1.11
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
index d83aa4f..d69ecd7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
@@ -142,7 +142,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
}
val header = Seq(
- s"<!-- Automatically generated by${getClass.getSimpleName} -->",
+ s"<!-- Automatically generated by ${getClass.getSimpleName} -->",
"## Summary",
s" - Number of queries: ${outputs.size}",
s" - Number of expressions that missing example:
${missingExamples.size}",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]