This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new cb253b1 [SPARK-31393][SQL] Show the correct alias in schema for expression
cb253b1 is described below
commit cb253b1c5f9b3e6c71adb5d43d8e82514aba00ef
Author: beliefer <[email protected]>
AuthorDate: Tue May 12 10:25:04 2020 +0900
[SPARK-31393][SQL] Show the correct alias in schema for expression
### What changes were proposed in this pull request?
Some expression aliases are not displayed correctly in the schema. This PR
fixes the following (see the sketch after this list):
- `TimeWindow`
- `MaxBy`
- `MinBy`
- `UnaryMinus`
- `BitwiseCount`
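For example, `negative(1)` used to surface the internal representation `(- 1)`
as its schema alias. A minimal sketch of the symptom, assuming a local
`SparkSession` bound to `spark` (the `spark` value is illustrative, not part of
this patch):
```
// Schema alias of negative(1), per the golden-file change in this PR:
spark.sql("SELECT negative(1)").schema.head.name
// before this PR: "(- 1)"
// after this PR:  "negative(1)"
```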
This PR also fixes a typo issue; see
https://github.com/apache/spark/blob/b7cde42b04b21c9bfee6535199cf385855c15853/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala#L142
Note:
1. `MaxBy` and `MinBy` extend `MaxMinBy`, and the latter adds a `funcName`
method that is not needed. We can reuse `prettyName` in place of `funcName`.
2. Some functions in Spark SQL are implemented inelegantly. For example,
`BitwiseCount` overrides the `sql` method as shown below:
`override def sql: String = s"bit_count(${child.sql})"`
This is not elegant, because `Expression` already provides the following
definition:
```
def sql: String = {
  val childrenSQL = children.map(_.sql).mkString(", ")
  s"$prettyName($childrenSQL)"
}
```
Given this definition, `BitwiseCount` should simply override the `prettyName` method.
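To make that contract concrete, here is a self-contained toy model of it; the
names mirror Spark's, but this is a simplified sketch, not the actual Catalyst
classes:
```
// Toy model of the prettyName/sql contract described above.
abstract class Expr {
  def children: Seq[Expr]
  // Default name derived from the class name, similar to Spark's
  // nodeName-based default.
  def prettyName: String = getClass.getSimpleName.toLowerCase
  // The generic definition quoted above: overriding prettyName alone
  // is enough to fix the generated SQL.
  def sql: String = s"$prettyName(${children.map(_.sql).mkString(", ")})"
}

case class Lit(v: Int) extends Expr {
  def children: Seq[Expr] = Nil
  override def sql: String = v.toString
}

case class BitwiseCount(child: Expr) extends Expr {
  def children: Seq[Expr] = Seq(child)
  override def prettyName: String = "bit_count" // sql becomes bit_count(...)
}

object Demo extends App {
  println(BitwiseCount(Lit(7)).sql) // prints: bit_count(7)
}
```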
### Why are the changes needed?
Improve the implementation of some expressions.
### Does this PR introduce any user-facing change?
'Yes'. This PR lets users see the correct alias in the schema.
### How was this patch tested?
Jenkins test.
Closes #28164 from beliefer/elegant-pretty-name-for-function.
Lead-authored-by: beliefer <[email protected]>
Co-authored-by: gengjiaan <[email protected]>
Signed-off-by: HyukjinKwon <[email protected]>
(cherry picked from commit a89006aba03a623960e5c4c6864ca8c899c81db9)
Signed-off-by: HyukjinKwon <[email protected]>
---
.../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 +-
.../org/apache/spark/sql/catalyst/expressions/TimeWindow.scala | 1 +
.../spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala | 9 +++++----
.../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 7 ++++++-
.../spark/sql/catalyst/expressions/bitwiseExpressions.scala | 4 ++--
.../src/test/resources/sql-functions/sql-expression-schema.md | 8 ++++----
sql/core/src/test/resources/sql-tests/results/operators.sql.out | 2 +-
.../test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +-
8 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index d3b0731..c2c0df5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -253,7 +253,7 @@ object FunctionRegistry {
expression[Log2]("log2"),
expression[Log]("ln"),
expression[Remainder]("mod", true),
- expression[UnaryMinus]("negative"),
+ expression[UnaryMinus]("negative", true),
expression[Pi]("pi"),
expression[Pmod]("pmod"),
expression[UnaryPositive]("positive"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
index caacb71..82d6894 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala
@@ -63,6 +63,7 @@ case class TimeWindow(
override def dataType: DataType = new StructType()
.add(StructField("start", TimestampType))
.add(StructField("end", TimestampType))
+ override def prettyName: String = "window"
// This expression is replaced in the analyzer.
override lazy val resolved = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
index 2e20224..6d3d3da 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala
@@ -31,7 +31,6 @@ abstract class MaxMinBy extends DeclarativeAggregate {
def valueExpr: Expression
def orderingExpr: Expression
- protected def funcName: String
// The predicate compares two ordering values.
protected def predicate(oldExpr: Expression, newExpr: Expression): Expression
// The arithmetic expression returns greatest/least value of all parameters.
@@ -46,7 +45,7 @@ abstract class MaxMinBy extends DeclarativeAggregate {
override def dataType: DataType = valueExpr.dataType
override def checkInputDataTypes(): TypeCheckResult =
- TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $funcName")
+ TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $prettyName")
// The attributes used to keep extremum (max or min) and associated aggregated values.
private lazy val extremumOrdering =
@@ -101,7 +100,8 @@ abstract class MaxMinBy extends DeclarativeAggregate {
group = "agg_funcs",
since = "3.0.0")
case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
- override protected def funcName: String = "max_by"
+
+ override def prettyName: String = "max_by"
override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
oldExpr > newExpr
@@ -120,7 +120,8 @@ case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMin
group = "agg_funcs",
since = "3.0.0")
case class MinBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy {
- override protected def funcName: String = "min_by"
+
+ override def prettyName: String = "min_by"
override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression =
oldExpr < newExpr
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index 6a64819..354845d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -86,7 +86,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression
case _ => numeric.negate(input)
}
- override def sql: String = s"(- ${child.sql})"
+ override def sql: String = {
+ getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("-") match {
+ case "-" => s"(- ${child.sql})"
+ case funcName => s"$funcName(${child.sql})"
+ }
+ }
}
@ExpressionDescription(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
index 72a8f7e..7b819db 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala
@@ -172,6 +172,8 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI
override def toString: String = s"bit_count($child)"
+ override def prettyName: String = "bit_count"
+
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode =
child.dataType match {
case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0")
case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)")
@@ -184,6 +186,4 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI
case IntegerType => java.lang.Long.bitCount(input.asInstanceOf[Int])
case LongType => java.lang.Long.bitCount(input.asInstanceOf[Long])
}
-
- override def sql: String = s"bit_count(${child.sql})"
}
diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
index 2091de2..c3ae2a7 100644
--- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
+++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md
@@ -1,4 +1,4 @@
-<!-- Automatically generated byExpressionsSchemaSuite -->
+<!-- Automatically generated by ExpressionsSchemaSuite -->
## Summary
- Number of queries: 328
- Number of expressions that missing example: 34
@@ -273,7 +273,7 @@
| org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP)):timestamp> |
| org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> |
| org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> |
-| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> |
+| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> |
| org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A |
| org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> |
| org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> |
@@ -312,9 +312,9 @@
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col, false):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last(col, false):int> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<maxby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> |
-| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<minby(x, y):string> |
+| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, CAST(0.3 AS DOUBLE), 1):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(CAST(col AS DOUBLE)):double> |
| org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(CAST(col AS DOUBLE)):double> |
diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
index cf857cf..a94a123 100644
--- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out
@@ -437,7 +437,7 @@ struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double>
-- !query
select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11)
-- !query schema
-struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)>
+struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),negative(CAST(-1.11 AS DOUBLE)):double,negative(-1.11):decimal(3,2)>
-- !query output
-1.11 -1.11 1.11 1.11
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
index d83aa4f..d69ecd7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala
@@ -142,7 +142,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession {
}
val header = Seq(
- s"<!-- Automatically generated by${getClass.getSimpleName} -->",
+ s"<!-- Automatically generated by ${getClass.getSimpleName} -->",
"## Summary",
s" - Number of queries: ${outputs.size}",
s" - Number of expressions that missing example:
${missingExamples.size}",
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]