This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 5250ed65cf2 [SPARK-45079][SQL][3.3] Fix an internal error from
`percentile_approx()` on `NULL` accuracy
5250ed65cf2 is described below
commit 5250ed65cf2c70e4b456c96c1006b854f56ef1f2
Author: Max Gekk <[email protected]>
AuthorDate: Wed Sep 6 18:56:14 2023 +0300
[SPARK-45079][SQL][3.3] Fix an internal error from `percentile_approx()` on
`NULL` accuracy
### What changes were proposed in this pull request?
In the PR, I propose to check the `accuracy` argument is not a NULL in
`ApproximatePercentile`. And if it is, throw an `AnalysisException` with new
error class `DATATYPE_MISMATCH.UNEXPECTED_NULL`.
This is a backport of https://github.com/apache/spark/pull/42817.
### Why are the changes needed?
To fix the issue demonstrated by the example:
```sql
$ spark-sql (default)> SELECT percentile_approx(col, array(0.5, 0.4, 0.1),
NULL) FROM VALUES (0), (1), (2), (10) AS tab(col);
[INTERNAL_ERROR] The Spark SQL phase analysis failed with an internal
error. You hit a bug in Spark or the Spark plugins you use. Please, report this
bug to the corresponding communities or vendors, and provide the full stack
trace.
```
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
By running new test:
```
$ build/sbt "test:testOnly *.ApproximatePercentileQuerySuite"
```
### Was this patch authored or co-authored using generative AI tooling?
No.
Authored-by: Max Gekk <[email protected]>
(cherry picked from commit 24b29adcf53616067a9fa2ca201e3f4d2f54436b)
Closes #42835 from MaxGekk/fix-internal-error-in-percentile_approx-3.3.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../expressions/aggregate/ApproximatePercentile.scala | 5 ++++-
.../spark/sql/ApproximatePercentileQuerySuite.scala | 19 +++++++++++++++++++
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
index d8eccc075a2..b816e4a9719 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/ApproximatePercentile.scala
@@ -95,7 +95,8 @@ case class ApproximatePercentile(
}
// Mark as lazy so that accuracyExpression is not evaluated during tree
transformation.
- private lazy val accuracy: Long =
accuracyExpression.eval().asInstanceOf[Number].longValue
+ private lazy val accuracyNum = accuracyExpression.eval().asInstanceOf[Number]
+ private lazy val accuracy: Long = accuracyNum.longValue
override def inputTypes: Seq[AbstractDataType] = {
// Support NumericType, DateType, TimestampType and TimestampNTZType since
their internal types
@@ -120,6 +121,8 @@ case class ApproximatePercentile(
defaultCheck
} else if (!percentageExpression.foldable || !accuracyExpression.foldable)
{
TypeCheckFailure(s"The accuracy or percentage provided must be a
constant literal")
+ } else if (accuracyNum == null) {
+ TypeCheckFailure("Accuracy value must not be null")
} else if (accuracy <= 0 || accuracy > Int.MaxValue) {
TypeCheckFailure(s"The accuracy provided must be a literal between (0,
${Int.MaxValue}]" +
s" (current value = $accuracy)")
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
index 9237c9e9486..3fd1592a107 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/ApproximatePercentileQuerySuite.scala
@@ -337,4 +337,23 @@ class ApproximatePercentileQuerySuite extends QueryTest
with SharedSparkSession
Row(Period.ofMonths(200).normalized(), null,
Duration.ofSeconds(200L)))
}
}
+
+ test("SPARK-45079: NULL arguments of percentile_approx") {
+ val e1 = intercept[AnalysisException] {
+ sql(
+ """
+ |SELECT percentile_approx(col, array(0.5, 0.4, 0.1), NULL)
+ |FROM VALUES (0), (1), (2), (10) AS tab(col);
+ |""".stripMargin).collect()
+ }
+ assert(e1.getMessage.contains("Accuracy value must not be null"))
+ val e2 = intercept[AnalysisException] {
+ sql(
+ """
+ |SELECT percentile_approx(col, NULL, 100)
+ |FROM VALUES (0), (1), (2), (10) AS tab(col);
+ |""".stripMargin).collect()
+ }
+ assert(e2.getMessage.contains("Percentage value must not be null"))
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]