This is an automated email from the ASF dual-hosted git repository.
LuciferYang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new d9b0dbf10572 [SPARK-57206][SQL] Simplify WidthBucket codegen by
reusing the computeBucketNumber helper
d9b0dbf10572 is described below
commit d9b0dbf105722255b7fc6252cc634cf7cec407df
Author: YangJie <[email protected]>
AuthorDate: Wed Jun 3 10:25:00 2026 +0800
[SPARK-57206][SQL] Simplify WidthBucket codegen by reusing the
computeBucketNumber helper
### What changes were proposed in this pull request?
`WidthBucket.doGenCode` emitted two separate static calls —
`WidthBucket.isNull(...)` for the null check, then
`WidthBucket.computeBucketNumberNotNull(...)` for the value — which duplicates
the null/value split in the generated Java. The interpreted path
(`nullSafeEval`) already goes through a single helper,
`WidthBucket.computeBucketNumber`, which returns `null` when the bucket is out
of range and the bucket number otherwise.
This PR routes codegen through that same helper: the generated code calls
`computeBucketNumber` once, null-checks the boxed result, and assigns it.
`isNull` and `computeBucketNumberNotNull` are no longer called from generated
code, so they are made `private`.
### Why are the changes needed?
It drops one `invokestatic` and one constant-pool method reference at every
`width_bucket` call site, and lets eval and codegen share one entry point
instead of keeping two parallel paths in sync. Part of SPARK-56908.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing `MathExpressionsSuite` `width_bucket` tests. `checkEvaluation`
exercises both the interpreted and generated paths with whole-stage codegen on
and off, covering the null cases (`num_bucket <= 0`, `Long.MaxValue`, `NaN`,
`min == max`, infinite bounds) and the year-month / day-time interval inputs.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Claude Opus 4.8)
Closes #56265 from LuciferYang/widthbucket-codegen-reuse-helper.
Authored-by: YangJie <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
(cherry picked from commit 992baba7b11b942639f274fa135d217420c4f71a)
Signed-off-by: yangjie01 <[email protected]>
---
.../sql/catalyst/expressions/mathExpressions.scala | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 56fbb00131d5..d816043e710d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1823,6 +1823,7 @@ case class BRound(
}
object WidthBucket {
+ /** Shared by interpreted eval and generated Java code; must stay public for
codegen. */
def computeBucketNumber(value: Double, min: Double, max: Double, numBucket:
Long): jl.Long = {
if (isNull(value, min, max, numBucket)) {
null
@@ -1831,8 +1832,7 @@ object WidthBucket {
}
}
- /** This function is called by generated Java code, so it needs to be
public. */
- def isNull(value: Double, min: Double, max: Double, numBucket: Long):
Boolean = {
+ private def isNull(value: Double, min: Double, max: Double, numBucket:
Long): Boolean = {
numBucket <= 0 ||
numBucket == Long.MaxValue ||
jl.Double.isNaN(value) ||
@@ -1841,8 +1841,7 @@ object WidthBucket {
jl.Double.isNaN(max) || jl.Double.isInfinite(max)
}
- /** This function is called by generated Java code, so it needs to be
public. */
- def computeBucketNumberNotNull(
+ private def computeBucketNumberNotNull(
value: Double, min: Double, max: Double, numBucket: Long): jl.Long = {
val lower = Math.min(min, max)
val upper = Math.max(min, max)
@@ -1957,11 +1956,13 @@ case class WidthBucket(
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
nullSafeCodeGen(ctx, ev, (input, min, max, numBucket) => {
- s"""${ev.isNull} = org.apache.spark.sql.catalyst.expressions.WidthBucket
- | .isNull($input, $min, $max, $numBucket);
+ val bucket = ctx.freshName("bucket")
+ val boxedLong = CodeGenerator.boxedType(dataType)
+ s"""$boxedLong $bucket =
org.apache.spark.sql.catalyst.expressions.WidthBucket
+ | .computeBucketNumber($input, $min, $max, $numBucket);
+ |${ev.isNull} = $bucket == null;
|if (!${ev.isNull}) {
- | ${ev.value} = org.apache.spark.sql.catalyst.expressions.WidthBucket
- | .computeBucketNumberNotNull($input, $min, $max, $numBucket);
+ | ${ev.value} = $bucket;
|}""".stripMargin
})
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]