This is an automated email from the ASF dual-hosted git repository.

LuciferYang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new d9b0dbf10572 [SPARK-57206][SQL] Simplify WidthBucket codegen by 
reusing the computeBucketNumber helper
d9b0dbf10572 is described below

commit d9b0dbf105722255b7fc6252cc634cf7cec407df
Author: YangJie <[email protected]>
AuthorDate: Wed Jun 3 10:25:00 2026 +0800

    [SPARK-57206][SQL] Simplify WidthBucket codegen by reusing the 
computeBucketNumber helper
    
    ### What changes were proposed in this pull request?
    
    `WidthBucket.doGenCode` emitted two separate static calls — 
`WidthBucket.isNull(...)` for the null check, then 
`WidthBucket.computeBucketNumberNotNull(...)` for the value — which duplicates 
the null/value split in the generated Java. The interpreted path 
(`nullSafeEval`) already goes through a single helper, 
`WidthBucket.computeBucketNumber`, which returns `null` when the bucket is out 
of range and the bucket number otherwise.
    
    This PR routes codegen through that same helper: the generated code calls 
`computeBucketNumber` once, null-checks the boxed result, and assigns it. 
`isNull` and `computeBucketNumberNotNull` are no longer called from generated 
code, so they are made `private`.
    
    ### Why are the changes needed?
    
    It drops one `invokestatic` and one constant-pool method reference at every 
`width_bucket` call site, and lets eval and codegen share one entry point 
instead of keeping two parallel paths in sync. Part of SPARK-56908.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing `MathExpressionsSuite` `width_bucket` tests. `checkEvaluation` 
exercises both the interpreted and generated paths with whole-stage codegen on 
and off, covering the null cases (`num_bucket <= 0`, `Long.MaxValue`, `NaN`, 
`min == max`, infinite bounds) and the year-month / day-time interval inputs.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Claude Opus 4.8)
    
    Closes #56265 from LuciferYang/widthbucket-codegen-reuse-helper.
    
    Authored-by: YangJie <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
    (cherry picked from commit 992baba7b11b942639f274fa135d217420c4f71a)
    Signed-off-by: yangjie01 <[email protected]>
---
 .../sql/catalyst/expressions/mathExpressions.scala      | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 56fbb00131d5..d816043e710d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1823,6 +1823,7 @@ case class BRound(
 }
 
 object WidthBucket {
+  /** Shared by interpreted eval and generated Java code; must stay public for 
codegen. */
   def computeBucketNumber(value: Double, min: Double, max: Double, numBucket: 
Long): jl.Long = {
     if (isNull(value, min, max, numBucket)) {
       null
@@ -1831,8 +1832,7 @@ object WidthBucket {
     }
   }
 
-  /** This function is called by generated Java code, so it needs to be 
public. */
-  def isNull(value: Double, min: Double, max: Double, numBucket: Long): 
Boolean = {
+  private def isNull(value: Double, min: Double, max: Double, numBucket: 
Long): Boolean = {
     numBucket <= 0 ||
       numBucket == Long.MaxValue ||
       jl.Double.isNaN(value) ||
@@ -1841,8 +1841,7 @@ object WidthBucket {
       jl.Double.isNaN(max) || jl.Double.isInfinite(max)
   }
 
-  /** This function is called by generated Java code, so it needs to be 
public. */
-  def computeBucketNumberNotNull(
+  private def computeBucketNumberNotNull(
       value: Double, min: Double, max: Double, numBucket: Long): jl.Long = {
     val lower = Math.min(min, max)
     val upper = Math.max(min, max)
@@ -1957,11 +1956,13 @@ case class WidthBucket(
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     nullSafeCodeGen(ctx, ev, (input, min, max, numBucket) => {
-      s"""${ev.isNull} = org.apache.spark.sql.catalyst.expressions.WidthBucket
-         |  .isNull($input, $min, $max, $numBucket);
+      val bucket = ctx.freshName("bucket")
+      val boxedLong = CodeGenerator.boxedType(dataType)
+      s"""$boxedLong $bucket = 
org.apache.spark.sql.catalyst.expressions.WidthBucket
+         |  .computeBucketNumber($input, $min, $max, $numBucket);
+         |${ev.isNull} = $bucket == null;
          |if (!${ev.isNull}) {
-         |  ${ev.value} = org.apache.spark.sql.catalyst.expressions.WidthBucket
-         |    .computeBucketNumberNotNull($input, $min, $max, $numBucket);
+         |  ${ev.value} = $bucket;
          |}""".stripMargin
     })
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to