This is an automated email from the ASF dual-hosted git repository.

LuciferYang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d172a62127a6 [SPARK-57245][SQL] Simplify FormatNumber codegen by 
extracting a static Java helper
d172a62127a6 is described below

commit d172a62127a6481eb5b9d07c608876307550966e
Author: YangJie <[email protected]>
AuthorDate: Thu Jun 4 13:26:32 2026 +0800

    [SPARK-57245][SQL] Simplify FormatNumber codegen by extracting a static 
Java helper
    
    ### What changes were proposed in this pull request?
    
    This is a sub-task of 
[SPARK-56908](https://issues.apache.org/jira/browse/SPARK-56908) (reduce 
generated Java size in whole-stage codegen).
    
    For an integer `d`, `FormatNumber.doGenCode` inlined the number-pattern 
build (the default grouping pattern followed by `d` decimal places, applied to 
a `DecimalFormat`), an exact transliteration of the same block in 
`nullSafeEval`. The codegen also threaded a `pattern` `StringBuilder` through 
mutable state:
    
    ```java
    if (d >= 0) {
      pattern.delete(0, pattern.length());
      if (d != lastDValue) {
        pattern.append("#,###,###,###,###,###,##0");
        if (d > 0) {
          pattern.append(".");
          for (int i = 0; i < d; i++) {
            pattern.append("0");
          }
        }
        lastDValue = d;
        numberFormat.applyLocalizedPattern(pattern.toString());
      }
      ...
    }
    ```
    
    This PR moves the pattern build into a static helper 
`ExpressionImplUtils.applyNumberFormatScale(DecimalFormat, String 
defaultFormat, int scale)` that both `eval` and `doGenCode` call, mirroring the 
`ascii` helper added in SPARK-57208. The integer-`d` codegen becomes:
    
    ```java
    if (d >= 0) {
      if (d != lastDValue) {
        lastDValue = d;
        ExpressionImplUtils.applyNumberFormatScale(numberFormat, 
"#,###,###,###,###,###,##0", d);
      }
      ...
    }
    ```
    
    This removes the duplicated block, drops the `pattern` `StringBuilder` (an 
instance field in `eval` and a mutable-state slot in codegen), and removes a 
dead `pattern.delete(...)` that the string-pattern branch never read. The 
string-pattern path and the per-`d` caching (only rebuild when `d` changes) are 
unchanged.
    
    ### Why are the changes needed?
    
    To reduce the size of the generated Java in whole-stage codegen, as tracked 
by SPARK-56908. The integer-`d` branch shrinks to a single helper call, and the 
generated class drops one mutable-state field (the `pattern` `StringBuilder`). 
The same block is also deduplicated from `eval`.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. This is a codegen/refactor change; `eval`, `nullable`, `dataType`, and 
results are unchanged, so SQL output and golden files are unaffected.
    
    ### How was this patch tested?
    
    Existing tests, which exercise the changed path in both interpreted and 
codegen modes:
    - `StringExpressionsSuite` "format_number / FormatNumber" (catalyst) covers 
integer `d` with scale > 0, scale = 0, and scale < 0 (null) across all numeric 
input types, plus the string-pattern path and null cases.
    - `StringFunctionsSuite` "number format function" (sql/core).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Claude Opus 4.8)
    
    Closes #56299 from LuciferYang/formatnumber-codegen-helper.
    
    Authored-by: YangJie <[email protected]>
    Signed-off-by: yangjie01 <[email protected]>
---
 .../catalyst/expressions/ExpressionImplUtils.java  | 19 ++++++++++
 .../catalyst/expressions/stringExpressions.scala   | 41 +++-------------------
 2 files changed, 23 insertions(+), 37 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
index 1a49cf6bb52b..d44922261674 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
@@ -22,6 +22,7 @@ import java.security.GeneralSecurityException;
 import java.security.SecureRandom;
 import java.security.spec.AlgorithmParameterSpec;
 import java.text.BreakIterator;
+import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Locale;
@@ -448,4 +449,22 @@ public class ExpressionImplUtils {
       return 0;
     }
   }
+
+  /**
+   * Builds the number pattern for the given scale (the default grouping 
pattern followed by
+   * {@code scale} decimal places) and applies it to the given {@link 
DecimalFormat}. Shared by the
+   * FormatNumber expression's eval and codegen paths so the generated Java is 
a single call and
+   * the pattern buffer does not need to be threaded through mutable state.
+   */
+  public static void applyNumberFormatScale(
+      DecimalFormat numberFormat, String defaultFormat, int scale) {
+    StringBuilder pattern = new StringBuilder(defaultFormat);
+    if (scale > 0) {
+      pattern.append('.');
+      for (int i = 0; i < scale; i++) {
+        pattern.append('0');
+      }
+    }
+    numberFormat.applyLocalizedPattern(pattern.toString());
+  }
 }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index bbc9341d8e6d..45ec0e6a8b30 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -17,7 +17,6 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import java.lang.{StringBuilder => JStringBuilder}
 import java.nio.{ByteBuffer, CharBuffer}
 import java.nio.charset.CharacterCodingException
 import java.text.{DecimalFormat, DecimalFormatSymbols}
@@ -3355,11 +3354,6 @@ case class FormatNumber(x: Expression, d: Expression)
   @transient
   private var lastDStringValue: Option[String] = None
 
-  // A cached DecimalFormat, for performance concern, we will change it
-  // only if the d value changed.
-  @transient
-  private lazy val pattern: JStringBuilder = new JStringBuilder()
-
   // SPARK-13515: US Locale configures the DecimalFormat object to use a dot 
('.')
   // as a decimal separator.
   @transient
@@ -3377,31 +3371,15 @@ case class FormatNumber(x: Expression, d: Expression)
           case Some(last) if last == dValue =>
           // use the current pattern
           case _ =>
-            // construct a new DecimalFormat only if a new dValue
-            pattern.delete(0, pattern.length)
-            pattern.append(defaultFormat)
-
-            // decimal place
-            if (dValue > 0) {
-              pattern.append(".")
-
-              var i = 0
-              while (i < dValue) {
-                i += 1
-                pattern.append("0")
-              }
-            }
-
+            // construct a new pattern only if a new dValue
             lastDIntValue = Some(dValue)
-
-            numberFormat.applyLocalizedPattern(pattern.toString)
+            ExpressionImplUtils.applyNumberFormatScale(numberFormat, 
defaultFormat, dValue)
         }
       case _: StringType =>
         val dValue = dObject.asInstanceOf[UTF8String].toString
         lastDStringValue match {
           case Some(last) if last == dValue =>
           case _ =>
-            pattern.delete(0, pattern.length)
             lastDStringValue = Some(dValue)
             if (dValue.isEmpty) {
               numberFormat.applyLocalizedPattern(defaultFormat)
@@ -3433,7 +3411,6 @@ case class FormatNumber(x: Expression, d: Expression)
         }
       }
 
-      val sb = classOf[JStringBuilder].getName
       val df = classOf[DecimalFormat].getName
       val dfs = classOf[DecimalFormatSymbols].getName
       val l = classOf[Locale].getName
@@ -3445,24 +3422,14 @@ case class FormatNumber(x: Expression, d: Expression)
 
       right.dataType match {
         case IntegerType =>
-          val pattern = ctx.addMutableState(sb, "pattern", v => s"$v = new 
$sb();")
-          val i = ctx.freshName("i")
           val lastDValue =
             ctx.addMutableState(CodeGenerator.JAVA_INT, "lastDValue", v => 
s"$v = -100;")
+          val utils = classOf[ExpressionImplUtils].getName
           s"""
             if ($d >= 0) {
-              $pattern.delete(0, $pattern.length());
               if ($d != $lastDValue) {
-                $pattern.append("$defaultFormat");
-
-                if ($d > 0) {
-                  $pattern.append(".");
-                  for (int $i = 0; $i < $d; $i++) {
-                    $pattern.append("0");
-                  }
-                }
                 $lastDValue = $d;
-                $numberFormat.applyLocalizedPattern($pattern.toString());
+                $utils.applyNumberFormatScale($numberFormat, "$defaultFormat", 
$d);
               }
               ${ev.value} = 
UTF8String.fromString($numberFormat.format(${typeHelper(num)}));
             } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to