This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 85b7a18b18d2 [SPARK-57174][SQL] Simplify Chr codegen by extracting a 
static Java helper
85b7a18b18d2 is described below

commit 85b7a18b18d200f1f278568d75a6b8c85b5028df
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 21:44:35 2026 -0700

    [SPARK-57174][SQL] Simplify Chr codegen by extracting a static Java helper
    
    ### What changes were proposed in this pull request?
    
    Add `ExpressionImplUtils.chr(long longVal)` and route `Chr`'s eval and 
codegen paths through it. `Chr.doGenCode` previously emitted a ~7-line inline 
if/else chain (negative -> empty string, `(v & 0xFF) == 0` -> NUL, otherwise 
the Latin-1 character); it now emits a single `ExpressionImplUtils.chr(...)` 
call, and the eval path calls the same helper.
    
    This is a plain (non-ANSI, non-try/catch) type-independent block, in line 
with the broadened goal of SPARK-56908 to move fixed generated-Java logic into 
static Java helpers.
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). Collapsing the inline if/else chain to one 
call shrinks the generated Java for every stage that uses `chr`, helping with 
the JVM 64KB method / constant-pool limits, Janino compile time, and JIT work.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The compiled behavior is identical; only the emitted Java source text 
changes.
    
    ### How was this patch tested?
    
    ```
    build/sbt "catalyst/testOnly *StringExpressionsSuite"
    ```
    
    59/59 pass, including `string for ascii` which covers `Chr` over negative, 
zero, wrap-around (256), high-bit (149) and null inputs (exercised both with 
and without whole-stage codegen).
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8)
    
    Closes #56224 from gengliangwang/spark-chr-codegen.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../catalyst/expressions/ExpressionImplUtils.java  | 17 ++++++++++++++++
 .../catalyst/expressions/stringExpressions.scala   | 23 +++-------------------
 2 files changed, 20 insertions(+), 20 deletions(-)

diff --git 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
index 1053650a3709..fa1741cb08f7 100644
--- 
a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
+++ 
b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
@@ -346,6 +346,23 @@ public class ExpressionImplUtils {
     return UTF8String.fromString(qtChar + sp + qtChar);
   }
 
+  /**
+   * Returns the single-character string for the {@code chr} expression: the
+   * ASCII/Latin-1 character for {@code longVal & 0xFF}. A negative argument
+   * yields the empty string. Shared by the eval and codegen paths so the
+   * generated Java is a single call rather than an inline if/else chain.
+   */
+  public static UTF8String chr(long longVal) {
+    if (longVal < 0) {
+      return UTF8String.EMPTY_UTF8;
+    } else if ((longVal & 0xFF) == 0) {
+      return UTF8String.fromString(String.valueOf(Character.MIN_VALUE));
+    } else {
+      char c = (char) (longVal & 0xFF);
+      return UTF8String.fromString(String.valueOf(c));
+    }
+  }
+
   /**
    * Compiles {@code regex} with the given {@code flags} for the regexp 
expression
    * family, translating a {@link PatternSyntaxException} into the user-facing
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 5b5a63812dca..5c6e457421bf 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -2822,31 +2822,14 @@ case class Chr(child: Expression)
   override def inputTypes: Seq[DataType] = Seq(LongType)
 
   protected override def nullSafeEval(lon: Any): Any = {
-    val longVal = lon.asInstanceOf[Long]
-    if (longVal < 0) {
-      UTF8String.EMPTY_UTF8
-    } else if ((longVal & 0xFF) == 0) {
-      UTF8String.fromString(Character.MIN_VALUE.toString)
-    } else {
-      UTF8String.fromString((longVal & 0xFF).toChar.toString)
-    }
+    ExpressionImplUtils.chr(lon.asInstanceOf[Long])
   }
 
   override def contextIndependentFoldable: Boolean = 
child.contextIndependentFoldable
 
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
-    nullSafeCodeGen(ctx, ev, lon => {
-      s"""
-        if ($lon < 0) {
-          ${ev.value} = UTF8String.EMPTY_UTF8;
-        } else if (($lon & 0xFF) == 0) {
-          ${ev.value} = 
UTF8String.fromString(String.valueOf(Character.MIN_VALUE));
-        } else {
-          char c = (char)($lon & 0xFF);
-          ${ev.value} = UTF8String.fromString(String.valueOf(c));
-        }
-      """
-    })
+    val utils = classOf[ExpressionImplUtils].getName
+    nullSafeCodeGen(ctx, ev, lon => s"${ev.value} = $utils.chr($lon);")
   }
 
   override protected def withNewChildInternal(newChild: Expression): Chr = 
copy(child = newChild)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to