This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new a4d8719a2311 [SPARK-57174][SQL] Simplify Chr codegen by extracting a
static Java helper
a4d8719a2311 is described below
commit a4d8719a231132bd940551b0f30bf2719e9eb043
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 21:44:35 2026 -0700
[SPARK-57174][SQL] Simplify Chr codegen by extracting a static Java helper
### What changes were proposed in this pull request?
Add `ExpressionImplUtils.chr(long longVal)` and route `Chr`'s eval and
codegen paths through it. `Chr.doGenCode` previously emitted a ~7-line inline
if/else chain (negative -> empty string, `(v & 0xFF) == 0` -> NUL, otherwise
the Latin-1 character); it now emits a single `ExpressionImplUtils.chr(...)`
call, and the eval path calls the same helper.
This is a plain (non-ANSI, non-try/catch) type-independent block, in line
with the broadened goal of SPARK-56908 to move fixed generated-Java logic into
static Java helpers.
### Why are the changes needed?
Part of SPARK-56908 (umbrella). Collapsing the inline if/else chain to one
call shrinks the generated Java for every stage that uses `chr`, helping with
the JVM 64KB method / constant-pool limits, Janino compile time, and JIT work.
### Does this PR introduce _any_ user-facing change?
No. The compiled behavior is identical; only the emitted Java source text
changes.
### How was this patch tested?
```
build/sbt "catalyst/testOnly *StringExpressionsSuite"
```
59/59 pass, including `string for ascii` which covers `Chr` over negative,
zero, wrap-around (256), high-bit (149) and null inputs (exercised both with
and without whole-stage codegen).
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
Closes #56224 from gengliangwang/spark-chr-codegen.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
(cherry picked from commit 85b7a18b18d200f1f278568d75a6b8c85b5028df)
Signed-off-by: Gengliang Wang <[email protected]>
---
.../catalyst/expressions/ExpressionImplUtils.java | 17 ++++++++++++++++
.../catalyst/expressions/stringExpressions.scala | 23 +++-------------------
2 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
index 1053650a3709..fa1741cb08f7 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java
@@ -346,6 +346,23 @@ public class ExpressionImplUtils {
return UTF8String.fromString(qtChar + sp + qtChar);
}
+ /**
+ * Returns the single-character string for the {@code chr} expression: the
+ * ASCII/Latin-1 character for {@code longVal & 0xFF}. A negative argument
+ * yields the empty string. Shared by the eval and codegen paths so the
+ * generated Java is a single call rather than an inline if/else chain.
+ */
+ public static UTF8String chr(long longVal) {
+ if (longVal < 0) {
+ return UTF8String.EMPTY_UTF8;
+ } else if ((longVal & 0xFF) == 0) {
+ return UTF8String.fromString(String.valueOf(Character.MIN_VALUE));
+ } else {
+ char c = (char) (longVal & 0xFF);
+ return UTF8String.fromString(String.valueOf(c));
+ }
+ }
+
/**
* Compiles {@code regex} with the given {@code flags} for the regexp expression
* family, translating a {@link PatternSyntaxException} into the user-facing
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 5b5a63812dca..5c6e457421bf 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -2822,31 +2822,14 @@ case class Chr(child: Expression)
override def inputTypes: Seq[DataType] = Seq(LongType)
protected override def nullSafeEval(lon: Any): Any = {
- val longVal = lon.asInstanceOf[Long]
- if (longVal < 0) {
- UTF8String.EMPTY_UTF8
- } else if ((longVal & 0xFF) == 0) {
- UTF8String.fromString(Character.MIN_VALUE.toString)
- } else {
- UTF8String.fromString((longVal & 0xFF).toChar.toString)
- }
+ ExpressionImplUtils.chr(lon.asInstanceOf[Long])
}
override def contextIndependentFoldable: Boolean = child.contextIndependentFoldable
override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
- nullSafeCodeGen(ctx, ev, lon => {
- s"""
- if ($lon < 0) {
- ${ev.value} = UTF8String.EMPTY_UTF8;
- } else if (($lon & 0xFF) == 0) {
- ${ev.value} = UTF8String.fromString(String.valueOf(Character.MIN_VALUE));
- } else {
- char c = (char)($lon & 0xFF);
- ${ev.value} = UTF8String.fromString(String.valueOf(c));
- }
- """
- })
+ val utils = classOf[ExpressionImplUtils].getName
+ nullSafeCodeGen(ctx, ev, lon => s"${ev.value} = $utils.chr($lon);")
}
override protected def withNewChildInternal(newChild: Expression): Chr = copy(child = newChild)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]