This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new 79f570d1acbb [SPARK-57180][SQL] Skip statically-dead setNullAt branch
in GenerateSafeProjection for non-nullable fields
79f570d1acbb is described below
commit 79f570d1acbb4aa99b6c905eca164837bb2ad6a4
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 13:40:12 2026 -0700
[SPARK-57180][SQL] Skip statically-dead setNullAt branch in
GenerateSafeProjection for non-nullable fields
### What changes were proposed in this pull request?
`GenerateSafeProjection` emits `if (isNull) { mutableRow.setNullAt(i); }
else { convert; setColumn; }` for every projected field. When the field
expression is statically non-nullable, `isNull` is `FalseLiteral`, so the
`setNullAt` branch is dead and only the `else` ever runs. This patch detects
that case (`evaluationCode.isNull == FalseLiteral` -- the idiom already used by
`GenerateUnsafeProjection`) and emits just the conversion + `setColumn`. The
nullable path is unchanged. (`Genera [...]
### Why are the changes needed?
This is part of the SPARK-56908 umbrella. It removes one dead branch (and its
`setNullAt` call) per non-nullable field from the generated safe projection,
which shrinks the emitted Java for wide non-nullable schemas. That helps with
the JVM 64KB method and constant-pool limits, Janino compile time, and JIT work.
### Does this PR introduce _any_ user-facing change?
No. The runtime behavior of the generated projection is identical; only the
emitted Java source text changes.
### How was this patch tested?
Added the test `SPARK-57180: SafeProjection over statically non-nullable fields`
to `GeneratedProjectionSuite`. It projects non-nullable int/string/struct/array
fields through an unsafe -> safe round trip and asserts the projected values.
```
build/sbt "catalyst/testOnly *GeneratedProjectionSuite" # 10/10
```
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
Closes #56231 from gengliangwang/spark-safeprojection-deadnull-codegen.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
(cherry picked from commit 2d83237189f743d5b592683290cbac94481f3ef0)
Signed-off-by: Gengliang Wang <[email protected]>
---
.../codegen/GenerateSafeProjection.scala | 15 +++++++++---
.../codegen/GeneratedProjectionSuite.scala | 27 ++++++++++++++++++++++
2 files changed, 39 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index 27ef9690d6e6..d4f3a6100d52 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -155,15 +155,24 @@ object GenerateSafeProjection extends
CodeGenerator[Seq[Expression], Projection]
case (e, i) =>
val evaluationCode = e.genCode(ctx)
val converter = convertToSafe(ctx, evaluationCode.value, e.dataType)
- evaluationCode.code.toString +
+ val setValue =
+ s"""
+ ${converter.code}
+ ${CodeGenerator.setColumn("mutableRow", e.dataType, i,
converter.value)};
+ """
+ val writeField = if (evaluationCode.isNull == FalseLiteral) {
+ // The expression is statically non-nullable, so the setNullAt
branch is dead.
+ setValue
+ } else {
s"""
if (${evaluationCode.isNull}) {
mutableRow.setNullAt($i);
} else {
- ${converter.code}
- ${CodeGenerator.setColumn("mutableRow", e.dataType, i,
converter.value)};
+ $setValue
}
"""
+ }
+ evaluationCode.code.toString + writeField
}
val allExpressions = ctx.splitExpressionsWithCurrentInputs(expressionCodes)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
index 3a322b8de02b..b51fdf4f8068 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
@@ -211,6 +211,33 @@ class GeneratedProjectionSuite extends SparkFunSuite with
ExpressionEvalHelper {
assert(row.getStruct(0, 1).getString(0).toString == "a")
}
+ test("SPARK-57180: SafeProjection over statically non-nullable fields") {
+ // Non-nullable fields take the dead-branch-free path in
GenerateSafeProjection (no
+ // setNullAt). Cover atomic, struct and array types and verify the
projected values.
+ val fields = Array[DataType](
+ IntegerType,
+ StringType,
+ new StructType().add("i", IntegerType, nullable = false),
+ ArrayType(IntegerType, containsNull = false))
+ val refs = fields.zipWithIndex.map { case (dt, i) =>
+ BoundReference(i, dt, nullable = false)
+ }.toImmutableArraySeq
+
+ val safeProj = GenerateSafeProjection.generate(refs)
+ val unsafeProj = GenerateUnsafeProjection.generate(refs)
+ val input = InternalRow(
+ 1,
+ UTF8String.fromString("a"),
+ InternalRow(2),
+ new GenericArrayData(Array(3, 4)))
+ val row = safeProj.apply(unsafeProj.apply(input))
+
+ assert(row.getInt(0) == 1)
+ assert(row.getUTF8String(1).toString == "a")
+ assert(row.getStruct(2, 1).getInt(0) == 2)
+ assert(row.getArray(3).toIntArray().sameElements(Array(3, 4)))
+ }
+
test("SPARK-22699: GenerateSafeProjection should not use global variables
for struct") {
val safeProj = GenerateSafeProjection.generate(
Seq(BoundReference(0, new StructType().add("i", IntegerType), true)))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]