This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-4.x by this push:
     new 79f570d1acbb [SPARK-57180][SQL] Skip statically-dead setNullAt branch 
in GenerateSafeProjection for non-nullable fields
79f570d1acbb is described below

commit 79f570d1acbb4aa99b6c905eca164837bb2ad6a4
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 13:40:12 2026 -0700

    [SPARK-57180][SQL] Skip statically-dead setNullAt branch in 
GenerateSafeProjection for non-nullable fields
    
    ### What changes were proposed in this pull request?
    
    `GenerateSafeProjection` emits `if (isNull) { mutableRow.setNullAt(i); } 
else { convert; setColumn; }` for every projected field. When the field 
expression is statically non-nullable, `isNull` is `FalseLiteral`, so the 
`setNullAt` branch is dead and only the `else` ever runs. This patch detects 
that case (`evaluationCode.isNull == FalseLiteral` -- the idiom already used by 
`GenerateUnsafeProjection`) and emits just the conversion + `setColumn`. The 
nullable path is unchanged. (`Genera [...]
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). Removes a dead branch (and the `setNullAt` 
call) per non-nullable field from the generated safe projection, shrinking the 
emitted Java for wide non-nullable schemas (helping with the JVM 64KB method / 
constant-pool limits, Janino compile time, and JIT work).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The runtime behavior of the generated projection is identical; only the 
emitted Java source text changes.
    
    ### How was this patch tested?
    
    Added `SPARK-57180: SafeProjection over statically non-nullable fields` to 
`GeneratedProjectionSuite`, projecting non-nullable int/string/struct/array 
fields through an unsafe -> safe round trip and asserting the values.
    
    ```
    build/sbt "catalyst/testOnly *GeneratedProjectionSuite"   # 10/10 tests passed
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8)
    
    Closes #56231 from gengliangwang/spark-safeprojection-deadnull-codegen.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
    (cherry picked from commit 2d83237189f743d5b592683290cbac94481f3ef0)
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../codegen/GenerateSafeProjection.scala           | 15 +++++++++---
 .../codegen/GeneratedProjectionSuite.scala         | 27 ++++++++++++++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index 27ef9690d6e6..d4f3a6100d52 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -155,15 +155,24 @@ object GenerateSafeProjection extends 
CodeGenerator[Seq[Expression], Projection]
       case (e, i) =>
         val evaluationCode = e.genCode(ctx)
         val converter = convertToSafe(ctx, evaluationCode.value, e.dataType)
-        evaluationCode.code.toString +
+        val setValue =
+          s"""
+            ${converter.code}
+            ${CodeGenerator.setColumn("mutableRow", e.dataType, i, 
converter.value)};
+          """
+        val writeField = if (evaluationCode.isNull == FalseLiteral) {
+          // The expression is statically non-nullable, so the setNullAt 
branch is dead.
+          setValue
+        } else {
           s"""
             if (${evaluationCode.isNull}) {
               mutableRow.setNullAt($i);
             } else {
-              ${converter.code}
-              ${CodeGenerator.setColumn("mutableRow", e.dataType, i, 
converter.value)};
+              $setValue
             }
           """
+        }
+        evaluationCode.code.toString + writeField
     }
     val allExpressions = ctx.splitExpressionsWithCurrentInputs(expressionCodes)
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
index 3a322b8de02b..b51fdf4f8068 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
@@ -211,6 +211,33 @@ class GeneratedProjectionSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     assert(row.getStruct(0, 1).getString(0).toString == "a")
   }
 
+  test("SPARK-57180: SafeProjection over statically non-nullable fields") {
+    // Non-nullable fields take the dead-branch-free path in 
GenerateSafeProjection (no
+    // setNullAt). Cover atomic, struct and array types and verify the 
projected values.
+    val fields = Array[DataType](
+      IntegerType,
+      StringType,
+      new StructType().add("i", IntegerType, nullable = false),
+      ArrayType(IntegerType, containsNull = false))
+    val refs = fields.zipWithIndex.map { case (dt, i) =>
+      BoundReference(i, dt, nullable = false)
+    }.toImmutableArraySeq
+
+    val safeProj = GenerateSafeProjection.generate(refs)
+    val unsafeProj = GenerateUnsafeProjection.generate(refs)
+    val input = InternalRow(
+      1,
+      UTF8String.fromString("a"),
+      InternalRow(2),
+      new GenericArrayData(Array(3, 4)))
+    val row = safeProj.apply(unsafeProj.apply(input))
+
+    assert(row.getInt(0) == 1)
+    assert(row.getUTF8String(1).toString == "a")
+    assert(row.getStruct(2, 1).getInt(0) == 2)
+    assert(row.getArray(3).toIntArray().sameElements(Array(3, 4)))
+  }
+
   test("SPARK-22699: GenerateSafeProjection should not use global variables 
for struct") {
     val safeProj = GenerateSafeProjection.generate(
       Seq(BoundReference(0, new StructType().add("i", IntegerType), true)))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to