This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6b84c988bda7 [SPARK-57179][SQL] Skip statically-dead null branches in 
GenerateOrdering for non-nullable sort keys
6b84c988bda7 is described below

commit 6b84c988bda71566275171ded91b6a7cbaedf7ed
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 13:03:09 2026 -0700

    [SPARK-57179][SQL] Skip statically-dead null branches in GenerateOrdering 
for non-nullable sort keys
    
    ### What changes were proposed in this pull request?
    
    In `GenerateOrdering.genComparisons`, each order key emits a four-way 
branch: `if (l.isNull && r.isNull) {} else if (l.isNull) {..} else if 
(r.isNull) {..} else { compare }`. When the order key is statically 
non-nullable, both `l.isNull` and `r.isNull` are `FalseLiteral`, so the three 
null-handling branches are dead and only the `else { compare }` ever runs.
    
    This patch detects that case (`l.isNull == FalseLiteral && r.isNull == 
FalseLiteral` -- the same idiom `GenerateUnsafeProjection` uses) and emits only 
the value comparison, wrapped in a block so `comp` stays scoped when 
`splitExpressions` concatenates multiple comparisons. The nullable path is 
unchanged.
    
    ### Why are the changes needed?
    
    Part of SPARK-56908 (umbrella). `GenerateOrdering` backs `SortExec`, range 
partitioning, window, and sort-merge join; sort keys are very commonly 
non-nullable, so this removes dead branches from a hot, widely-generated 
comparator (helping with the JVM 64KB method / constant-pool limits, Janino 
compile time, and JIT work).
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The compiled behavior is identical; only the emitted Java source text 
changes.
    
    ### How was this patch tested?
    
    Added `GenerateOrdering with non-nullable keys: <type>` cases to 
`OrderingSuite` (atomic + complex types, mixed asc/desc to cover both the 
`comp` and `-comp` emissions, two keys to cover the `comp` scoping), 
cross-checked against `InterpretedOrdering`.
    
    ```
    build/sbt "catalyst/testOnly *OrderingSuite"                        # 89/89
    build/sbt "sql/testOnly org.apache.spark.sql.execution.SortSuite"   # 
255/255
    ```
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code (Opus 4.8)
    
    Closes #56230 from gengliangwang/spark-ordering-deadnull-codegen.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../expressions/codegen/GenerateOrdering.scala     | 47 +++++++++++++++-------
 .../sql/catalyst/expressions/OrderingSuite.scala   | 41 +++++++++++++++++++
 2 files changed, 73 insertions(+), 15 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 4bf6eafdb2d6..c0bbad1127f1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -88,16 +88,35 @@ object GenerateOrdering extends 
CodeGenerator[Seq[SortOrder], BaseOrdering] with
     val comparisons = rowAKeys.zip(rowBKeys).zipWithIndex.map { case ((l, r), 
i) =>
       val dt = ordering(i).child.dataType
       val asc = ordering(i).isAscending
-      val nullOrdering = ordering(i).nullOrdering
-      val lRetValue = nullOrdering match {
-        case NullsFirst => "-1"
-        case NullsLast => "1"
-      }
-      val rRetValue = nullOrdering match {
-        case NullsFirst => "1"
-        case NullsLast => "-1"
-      }
-      s"""
+      val compareCode =
+        s"""
+          |int comp = ${ctx.genComp(dt, l.value, r.value)};
+          |if (comp != 0) {
+          |  return ${if (asc) "comp" else "-comp"};
+          |}
+        """.stripMargin
+      if (l.isNull == FalseLiteral && r.isNull == FalseLiteral) {
+        // The order key is statically non-nullable, so the three 
null-handling branches
+        // are dead. Emit only the value comparison, wrapped in a block so 
that `comp` is
+        // scoped when `splitExpressions` concatenates multiple comparisons.
+        s"""
+          |${l.code}
+          |${r.code}
+          |{
+          |  ${compareCode.trim}
+          |}
+        """.stripMargin
+      } else {
+        val nullOrdering = ordering(i).nullOrdering
+        val lRetValue = nullOrdering match {
+          case NullsFirst => "-1"
+          case NullsLast => "1"
+        }
+        val rRetValue = nullOrdering match {
+          case NullsFirst => "1"
+          case NullsLast => "-1"
+        }
+        s"""
           |${l.code}
           |${r.code}
           |if (${l.isNull} && ${r.isNull}) {
@@ -107,12 +126,10 @@ object GenerateOrdering extends 
CodeGenerator[Seq[SortOrder], BaseOrdering] with
           |} else if (${r.isNull}) {
           |  return $rRetValue;
           |} else {
-          |  int comp = ${ctx.genComp(dt, l.value, r.value)};
-          |  if (comp != 0) {
-          |    return ${if (asc) "comp" else "-comp"};
-          |  }
+          |  ${compareCode.trim}
           |}
-      """.stripMargin
+        """.stripMargin
+      }
     }
 
     val code = ctx.splitExpressions(
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
index 8505a1c8a2c1..807849a0f1ea 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -132,6 +132,47 @@ class OrderingSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     }
   }
 
+  // SPARK-57179: when the order keys are statically non-nullable, 
GenerateOrdering skips the
+  // dead null-handling branches. Mirror the test above with non-nullable keys 
(and a mix of
+  // ascending/descending so both the `comp` and `-comp` emissions are 
covered) to exercise
+  // that code path and the per-comparison `comp` scoping when multiple keys 
are concatenated.
+  {
+    val structType =
+      new StructType()
+        .add("f1", FloatType, nullable = true)
+        .add("f2", ArrayType(BooleanType, containsNull = true), nullable = 
true)
+    val arrayOfStructType = ArrayType(structType)
+    val complexTypes = ArrayType(IntegerType) :: structType :: 
arrayOfStructType :: Nil
+    (DataTypeTestUtils.atomicTypes ++ complexTypes).foreach { dataType =>
+      test(s"GenerateOrdering with non-nullable keys: $dataType") {
+        val sortOrders =
+          BoundReference(0, dataType, nullable = false).asc ::
+            BoundReference(1, dataType, nullable = false).desc :: Nil
+        val rowOrdering = new InterpretedOrdering(sortOrders)
+        val genOrdering = GenerateOrdering.generate(sortOrders)
+        val rowType = StructType(
+          StructField("a", dataType, nullable = false) ::
+            StructField("b", dataType, nullable = false) :: Nil)
+        val maybeDataGenerator = RandomDataGenerator.forType(rowType, nullable 
= false)
+        assert(maybeDataGenerator.isDefined)
+        val randGenerator = maybeDataGenerator.get
+        val toCatalyst = 
CatalystTypeConverters.createToCatalystConverter(rowType)
+        for (_ <- 1 to 50) {
+          val a = toCatalyst(randGenerator()).asInstanceOf[InternalRow]
+          val b = toCatalyst(randGenerator()).asInstanceOf[InternalRow]
+          withClue(s"a = $a, b = $b") {
+            assert(genOrdering.compare(a, a) === 0)
+            assert(genOrdering.compare(b, b) === 0)
+            assert(signum(genOrdering.compare(a, b)) === -1 * 
signum(genOrdering.compare(b, a)))
+            assert(
+              signum(rowOrdering.compare(a, b)) === 
signum(genOrdering.compare(a, b)),
+              "Generated and non-generated orderings should agree")
+          }
+        }
+      }
+    }
+  }
+
   test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KiB") {
     val sortOrder = Literal("abc").asc
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to