This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6b84c988bda7 [SPARK-57179][SQL] Skip statically-dead null branches in
GenerateOrdering for non-nullable sort keys
6b84c988bda7 is described below
commit 6b84c988bda71566275171ded91b6a7cbaedf7ed
Author: Gengliang Wang <[email protected]>
AuthorDate: Tue Jun 2 13:03:09 2026 -0700
[SPARK-57179][SQL] Skip statically-dead null branches in GenerateOrdering
for non-nullable sort keys
### What changes were proposed in this pull request?
In `GenerateOrdering.genComparisons`, each order key emits a four-way
branch: `if (l.isNull && r.isNull) {} else if (l.isNull) {..} else if
(r.isNull) {..} else { compare }`. When the order key is statically
non-nullable, both `l.isNull` and `r.isNull` are `FalseLiteral`, so the three
null-handling branches are dead and only the `else { compare }` ever runs.
This patch detects that case (`l.isNull == FalseLiteral && r.isNull ==
FalseLiteral` -- the same idiom `GenerateUnsafeProjection` uses) and emits only
the value comparison, wrapped in a block so `comp` stays scoped when
`splitExpressions` concatenates multiple comparisons. The nullable path is
unchanged.
### Why are the changes needed?
Part of SPARK-56908 (umbrella). `GenerateOrdering` backs `SortExec`, range
partitioning, window, and sort-merge join; sort keys are very commonly
non-nullable, so this removes dead branches from a hot, widely-generated
comparator (helping with the JVM 64KB method / constant-pool limits, Janino
compile time, and JIT work).
### Does this PR introduce _any_ user-facing change?
No. The compiled behavior is identical; only the emitted Java source text
changes.
### How was this patch tested?
Added `GenerateOrdering with non-nullable keys: <type>` cases to
`OrderingSuite` (atomic + complex types, mixed asc/desc to cover both the
`comp` and `-comp` emissions, two keys to cover the `comp` scoping),
cross-checked against `InterpretedOrdering`.
```
build/sbt "catalyst/testOnly *OrderingSuite" # 89/89
build/sbt "sql/testOnly org.apache.spark.sql.execution.SortSuite" #
255/255
```
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Opus 4.8)
Closes #56230 from gengliangwang/spark-ordering-deadnull-codegen.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
---
.../expressions/codegen/GenerateOrdering.scala | 47 +++++++++++++++-------
.../sql/catalyst/expressions/OrderingSuite.scala | 41 +++++++++++++++++++
2 files changed, 73 insertions(+), 15 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 4bf6eafdb2d6..c0bbad1127f1 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -88,16 +88,35 @@ object GenerateOrdering extends
CodeGenerator[Seq[SortOrder], BaseOrdering] with
val comparisons = rowAKeys.zip(rowBKeys).zipWithIndex.map { case ((l, r),
i) =>
val dt = ordering(i).child.dataType
val asc = ordering(i).isAscending
- val nullOrdering = ordering(i).nullOrdering
- val lRetValue = nullOrdering match {
- case NullsFirst => "-1"
- case NullsLast => "1"
- }
- val rRetValue = nullOrdering match {
- case NullsFirst => "1"
- case NullsLast => "-1"
- }
- s"""
+ val compareCode =
+ s"""
+ |int comp = ${ctx.genComp(dt, l.value, r.value)};
+ |if (comp != 0) {
+ | return ${if (asc) "comp" else "-comp"};
+ |}
+ """.stripMargin
+ if (l.isNull == FalseLiteral && r.isNull == FalseLiteral) {
+ // The order key is statically non-nullable, so the three
null-handling branches
+ // are dead. Emit only the value comparison, wrapped in a block so
that `comp` is
+ // scoped when `splitExpressions` concatenates multiple comparisons.
+ s"""
+ |${l.code}
+ |${r.code}
+ |{
+ | ${compareCode.trim}
+ |}
+ """.stripMargin
+ } else {
+ val nullOrdering = ordering(i).nullOrdering
+ val lRetValue = nullOrdering match {
+ case NullsFirst => "-1"
+ case NullsLast => "1"
+ }
+ val rRetValue = nullOrdering match {
+ case NullsFirst => "1"
+ case NullsLast => "-1"
+ }
+ s"""
|${l.code}
|${r.code}
|if (${l.isNull} && ${r.isNull}) {
@@ -107,12 +126,10 @@ object GenerateOrdering extends
CodeGenerator[Seq[SortOrder], BaseOrdering] with
|} else if (${r.isNull}) {
| return $rRetValue;
|} else {
- | int comp = ${ctx.genComp(dt, l.value, r.value)};
- | if (comp != 0) {
- | return ${if (asc) "comp" else "-comp"};
- | }
+ | ${compareCode.trim}
|}
- """.stripMargin
+ """.stripMargin
+ }
}
val code = ctx.splitExpressions(
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
index 8505a1c8a2c1..807849a0f1ea 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -132,6 +132,47 @@ class OrderingSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+ // SPARK-57179: when the order keys are statically non-nullable,
GenerateOrdering skips the
+ // dead null-handling branches. Mirror the test above with non-nullable keys
(and a mix of
+ // ascending/descending so both the `comp` and `-comp` emissions are
covered) to exercise
+ // that code path and the per-comparison `comp` scoping when multiple keys
are concatenated.
+ {
+ val structType =
+ new StructType()
+ .add("f1", FloatType, nullable = true)
+ .add("f2", ArrayType(BooleanType, containsNull = true), nullable =
true)
+ val arrayOfStructType = ArrayType(structType)
+ val complexTypes = ArrayType(IntegerType) :: structType ::
arrayOfStructType :: Nil
+ (DataTypeTestUtils.atomicTypes ++ complexTypes).foreach { dataType =>
+ test(s"GenerateOrdering with non-nullable keys: $dataType") {
+ val sortOrders =
+ BoundReference(0, dataType, nullable = false).asc ::
+ BoundReference(1, dataType, nullable = false).desc :: Nil
+ val rowOrdering = new InterpretedOrdering(sortOrders)
+ val genOrdering = GenerateOrdering.generate(sortOrders)
+ val rowType = StructType(
+ StructField("a", dataType, nullable = false) ::
+ StructField("b", dataType, nullable = false) :: Nil)
+ val maybeDataGenerator = RandomDataGenerator.forType(rowType, nullable
= false)
+ assert(maybeDataGenerator.isDefined)
+ val randGenerator = maybeDataGenerator.get
+ val toCatalyst =
CatalystTypeConverters.createToCatalystConverter(rowType)
+ for (_ <- 1 to 50) {
+ val a = toCatalyst(randGenerator()).asInstanceOf[InternalRow]
+ val b = toCatalyst(randGenerator()).asInstanceOf[InternalRow]
+ withClue(s"a = $a, b = $b") {
+ assert(genOrdering.compare(a, a) === 0)
+ assert(genOrdering.compare(b, b) === 0)
+ assert(signum(genOrdering.compare(a, b)) === -1 *
signum(genOrdering.compare(b, a)))
+ assert(
+ signum(rowOrdering.compare(a, b)) ===
signum(genOrdering.compare(a, b)),
+ "Generated and non-generated orderings should agree")
+ }
+ }
+ }
+ }
+ }
+
test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KiB") {
val sortOrder = Literal("abc").asc
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]