This is an automated email from the ASF dual-hosted git repository.

gengliangwang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new d3e70e39bab7 [SPARK-57028][SQL] HashJoin: drop conditionPassed in 
codegenOuter when there is no join condition
d3e70e39bab7 is described below

commit d3e70e39bab790bac7344a76a8418c0b5e5b5035
Author: Gengliang Wang <[email protected]>
AuthorDate: Sat May 30 21:57:28 2026 -0700

    [SPARK-57028][SQL] HashJoin: drop conditionPassed in codegenOuter when 
there is no join condition
    
    ### What changes were proposed in this pull request?
    
    This is a sub-task of 
[SPARK-56908](https://issues.apache.org/jira/browse/SPARK-56908).
    
    `HashJoin.codegenOuter` always emits a `boolean conditionPassed` variable,
    plus either an `if (!conditionPassed) { reset }` block (unique-key path) or
    an `if (conditionPassed) { ... }` wrapper around the inner loop body
    (non-unique-key path), regardless of whether `condition` is defined.
    
    When `condition.isEmpty`:
    - the variable is initialized to `true` and never reassigned;
    - the `if (!conditionPassed)` reset block is dead;
    - the `if (conditionPassed)` wrap is unconditional.
    
    Detect `condition.isEmpty` and omit the variable, the reset block, and the 
wrap.
    
    ### Why are the changes needed?
    
    This produces smaller generated Java source per stage for the common case
    where outer joins have no join condition. The JIT already eliminates the
    dead code at runtime, so the benefit is at code-generation time: smaller
    generated source, more headroom under the 64KB method-size limit, and
    slightly faster Janino compilation.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing `OuterJoinSuite` covers `BroadcastHashJoin` and `ShuffledHashJoin` 
outer joins with whole-stage codegen on and off, with and without join 
conditions.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Generated-by: Claude Code
    
    Closes #56076 from gengliangwang/SPARK-57028-hashjoin-conditionpassed.
    
    Authored-by: Gengliang Wang <[email protected]>
    Signed-off-by: Gengliang Wang <[email protected]>
---
 .../spark/sql/execution/joins/HashJoin.scala       | 35 +++++++++++++++-------
 1 file changed, 24 insertions(+), 11 deletions(-)

diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
index fab14dba444d..9df791aa8de0 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
@@ -457,9 +457,12 @@ trait HashJoin extends JoinCodegenSupport {
     val buildVars = genOneSideJoinVars(ctx, matched, buildPlan, 
setDefaultValue = true)
     val numOutput = metricTerm(ctx, "numOutputRows")
 
-    // filter the output via condition
-    val conditionPassed = ctx.freshName("conditionPassed")
-    val checkCondition = if (condition.isDefined) {
+    // filter the output via condition. When there is no condition, skip the 
`conditionPassed`
+    // variable and the wrapping `if (!conditionPassed)` / `if 
(conditionPassed)` branches that
+    // would always be dead / unconditional.
+    val hasCondition = condition.isDefined
+    val conditionPassed = if (hasCondition) ctx.freshName("conditionPassed") 
else ""
+    val checkCondition = if (hasCondition) {
       val expr = condition.get
       // evaluate the variables from build side that used by condition
       val eval = evaluateRequiredVariables(buildPlan.output, buildVars, 
expr.references)
@@ -475,7 +478,7 @@ trait HashJoin extends JoinCodegenSupport {
          |}
        """.stripMargin
     } else {
-      s"final boolean $conditionPassed = true;"
+      ""
     }
 
     val resultVars = buildSide match {
@@ -484,17 +487,24 @@ trait HashJoin extends JoinCodegenSupport {
     }
 
     if (keyIsUnique) {
+      val resetWhenConditionFails = if (hasCondition) {
+        s"""
+           |if (!$conditionPassed) {
+           |  $matched = null;
+           |  // reset the variables those are already evaluated.
+           |  ${buildVars.filter(_.code.isEmpty).map(v => s"${v.isNull} = 
true;").mkString("\n")}
+           |}
+         """.stripMargin
+      } else {
+        ""
+      }
       s"""
          |// generate join key for stream side
          |${keyEv.code}
          |// find matches from HashedRelation
          |UnsafeRow $matched = $anyNull ? null: 
(UnsafeRow)$relationTerm.getValue(${keyEv.value});
          |${checkCondition.trim}
-         |if (!$conditionPassed) {
-         |  $matched = null;
-         |  // reset the variables those are already evaluated.
-         |  ${buildVars.filter(_.code.isEmpty).map(v => s"${v.isNull} = 
true;").mkString("\n")}
-         |}
+         |$resetWhenConditionFails
          |$numOutput.add(1);
          |${consume(ctx, resultVars)}
        """.stripMargin
@@ -514,6 +524,9 @@ trait HashJoin extends JoinCodegenSupport {
         ""
       }
 
+      val (conditionGuardOpen, conditionGuardClose) =
+        if (hasCondition) (s"if ($conditionPassed) {", "}") else ("", "")
+
       s"""
          |// generate join key for stream side
          |${keyEv.code}
@@ -525,12 +538,12 @@ trait HashJoin extends JoinCodegenSupport {
          |  UnsafeRow $matched = $matches != null && $matches.hasNext() ?
          |    (UnsafeRow) $matches.next() : null;
          |  ${checkCondition.trim}
-         |  if ($conditionPassed) {
+         |  $conditionGuardOpen
          |    $evaluateSingleCheck
          |    $found = true;
          |    $numOutput.add(1);
          |    ${consume(ctx, resultVars)}
-         |  }
+         |  $conditionGuardClose
          |}
        """.stripMargin
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to