This is an automated email from the ASF dual-hosted git repository.
gengliangwang pushed a commit to branch branch-4.x
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.x by this push:
new 396966aaca58 [SPARK-57028][SQL] HashJoin: drop conditionPassed in
codegenOuter when there is no join condition
396966aaca58 is described below
commit 396966aaca5873c757db42c0bb2136763f571023
Author: Gengliang Wang <[email protected]>
AuthorDate: Sat May 30 21:57:28 2026 -0700
[SPARK-57028][SQL] HashJoin: drop conditionPassed in codegenOuter when
there is no join condition
### What changes were proposed in this pull request?
This is a sub-task of
[SPARK-56908](https://issues.apache.org/jira/browse/SPARK-56908).
`HashJoin.codegenOuter` emits a `boolean conditionPassed` variable plus
either an `if (!conditionPassed) { reset }` block (unique-key path) or an `if
(conditionPassed) { ... }` wrap around the inner loop body (non-unique-key
path) regardless of whether `condition` is defined.
When `condition.isEmpty`:
- the variable is initialized to `true` and never reassigned;
- the `if (!conditionPassed)` reset block is dead;
- the `if (conditionPassed)` wrap is always taken, so the guard is redundant.
Detect `condition.isEmpty` and omit the variable, the reset block, and the
wrap.
### Why are the changes needed?
This produces smaller generated Java per stage for the common case where outer
joins have no join condition. The JIT would eliminate the dead code at runtime
anyway, so the win is smaller generated source, more headroom under the 64KB
method-size limit, and slightly faster Janino compilation.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing `OuterJoinSuite` covers `BroadcastHashJoin` and `ShuffledHashJoin`
outer joins with whole-stage codegen on and off, with and without join
conditions.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code
Closes #56076 from gengliangwang/SPARK-57028-hashjoin-conditionpassed.
Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
(cherry picked from commit d3e70e39bab790bac7344a76a8418c0b5e5b5035)
Signed-off-by: Gengliang Wang <[email protected]>
---
.../spark/sql/execution/joins/HashJoin.scala | 35 +++++++++++++++-------
1 file changed, 24 insertions(+), 11 deletions(-)
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
index fab14dba444d..9df791aa8de0 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashJoin.scala
@@ -457,9 +457,12 @@ trait HashJoin extends JoinCodegenSupport {
val buildVars = genOneSideJoinVars(ctx, matched, buildPlan,
setDefaultValue = true)
val numOutput = metricTerm(ctx, "numOutputRows")
- // filter the output via condition
- val conditionPassed = ctx.freshName("conditionPassed")
- val checkCondition = if (condition.isDefined) {
+ // filter the output via condition. When there is no condition, skip the
`conditionPassed`
+ // variable and the wrapping `if (!conditionPassed)` / `if
(conditionPassed)` branches that
+ // would always be dead / unconditional.
+ val hasCondition = condition.isDefined
+ val conditionPassed = if (hasCondition) ctx.freshName("conditionPassed")
else ""
+ val checkCondition = if (hasCondition) {
val expr = condition.get
// evaluate the variables from build side that used by condition
val eval = evaluateRequiredVariables(buildPlan.output, buildVars,
expr.references)
@@ -475,7 +478,7 @@ trait HashJoin extends JoinCodegenSupport {
|}
""".stripMargin
} else {
- s"final boolean $conditionPassed = true;"
+ ""
}
val resultVars = buildSide match {
@@ -484,17 +487,24 @@ trait HashJoin extends JoinCodegenSupport {
}
if (keyIsUnique) {
+ val resetWhenConditionFails = if (hasCondition) {
+ s"""
+ |if (!$conditionPassed) {
+ | $matched = null;
+ | // reset the variables those are already evaluated.
+ | ${buildVars.filter(_.code.isEmpty).map(v => s"${v.isNull} =
true;").mkString("\n")}
+ |}
+ """.stripMargin
+ } else {
+ ""
+ }
s"""
|// generate join key for stream side
|${keyEv.code}
|// find matches from HashedRelation
|UnsafeRow $matched = $anyNull ? null:
(UnsafeRow)$relationTerm.getValue(${keyEv.value});
|${checkCondition.trim}
- |if (!$conditionPassed) {
- | $matched = null;
- | // reset the variables those are already evaluated.
- | ${buildVars.filter(_.code.isEmpty).map(v => s"${v.isNull} =
true;").mkString("\n")}
- |}
+ |$resetWhenConditionFails
|$numOutput.add(1);
|${consume(ctx, resultVars)}
""".stripMargin
@@ -514,6 +524,9 @@ trait HashJoin extends JoinCodegenSupport {
""
}
+ val (conditionGuardOpen, conditionGuardClose) =
+ if (hasCondition) (s"if ($conditionPassed) {", "}") else ("", "")
+
s"""
|// generate join key for stream side
|${keyEv.code}
@@ -525,12 +538,12 @@ trait HashJoin extends JoinCodegenSupport {
| UnsafeRow $matched = $matches != null && $matches.hasNext() ?
| (UnsafeRow) $matches.next() : null;
| ${checkCondition.trim}
- | if ($conditionPassed) {
+ | $conditionGuardOpen
| $evaluateSingleCheck
| $found = true;
| $numOutput.add(1);
| ${consume(ctx, resultVars)}
- | }
+ | $conditionGuardClose
|}
""".stripMargin
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]