This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7d67ff3e58fe [SPARK-55654][SQL] Enable TreePattern pruning for
EliminateSubqueryAliases and ResolveInlineTables
7d67ff3e58fe is described below
commit 7d67ff3e58fee04f831db2606c14efbdeac2c0ed
Author: Kent Yao <[email protected]>
AuthorDate: Tue Feb 24 17:58:41 2026 +0800
[SPARK-55654][SQL] Enable TreePattern pruning for EliminateSubqueryAliases
and ResolveInlineTables
### What changes were proposed in this pull request?
Replace `AlwaysProcess.fn` with pattern-based pruning in two Analyzer rules:
1. **EliminateSubqueryAliases**: Use `_.containsPattern(SUBQUERY_ALIAS)`
- Skips the entire plan traversal when no `SubqueryAlias` nodes exist
- This case is common in resolved plans after the initial resolution passes
2. **ResolveInlineTables**: Use `_.containsPattern(INLINE_TABLE_EVAL)`
- Skips traversal when no `UnresolvedInlineTable` nodes exist
- Inline tables are rare; most queries never contain them
Also adds `INLINE_TABLE_EVAL` to `UnresolvedInlineTable.nodePatterns`;
previously only `ResolvedInlineTable` declared this pattern. Without this, the
pruning condition for `ResolveInlineTables` could never be satisfied for
unresolved inline tables, so the rule would skip the very nodes it must resolve.
Both rules previously used `AlwaysProcess.fn`, forcing a full tree traversal
on every fixedPoint iteration even when no matching nodes existed.
Because TreePatternBits are propagated from children up to the root, a single
`containsPattern` check at the root is O(1) and lets the rule short-circuit.
### Why are the changes needed?
Performance optimization: avoids unnecessary full-plan traversals during
analysis when the relevant node types are absent.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Existing tests: `AnalysisSuite`, `EliminateSubqueryAliasesSuite`, and the
inline-table-related tests all pass.
### Was this patch authored or co-authored using generative AI tooling?
Yes, GitHub Copilot.
Closes #54440 from yaooqinn/tree-pattern-pruning-inline-tables.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
---
.../main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +-
.../org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala | 4 ++--
.../scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala | 2 ++
3 files changed, 5 insertions(+), 3 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0cddb847c349..04bad39a88ba 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3952,7 +3952,7 @@ object EliminateSubqueryAliases extends Rule[LogicalPlan]
{
// This is also called in the beginning of the optimization phase, and as a
result
// is using transformUp rather than resolveOperators.
def apply(plan: LogicalPlan): LogicalPlan =
AnalysisHelper.allowInvokingTransformsInAnalyzer {
- plan.transformUpWithPruning(AlwaysProcess.fn, ruleId) {
+ plan.transformUpWithPruning(_.containsPattern(SUBQUERY_ALIAS), ruleId) {
case SubqueryAlias(_, child) => child
}
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
index 08cb70ddd5db..82615354c9c3 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
import org.apache.spark.sql.catalyst.expressions.EvalHelper
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.AlwaysProcess
+import org.apache.spark.sql.catalyst.trees.TreePattern.INLINE_TABLE_EVAL
import org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
/**
@@ -28,7 +28,7 @@ import
org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
*/
object ResolveInlineTables extends Rule[LogicalPlan] with EvalHelper {
override def apply(plan: LogicalPlan): LogicalPlan = {
- plan.resolveOperatorsWithPruning(AlwaysProcess.fn, ruleId) {
+ plan.resolveOperatorsWithPruning(_.containsPattern(INLINE_TABLE_EVAL),
ruleId) {
case table: UnresolvedInlineTable if table.expressionsResolved =>
EvaluateUnresolvedInlineTable.evaluateUnresolvedInlineTable(table)
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index fffbc7511a1d..5726c573ee7d 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -178,6 +178,8 @@ case class UnresolvedInlineTable(
rows: Seq[Seq[Expression]])
extends UnresolvedLeafNode {
+ final override val nodePatterns: Seq[TreePattern] = Seq(INLINE_TABLE_EVAL)
+
lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved))
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]