This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 7d67ff3e58fe [SPARK-55654][SQL] Enable TreePattern pruning for 
EliminateSubqueryAliases and ResolveInlineTables
7d67ff3e58fe is described below

commit 7d67ff3e58fee04f831db2606c14efbdeac2c0ed
Author: Kent Yao <[email protected]>
AuthorDate: Tue Feb 24 17:58:41 2026 +0800

    [SPARK-55654][SQL] Enable TreePattern pruning for EliminateSubqueryAliases 
and ResolveInlineTables
    
    ### What changes were proposed in this pull request?
    
    Replace `AlwaysProcess.fn` with pattern-based pruning in two Analyzer rules:
    
    1. **EliminateSubqueryAliases**: Use `_.containsPattern(SUBQUERY_ALIAS)`
       - Skips entire plan traversal when no `SubqueryAlias` nodes exist
       - Common in resolved plans after initial resolution passes
    
    2. **ResolveInlineTables**: Use `_.containsPattern(INLINE_TABLE_EVAL)`
       - Skips traversal when no `UnresolvedInlineTable` nodes exist
       - Inline tables are rare; most queries never contain them
    
    Also adds `INLINE_TABLE_EVAL` to `UnresolvedInlineTable.nodePatterns`, 
which was previously only defined on `ResolvedInlineTable`. Without this, the 
pruning condition for `ResolveInlineTables` could never be satisfied for 
unresolved inline tables.
    
    Both rules previously used `AlwaysProcess.fn`, forcing full tree traversal 
on every fixedPoint iteration even when no matching nodes existed. 
TreePatternBits propagation enables an O(1) root-level short-circuit.
    
    ### Why are the changes needed?
    
    Performance optimization: avoids unnecessary full-plan traversals during 
analysis when the relevant node types are absent.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing tests: `AnalysisSuite`, `EliminateSubqueryAliasesSuite`, and 
inline-table-related tests all pass.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    Yes, GitHub Copilot.
    
    Closes #54440 from yaooqinn/tree-pattern-pruning-inline-tables.
    
    Authored-by: Kent Yao <[email protected]>
    Signed-off-by: Kent Yao <[email protected]>
---
 .../main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala  | 2 +-
 .../org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala  | 4 ++--
 .../scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala     | 2 ++
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 0cddb847c349..04bad39a88ba 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -3952,7 +3952,7 @@ object EliminateSubqueryAliases extends Rule[LogicalPlan] 
{
   // This is also called in the beginning of the optimization phase, and as a 
result
   // is using transformUp rather than resolveOperators.
   def apply(plan: LogicalPlan): LogicalPlan = 
AnalysisHelper.allowInvokingTransformsInAnalyzer {
-    plan.transformUpWithPruning(AlwaysProcess.fn, ruleId) {
+    plan.transformUpWithPruning(_.containsPattern(SUBQUERY_ALIAS), ruleId) {
       case SubqueryAlias(_, child) => child
     }
   }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
index 08cb70ddd5db..82615354c9c3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis
 import org.apache.spark.sql.catalyst.expressions.EvalHelper
 import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
 import org.apache.spark.sql.catalyst.rules.Rule
-import org.apache.spark.sql.catalyst.trees.AlwaysProcess
+import org.apache.spark.sql.catalyst.trees.TreePattern.INLINE_TABLE_EVAL
 import org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
 
 /**
@@ -28,7 +28,7 @@ import 
org.apache.spark.sql.catalyst.util.EvaluateUnresolvedInlineTable
  */
 object ResolveInlineTables extends Rule[LogicalPlan] with EvalHelper {
   override def apply(plan: LogicalPlan): LogicalPlan = {
-    plan.resolveOperatorsWithPruning(AlwaysProcess.fn, ruleId) {
+    plan.resolveOperatorsWithPruning(_.containsPattern(INLINE_TABLE_EVAL), 
ruleId) {
       case table: UnresolvedInlineTable if table.expressionsResolved =>
         EvaluateUnresolvedInlineTable.evaluateUnresolvedInlineTable(table)
     }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index fffbc7511a1d..5726c573ee7d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -178,6 +178,8 @@ case class UnresolvedInlineTable(
     rows: Seq[Seq[Expression]])
   extends UnresolvedLeafNode {
 
+  final override val nodePatterns: Seq[TreePattern] = Seq(INLINE_TABLE_EVAL)
+
   lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved))
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to