This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2ed58ab07614 [SPARK-53773][SQL] Recover alphabetic ordering of rules in `RuleIdCollection`
2ed58ab07614 is described below
commit 2ed58ab07614ae10fedcfa2c12c80007a9636b12
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Wed Oct 1 07:58:30 2025 -0700
[SPARK-53773][SQL] Recover alphabetic ordering of rules in `RuleIdCollection`
### What changes were proposed in this pull request?
This PR aims to restore the alphabetical ordering of rules in the `RuleIdCollection` object for Apache Spark 4.1.0.
### Why are the changes needed?
Since `rulesNeedingIds` was originally defined in alphabetical order, as the following comment states, we should restore the ordering to match the original intent.
https://github.com/apache/spark/blob/e04fd595370808bbf12b4c50980a86085fd20782/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala#L43-L44
> Rules here are in alphabetical order.
Currently, (1) the list contains several out-of-order entries, and (2) the ordering mixes full class names with simple class names. For instance, `AnsiCombinedTypeCoercionRule` would be placed second if the list were ordered by simple class name. This PR sorts the list consistently by *full name*, fixing inconsistencies such as the following (a short sketch after these links illustrates the difference):
https://github.com/apache/spark/blob/e04fd595370808bbf12b4c50980a86085fd20782/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala#L57-L59
https://github.com/apache/spark/blob/e04fd595370808bbf12b4c50980a86085fd20782/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala#L111-L116
https://github.com/apache/spark/blob/e04fd595370808bbf12b4c50980a86085fd20782/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala#L181-L182
https://github.com/apache/spark/blob/e04fd595370808bbf12b4c50980a86085fd20782/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala#L83
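As a rough illustration, here is a hedged sketch (the `rules` sample, the object name, and the assertions are hypothetical, not part of this patch) showing how the same two entries from this diff sort differently by full versus simple class name:

```scala
// Hypothetical sketch, not part of this patch: full-name vs. simple-name ordering.
object FullVsSimpleNameOrdering extends App {
  // Two entries taken from the diff below.
  val rules = Seq(
    "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveProcedures",
    "org.apache.spark.sql.catalyst.analysis.ResolveCollationName")

  // By full name, "...Analyzer$ResolveProcedures" sorts first ('A' < 'R').
  assert(rules.sorted == rules)

  // By simple class name (the segment after the last '.' or '$'), the order
  // flips: "ResolveCollationName" < "ResolveProcedures".
  assert(rules.sortBy(_.split(Array('.', '$')).last) == rules.reverse)
}
```

Sorting by the full string keeps nested rules such as `Analyzer$ResolveProcedures` grouped under their enclosing object, which is the convention this PR adopts.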
### Does this PR introduce _any_ user-facing change?
No behavior change.
### How was this patch tested?
Pass the CIs.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #52495 from dongjoon-hyun/SPARK-53773.
Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/catalyst/rules/RuleIdCollection.scala | 40 +++++++++++-----------
1 file changed, 20 insertions(+), 20 deletions(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
index c68b8a2c29af..fd839b4c2127 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala
@@ -51,20 +51,18 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAliases" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveBinaryArithmetic" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveCollationName" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveDeserializer" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveEncodersInUDF" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveFunctions" ::
- "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveProcedures" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGenerate" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveGroupingAnalytics" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveHigherOrderFunctions" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveInsertInto" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveNaturalAndUsingJoin" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveNewInstance" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy"
::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOutputRelation"
::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolvePivot" ::
+ "org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveProcedures" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRandomSeed" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences" ::
"org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations" ::
@@ -86,34 +84,36 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.DeduplicateRelations" ::
"org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases" ::
"org.apache.spark.sql.catalyst.analysis.EliminateUnions" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveCollationName" ::
"org.apache.spark.sql.catalyst.analysis.ResolveDefaultColumns" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveExecuteImmediate" ::
"org.apache.spark.sql.catalyst.analysis.ResolveExpressionsWithNamePlaceholders"
::
+ "org.apache.spark.sql.catalyst.analysis.ResolveGroupByAll" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveHigherOrderFunctions" ::
"org.apache.spark.sql.catalyst.analysis.ResolveHints$ResolveCoalesceHints" ::
"org.apache.spark.sql.catalyst.analysis.ResolveHints$ResolveJoinStrategyHints"
::
- "org.apache.spark.sql.catalyst.analysis.ResolveGroupByAll" ::
"org.apache.spark.sql.catalyst.analysis.ResolveInlineTables" ::
"org.apache.spark.sql.catalyst.analysis.ResolveLambdaVariables" ::
"org.apache.spark.sql.catalyst.analysis.ResolveLateralColumnAliasReference" ::
"org.apache.spark.sql.catalyst.analysis.ResolveOrderByAll" ::
"org.apache.spark.sql.catalyst.analysis.ResolveRowLevelCommandAssignments" ::
"org.apache.spark.sql.catalyst.analysis.ResolveSetVariable" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveExecuteImmediate" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveTableConstraints" ::
"org.apache.spark.sql.catalyst.analysis.ResolveTableSpec" ::
"org.apache.spark.sql.catalyst.analysis.ResolveTimeZone" ::
"org.apache.spark.sql.catalyst.analysis.ResolveUnion" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveUnresolvedHaving" ::
+ "org.apache.spark.sql.catalyst.analysis.ResolveUpdateEventTimeWatermarkColumn" ::
"org.apache.spark.sql.catalyst.analysis.ResolveWindowTime" ::
"org.apache.spark.sql.catalyst.analysis.SessionWindowing" ::
"org.apache.spark.sql.catalyst.analysis.SubstituteUnresolvedOrdinals" ::
"org.apache.spark.sql.catalyst.analysis.TimeWindowing" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CombinedTypeCoercionRule"
::
- "org.apache.spark.sql.catalyst.analysis.UpdateOuterReferences" ::
"org.apache.spark.sql.catalyst.analysis.UpdateAttributeNullability" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveUpdateEventTimeWatermarkColumn" ::
+ "org.apache.spark.sql.catalyst.analysis.UpdateOuterReferences" ::
"org.apache.spark.sql.catalyst.expressions.EliminatePipeOperators" ::
- "org.apache.spark.sql.catalyst.expressions.ValidateAndStripPipeExpressions" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveUnresolvedHaving" ::
- "org.apache.spark.sql.catalyst.analysis.ResolveTableConstraints" ::
"org.apache.spark.sql.catalyst.expressions.ExtractSemiStructuredFields"
::
+ "org.apache.spark.sql.catalyst.expressions.ValidateAndStripPipeExpressions" ::
// Catalyst Optimizer rules
"org.apache.spark.sql.catalyst.optimizer.BooleanSimplification" ::
"org.apache.spark.sql.catalyst.optimizer.CollapseProject" ::
@@ -135,6 +135,8 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.optimizer.EliminateOuterJoin" ::
"org.apache.spark.sql.catalyst.optimizer.EliminateSerialization" ::
"org.apache.spark.sql.catalyst.optimizer.EliminateWindowPartitions" ::
+ "org.apache.spark.sql.catalyst.optimizer.EvalInlineTables" ::
+ "org.apache.spark.sql.catalyst.optimizer.GenerateOptimization" ::
"org.apache.spark.sql.catalyst.optimizer.InferWindowGroupLimit" ::
"org.apache.spark.sql.catalyst.optimizer.LikeSimplification" ::
"org.apache.spark.sql.catalyst.optimizer.LimitPushDown" ::
@@ -145,12 +147,12 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.optimizer.OptimizeCsvJsonExprs" ::
"org.apache.spark.sql.catalyst.optimizer.OptimizeIn" ::
"org.apache.spark.sql.catalyst.optimizer.OptimizeJoinCondition" ::
- "org.apache.spark.sql.catalyst.optimizer.OptimizeRand" ::
"org.apache.spark.sql.catalyst.optimizer.OptimizeOneRowPlan" ::
- "org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries" ::
+ "org.apache.spark.sql.catalyst.optimizer.OptimizeRand" ::
"org.apache.spark.sql.catalyst.optimizer.OptimizeRepartition" ::
- "org.apache.spark.sql.catalyst.optimizer.OptimizeWindowFunctions" ::
"org.apache.spark.sql.catalyst.optimizer.OptimizeUpdateFields"::
+ "org.apache.spark.sql.catalyst.optimizer.OptimizeWindowFunctions" ::
+ "org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries" ::
"org.apache.spark.sql.catalyst.optimizer.PropagateEmptyRelation" ::
"org.apache.spark.sql.catalyst.optimizer.PruneFilters" ::
"org.apache.spark.sql.catalyst.optimizer.PushDownLeftSemiAntiJoin" ::
@@ -159,27 +161,25 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.optimizer.PushLeftSemiLeftAntiThroughJoin" ::
"org.apache.spark.sql.catalyst.optimizer.ReassignLambdaVariableID" ::
"org.apache.spark.sql.catalyst.optimizer.RemoveLiteralFromGroupExpressions" ::
- "org.apache.spark.sql.catalyst.optimizer.GenerateOptimization" ::
"org.apache.spark.sql.catalyst.optimizer.RemoveNoopOperators" ::
"org.apache.spark.sql.catalyst.optimizer.RemoveRedundantAggregates" ::
"org.apache.spark.sql.catalyst.optimizer.RemoveRepetitionFromGroupExpressions"
::
"org.apache.spark.sql.catalyst.optimizer.ReorderAssociativeOperator" ::
"org.apache.spark.sql.catalyst.optimizer.ReorderJoin" ::
+ "org.apache.spark.sql.catalyst.optimizer.ReplaceDistinctWithAggregate" ::
"org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithAntiJoin" ::
"org.apache.spark.sql.catalyst.optimizer.ReplaceExceptWithFilter" ::
- "org.apache.spark.sql.catalyst.optimizer.ReplaceDistinctWithAggregate" ::
- "org.apache.spark.sql.catalyst.optimizer.ReplaceNullWithFalseInPredicate" ::
"org.apache.spark.sql.catalyst.optimizer.ReplaceIntersectWithSemiJoin" ::
+ "org.apache.spark.sql.catalyst.optimizer.ReplaceNullWithFalseInPredicate" ::
+ "org.apache.spark.sql.catalyst.optimizer.RewriteAsOfJoin" ::
"org.apache.spark.sql.catalyst.optimizer.RewriteExceptAll" ::
"org.apache.spark.sql.catalyst.optimizer.RewriteIntersectAll" ::
- "org.apache.spark.sql.catalyst.optimizer.RewriteAsOfJoin" ::
"org.apache.spark.sql.catalyst.optimizer.SimplifyBinaryComparison" ::
"org.apache.spark.sql.catalyst.optimizer.SimplifyCaseConversionExpressions" ::
"org.apache.spark.sql.catalyst.optimizer.SimplifyCasts" ::
"org.apache.spark.sql.catalyst.optimizer.SimplifyConditionals" ::
"org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps" ::
"org.apache.spark.sql.catalyst.optimizer.TransposeWindow" ::
- "org.apache.spark.sql.catalyst.optimizer.EvalInlineTables" ::
"org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison"
:: Nil
}
@@ -187,12 +187,13 @@ object RuleIdCollection {
rulesNeedingIds = rulesNeedingIds ++ {
// In the production code path, the following rules are run in CombinedTypeCoercionRule, and
// hence we only need to add them for unit testing.
- "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$DateTimeOperations" ::
"org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$GetDateFieldOperations"
::
+ "org.apache.spark.sql.catalyst.analysis.AnsiTypeCoercion$PromoteStringLiterals" ::
"org.apache.spark.sql.catalyst.analysis.DecimalPrecision" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercion$BooleanEquality" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercion$DateTimeOperations"
::
+ "org.apache.spark.sql.catalyst.analysis.TypeCoercion$PromoteStrings" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CaseWhenCoercion" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$ConcatCoercion"
::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$Division" ::
@@ -203,7 +204,6 @@ object RuleIdCollection {
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$InConversion" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$IntegralDivision" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$MapZipWithCoercion" ::
- "org.apache.spark.sql.catalyst.analysis.TypeCoercion$PromoteStrings" ::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$StackCoercion"
::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$StringLiteralCoercion"
::
"org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$WindowFrameCoercion"
:: Nil
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]