This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 62214cd1f4 [feature](nereids) adjust min/max of column stats for cast function (#21772) 62214cd1f4 is described below commit 62214cd1f4762a754d4e51b181bcab6b1a2b60c0 Author: minghong <engle...@gmail.com> AuthorDate: Fri Jul 14 12:54:04 2023 +0800 [feature](nereids) adjust min/max of column stats for cast function (#21772) For cast(A as date), where A is a string column, the min/max of the result column stats should be calculated like this: convert A.minExpr to a date dateA, and then get the double value from dateA. Add "explain memo plan select ..." to print the memo from the mysql client. Dump column stats for FileScanNode, used in datalake. --- .../antlr4/org/apache/doris/nereids/DorisLexer.g4 | 1 + .../antlr4/org/apache/doris/nereids/DorisParser.g4 | 1 + .../org/apache/doris/nereids/NereidsPlanner.java | 4 +++ .../java/org/apache/doris/nereids/memo/Memo.java | 11 +++++--- .../doris/nereids/parser/LogicalPlanBuilder.java | 3 +++ .../doris/nereids/stats/ExpressionEstimation.java | 31 +++++++++++++++++++++- .../trees/plans/commands/ExplainCommand.java | 1 + .../apache/doris/statistics/ColumnStatistic.java | 4 +-- 8 files changed, 49 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 78071820ff..2f23376d0a 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -251,6 +251,7 @@ MATCH_ANY: 'MATCH_ANY'; MATCH_ALL: 'MATCH_ALL'; MATCH_PHRASE: 'MATCH_PHRASE'; MATCHED: 'MATCHED'; +MEMO:'MEMO'; MERGE: 'MERGE'; MINUTE: 'MINUTE'; MONTH: 'MONTH'; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 3fd8e8b1af..c32a09f3f6 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ 
b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -78,6 +78,7 @@ planType | REWRITTEN | LOGICAL // same type | OPTIMIZED | PHYSICAL // same type | SHAPE + | MEMO | ALL // default type ; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index bf74eceee3..25064c1c68 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -408,6 +408,10 @@ public class NereidsPlanner extends Planner { return "cost = " + cost + "\n" + optimizedPlan.treeString(); case SHAPE_PLAN: return optimizedPlan.shape(""); + case MEMO_PLAN: + return cascadesContext.getMemo().toString() + + "\n\n========== OPTIMIZED PLAN ==========\n" + + optimizedPlan.treeString(); case ALL_PLAN: return "========== PARSED PLAN ==========\n" + parsedPlan.treeString() + "\n\n" diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java index 42acda2511..643ffe819c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/Memo.java @@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.plans.GroupPlan; import org.apache.doris.nereids.trees.plans.LeafPlan; import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; @@ -737,10 +738,12 @@ public class Memo { builder.append("\n\n").append(group); builder.append(" stats=").append(group.getStatistics()).append("\n"); Statistics stats = group.getStatistics(); - if 
(stats != null && !group.getLogicalExpressions().isEmpty() - && group.getLogicalExpressions().get(0).getPlan() instanceof LogicalOlapScan) { - for (Entry e : stats.columnStatistics().entrySet()) { - builder.append(" ").append(e.getKey()).append(":").append(e.getValue()).append("\n"); + if (stats != null && !group.getLogicalExpressions().isEmpty()) { + Plan plan = group.getLogicalExpressions().get(0).getPlan(); + if (plan instanceof LogicalOlapScan || plan instanceof LogicalFileScan) { + for (Entry e : stats.columnStatistics().entrySet()) { + builder.append(" ").append(e.getKey()).append(":").append(e.getValue()).append("\n"); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index dffc10d7df..d4c044ad73 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -1901,6 +1901,9 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { if (planTypeContext.SHAPE() != null) { return ExplainLevel.SHAPE_PLAN; } + if (planTypeContext.MEMO() != null) { + return ExplainLevel.MEMO_PLAN; + } return ExplainLevel.ALL_PLAN; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index eca7511684..9a3c54f739 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -18,6 +18,8 @@ package org.apache.doris.nereids.stats; import org.apache.doris.analysis.ArithmeticExpr.Operator; +import org.apache.doris.analysis.StringLiteral; +import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Add; import 
org.apache.doris.nereids.trees.expressions.AggregateExpression; import org.apache.doris.nereids.trees.expressions.Alias; @@ -84,6 +86,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.Year; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.YearsSub; +import org.apache.doris.nereids.trees.expressions.literal.DateLiteral; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.DataType; @@ -140,12 +143,38 @@ public class ExpressionEstimation extends ExpressionVisitor<ColumnStatistic, Sta return columnStat.build(); } + @Override public ColumnStatistic visitCast(Cast cast, Statistics context) { ColumnStatistic stats = context.findColumnStatistics(cast); if (stats != null) { return stats; } - return cast.child().accept(this, context); + ColumnStatistic childColStats = cast.child().accept(this, context); + + return castMinMax(childColStats, cast.getDataType()); + } + + private ColumnStatistic castMinMax(ColumnStatistic colStats, DataType targetType) { + if (colStats.minExpr instanceof StringLiteral && targetType.isDateLikeType()) { + ColumnStatisticBuilder builder = new ColumnStatisticBuilder(colStats); + if (colStats.minExpr != null && colStats.maxExpr != null) { + String strMin = colStats.minExpr.getStringValue(); + try { + DateLiteral dateMinLiteral = new DateLiteral(strMin); + long min = dateMinLiteral.getValue(); + builder.setMinValue(min); + + String strMax = colStats.maxExpr.getStringValue(); + DateLiteral dateMaxLiteral = new DateLiteral(strMax); + long max = dateMaxLiteral.getValue(); + builder.setMaxValue(max); + } catch (AnalysisException e) { + // ignore exception. 
do not convert min max + } + } + return builder.build(); + } + return colStats; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java index 820e3861d8..bfb0e82a17 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java @@ -46,6 +46,7 @@ public class ExplainCommand extends Command implements NoForward { REWRITTEN_PLAN(true), OPTIMIZED_PLAN(true), SHAPE_PLAN(true), + MEMO_PLAN(true), ALL_PLAN(true) ; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java index 48277b3c51..d791ee1e0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ColumnStatistic.java @@ -314,8 +314,8 @@ public class ColumnStatistic { @Override public String toString() { - return isUnKnown ? "unKnown" : String.format("ndv=%.4f, min=%f, max=%f, sel=%f, count=%.4f", - ndv, minValue, maxValue, selectivity, count); + return isUnKnown ? "unknown" : String.format("ndv=%.4f, min=%f(%s), max=%f(%s), count=%.4f", + ndv, minValue, minExpr, maxValue, maxExpr, count); } public JSONObject toJson() { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org