This is an automated email from the ASF dual-hosted git repository. lijibing pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5f0b89f06ac [improvement](statistics)Agg table set preagg on when doing sample analyzing. (#49918) 5f0b89f06ac is described below commit 5f0b89f06ac12b46a37b032c553c5b11471a26c1 Author: James <lijib...@selectdb.com> AuthorDate: Thu Apr 24 16:56:48 2025 +0800 [improvement](statistics)Agg table set preagg on when doing sample analyzing. (#49918) ### What problem does this PR solve? This pr includes 3 changes. 1. Nereids support set ScanNode preagg on by hint, like this: select * from table1 /*+PREAGGOPEN*/ 2. When sample analyze agg table and mor unique table, set preagg on to improve performance. 3. Skip sample analyzing agg table and mor unique table's value columns. --- .../org/apache/doris/nereids/StatementContext.java | 9 + .../doris/nereids/jobs/executor/Analyzer.java | 4 +- .../doris/nereids/parser/LogicalPlanBuilder.java | 156 ++++++++------ .../org/apache/doris/nereids/rules/RuleType.java | 1 + .../doris/nereids/rules/analysis/BindRelation.java | 11 +- .../analysis/EliminateLogicalPreAggOnHint.java | 39 ++++ .../apache/doris/nereids/trees/plans/PlanType.java | 1 + .../trees/plans/commands/info/BaseViewInfo.java | 4 +- .../trees/plans/logical/LogicalPreAggOnHint.java | 89 ++++++++ .../nereids/trees/plans/visitor/PlanVisitor.java | 5 + .../apache/doris/statistics/AnalysisManager.java | 16 +- .../apache/doris/statistics/BaseAnalysisTask.java | 6 +- .../doris/statistics/ExternalAnalysisTask.java | 1 + .../apache/doris/statistics/OlapAnalysisTask.java | 15 +- .../doris/statistics/StatisticsAutoCollector.java | 23 ++- .../doris/statistics/util/StatisticsUtil.java | 20 ++ .../doris/statistics/OlapAnalysisTaskTest.java | 62 +++++- .../statistics/StatisticsAutoCollectorTest.java | 5 +- .../doris/statistics/util/StatisticsUtilTest.java | 57 ++++++ .../suites/statistics/analyze_stats.groovy | 3 +- .../suites/statistics/test_analyze_mv.groovy | 57 +----- .../test_skip_agg_table_value_column.groovy | 228 +++++++++++++++++++++ 22 files changed, 663 insertions(+), 149 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 2fe0c4486f0..cc09e260a88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -159,6 +159,7 @@ public class StatementContext implements Closeable { private final List<Expression> joinFilters = new ArrayList<>(); private final List<Hint> hints = new ArrayList<>(); + private boolean hintForcePreAggOn = false; // the columns in Plan.getExpressions(), such as columns in join condition or filter condition, group by expression private final Set<SlotReference> keySlots = Sets.newHashSet(); @@ -253,6 +254,14 @@ public class StatementContext implements Closeable { this.needLockTables = needLockTables; } + public void setHintForcePreAggOn(boolean preAggOn) { + this.hintForcePreAggOn = preAggOn; + } + + public boolean isHintForcePreAggOn() { + return hintForcePreAggOn; + } + /** * cache view info to avoid view's def and sql mode changed before lock it. * diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 98daf7425f5..ddb793c489b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -31,6 +31,7 @@ import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; import org.apache.doris.nereids.rules.analysis.CompressedMaterialize; import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.rules.analysis.FillUpMissingSlots; import org.apache.doris.nereids.rules.analysis.FillUpQualifyMissingSlot; @@ -96,7 +97,8 @@ public class Analyzer extends AbstractBatchJobExecutor { return jobs( // we should eliminate hint before "Subquery unnesting". topDown(new AnalyzeCTE()), - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), bottomUp( new BindRelation(), new CheckPolicy() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 70e19d381a0..d54ee98b0cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -843,6 +843,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalOneRowRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; import org.apache.doris.nereids.trees.plans.logical.LogicalRepeat; @@ -2045,12 +2046,15 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { return selectPlan; } List<ParserRuleContext> selectHintContexts = Lists.newArrayList(); + List<ParserRuleContext> preAggOnHintContexts = Lists.newArrayList(); for (Integer key : selectHintMap.keySet()) { if (key > selectCtx.getStart().getStopIndex() && key < selectCtx.getStop().getStartIndex()) { selectHintContexts.add(selectHintMap.get(key)); + } else { + preAggOnHintContexts.add(selectHintMap.get(key)); } } - return withSelectHint(selectPlan, selectHintContexts); + return withHints(selectPlan, selectHintContexts, preAggOnHintContexts); }); } @@ -3706,82 +3710,102 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> { return tableList; } - private LogicalPlan withSelectHint(LogicalPlan logicalPlan, List<ParserRuleContext> hintContexts) { - if (hintContexts.isEmpty()) { + private LogicalPlan withHints(LogicalPlan logicalPlan, List<ParserRuleContext> selectHintContexts, + List<ParserRuleContext> preAggOnHintContexts) { + if (selectHintContexts.isEmpty() && preAggOnHintContexts.isEmpty()) { return logicalPlan; } - ImmutableList.Builder<SelectHint> hints = ImmutableList.builder(); - for (ParserRuleContext hintContext : hintContexts) { - SelectHintContext selectHintContext = (SelectHintContext) hintContext; - for (HintStatementContext hintStatement : selectHintContext.hintStatements) { - if (hintStatement.USE_MV() != null) { - hints.add(new SelectHintUseMv("USE_MV", getTableList(hintStatement.tableList), true)); - continue; - } else if (hintStatement.NO_USE_MV() != null) { - hints.add(new SelectHintUseMv("NO_USE_MV", getTableList(hintStatement.tableList), false)); - continue; - } - String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); - switch (hintName) { - case "set_var": - Map<String, Optional<String>> parameters = Maps.newLinkedHashMap(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - Optional<String> value = Optional.empty(); - if (kv.constantValue != null) { - Literal literal = (Literal) visit(kv.constantValue); - value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); - } else if (kv.identifierValue != null) { - // maybe we should throw exception when the identifierValue is quoted identifier - value = Optional.ofNullable(kv.identifierValue.getText()); + LogicalPlan newPlan = logicalPlan; + if (!selectHintContexts.isEmpty()) { + ImmutableList.Builder<SelectHint> hints = ImmutableList.builder(); + for (ParserRuleContext hintContext : selectHintContexts) { + SelectHintContext selectHintContext = (SelectHintContext) hintContext; + for (HintStatementContext hintStatement : selectHintContext.hintStatements) { + if (hintStatement.USE_MV() != null) { + hints.add(new SelectHintUseMv("USE_MV", getTableList(hintStatement.tableList), true)); + continue; + } else if (hintStatement.NO_USE_MV() != null) { + hints.add(new SelectHintUseMv("NO_USE_MV", getTableList(hintStatement.tableList), false)); + continue; + } + String hintName = hintStatement.hintName.getText().toLowerCase(Locale.ROOT); + switch (hintName) { + case "set_var": + Map<String, Optional<String>> parameters = Maps.newLinkedHashMap(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + Optional<String> value = Optional.empty(); + if (kv.constantValue != null) { + Literal literal = (Literal) visit(kv.constantValue); + value = Optional.ofNullable(literal.toLegacyLiteral().getStringValue()); + } else if (kv.identifierValue != null) { + // maybe we should throw exception when the identifierValue is quoted identifier + value = Optional.ofNullable(kv.identifierValue.getText()); + } + parameters.put(parameterName, value); } - parameters.put(parameterName, value); } - } - SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters); - setVar.setVarOnceInSql(ConnectContext.get().getStatementContext()); - hints.add(setVar); - break; - case "leading": - List<String> leadingParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - leadingParameters.add(parameterName); + SelectHintSetVar setVar = new SelectHintSetVar(hintName, parameters); + setVar.setVarOnceInSql(ConnectContext.get().getStatementContext()); + hints.add(setVar); + break; + case "leading": + List<String> leadingParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + leadingParameters.add(parameterName); + } } - } - hints.add(new SelectHintLeading(hintName, leadingParameters)); - break; - case "ordered": - hints.add(new SelectHintOrdered(hintName)); - break; - case "use_cbo_rule": - List<String> useRuleParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - if (kv.key != null) { - String parameterName = visitIdentifierOrText(kv.key); - useRuleParameters.add(parameterName); + hints.add(new SelectHintLeading(hintName, leadingParameters)); + break; + case "ordered": + hints.add(new SelectHintOrdered(hintName)); + break; + case "use_cbo_rule": + List<String> useRuleParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + if (kv.key != null) { + String parameterName = visitIdentifierOrText(kv.key); + useRuleParameters.add(parameterName); + } } - } - hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false)); - break; - case "no_use_cbo_rule": - List<String> noUseRuleParameters = new ArrayList<>(); - for (HintAssignmentContext kv : hintStatement.parameters) { - String parameterName = visitIdentifierOrText(kv.key); - if (kv.key != null) { - noUseRuleParameters.add(parameterName); + hints.add(new SelectHintUseCboRule(hintName, useRuleParameters, false)); + break; + case "no_use_cbo_rule": + List<String> noUseRuleParameters = new ArrayList<>(); + for (HintAssignmentContext kv : hintStatement.parameters) { + String parameterName = visitIdentifierOrText(kv.key); + if (kv.key != null) { + noUseRuleParameters.add(parameterName); + } } + hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); + break; + default: + break; + } + } + } + newPlan = new LogicalSelectHint<>(hints.build(), newPlan); + } + if (!preAggOnHintContexts.isEmpty()) { + for (ParserRuleContext hintContext : preAggOnHintContexts) { + if (hintContext instanceof SelectHintContext) { + SelectHintContext preAggOnHintContext = (SelectHintContext) hintContext; + if (preAggOnHintContext.hintStatement != null + && preAggOnHintContext.hintStatement.hintName != null) { + String text = preAggOnHintContext.hintStatement.hintName.getText(); + if (text.equalsIgnoreCase("PREAGGOPEN")) { + newPlan = new LogicalPreAggOnHint<>(newPlan); + break; } - hints.add(new SelectHintUseCboRule(hintName, noUseRuleParameters, true)); - break; - default: - break; + } } } } - return new LogicalSelectHint<>(hints.build(), logicalPlan); + return newPlan; } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index a020909be70..5b252402728 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -138,6 +138,7 @@ public enum RuleType { ELIMINATE_GROUP_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_LOGICAL_SELECT_HINT(RuleTypeClass.REWRITE), + ELIMINATE_LOGICAL_PRE_AGG_ON_HINT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_CONSTANT(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_SUBQUERY(RuleTypeClass.REWRITE), ELIMINATE_ORDER_BY_UNDER_VIEW(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index dae2a8438d1..1ef7c981fcf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -96,6 +96,8 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.collections.CollectionUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.ArrayList; import java.util.List; @@ -105,6 +107,7 @@ import java.util.Optional; * Rule to bind relations in query plan. */ public class BindRelation extends OneAnalysisRuleFactory { + private static final Logger LOG = LogManager.getLogger(StatementContext.class); public BindRelation() {} @@ -179,7 +182,8 @@ public class BindRelation extends OneAnalysisRuleFactory { return getLogicalPlan(table, unboundRelation, tableQualifier, cascadesContext); } - private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List<String> qualifier) { + private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, List<String> qualifier, + CascadesContext cascadesContext) { LogicalOlapScan scan; List<Long> partIds = getPartitionIds(table, unboundRelation, qualifier); List<Long> tabletIds = unboundRelation.getTabletIds(); @@ -217,6 +221,9 @@ public class BindRelation extends OneAnalysisRuleFactory { // This tabletIds is set manually, so need to set specifiedTabletIds scan = scan.withManuallySpecifiedTabletIds(tabletIds); } + if (cascadesContext.getStatementContext().isHintForcePreAggOn()) { + return scan.withPreAggStatus(PreAggStatus.on()); + } if (needGenerateLogicalAggForRandomDistAggTable(scan)) { // it's a random distribution agg table // add agg on olap scan @@ -384,7 +391,7 @@ public class BindRelation extends OneAnalysisRuleFactory { switch (table.getType()) { case OLAP: case MATERIALIZED_VIEW: - return makeOlapScan(table, unboundRelation, qualifierWithoutTableName); + return makeOlapScan(table, unboundRelation, qualifierWithoutTableName, cascadesContext); case VIEW: View view = (View) table; isView = true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java new file mode 100644 index 00000000000..9f40f8e97b9 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/EliminateLogicalPreAggOnHint.java @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.rewrite.OneRewriteRuleFactory; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; + +/** + * eliminate logical common hint and set them to cascade context + */ +public class EliminateLogicalPreAggOnHint extends OneRewriteRuleFactory { + + @Override + public Rule build() { + return logicalPreAggOnHint().thenApply(ctx -> { + LogicalPreAggOnHint<Plan> preAggHintPlan = ctx.root; + ctx.statementContext.setHintForcePreAggOn(true); + return preAggHintPlan.child(); + }).toRule(RuleType.ELIMINATE_LOGICAL_PRE_AGG_ON_HINT); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 0569a3ceb73..1ab6d5fd1e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -62,6 +62,7 @@ public enum PlanType { LOGICAL_APPLY, LOGICAL_ASSERT_NUM_ROWS, LOGICAL_CHECK_POLICY, + LOGICAL_COMMON_HINT, LOGICAL_CTE, LOGICAL_CTE_ANCHOR, LOGICAL_CTE_PRODUCER, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java index 8df0f83b0b6..a762b11c1f6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.rules.analysis.AnalyzeCTE; import org.apache.doris.nereids.rules.analysis.BindExpression; import org.apache.doris.nereids.rules.analysis.BindRelation; import org.apache.doris.nereids.rules.analysis.CheckPolicy; +import org.apache.doris.nereids.rules.analysis.EliminateLogicalPreAggOnHint; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -263,7 +264,8 @@ public class BaseViewInfo { private static List<RewriteJob> buildAnalyzeViewJobsForStar() { return jobs( - topDown(new EliminateLogicalSelectHint()), + topDown(new EliminateLogicalSelectHint(), + new EliminateLogicalPreAggOnHint()), topDown(new AnalyzeCTE()), bottomUp( new BindRelation(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java new file mode 100644 index 00000000000..079f6daabe6 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalPreAggOnHint.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.logical; + +import org.apache.doris.nereids.memo.GroupExpression; +import org.apache.doris.nereids.properties.LogicalProperties; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.plans.BlockFuncDepsPropagation; +import org.apache.doris.nereids.trees.plans.DiffOutputInAsterisk; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.List; +import java.util.Optional; + +/** + * PreAggOn hint. + */ +public class LogicalPreAggOnHint<CHILD_TYPE extends Plan> extends LogicalUnary<CHILD_TYPE> + implements BlockFuncDepsPropagation, DiffOutputInAsterisk { + + public LogicalPreAggOnHint(CHILD_TYPE child) { + this(Optional.empty(), Optional.empty(), child); + } + + /** + * LogicalCommonHint's full parameter constructor. + * @param groupExpression groupExpression exists when this plan is copy out from memo. + * @param logicalProperties logicalProperties is use for compute output + * @param child child plan + */ + public LogicalPreAggOnHint(Optional<GroupExpression> groupExpression, + Optional<LogicalProperties> logicalProperties, CHILD_TYPE child) { + super(PlanType.LOGICAL_COMMON_HINT, groupExpression, logicalProperties, child); + } + + @Override + public LogicalPreAggOnHint<Plan> withChildren(List<Plan> children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(children.get(0)); + } + + @Override + public <R, C> R accept(PlanVisitor<R, C> visitor, C context) { + return visitor.visitLogicalPreAggOnHint(this, context); + } + + @Override + public List<? extends Expression> getExpressions() { + return ImmutableList.of(); + } + + @Override + public Plan withGroupExpression(Optional<GroupExpression> groupExpression) { + return new LogicalPreAggOnHint<>(groupExpression, Optional.of(getLogicalProperties()), child()); + } + + @Override + public Plan withGroupExprLogicalPropChildren(Optional<GroupExpression> groupExpression, + Optional<LogicalProperties> logicalProperties, List<Plan> children) { + Preconditions.checkArgument(children.size() == 1); + return new LogicalPreAggOnHint<>(groupExpression, logicalProperties, children.get(0)); + } + + @Override + public List<Slot> computeOutput() { + return child().getOutput(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java index f7642ce572c..d659c825a70 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/PlanVisitor.java @@ -39,6 +39,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalIntersect; import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalPartitionTopN; +import org.apache.doris.nereids.trees.plans.logical.LogicalPreAggOnHint; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalQualify; import org.apache.doris.nereids.trees.plans.logical.LogicalRelation; @@ -220,6 +221,10 @@ public abstract class PlanVisitor<R, C> implements CommandVisitor<R, C>, Relatio return visit(hint, context); } + public R visitLogicalPreAggOnHint(LogicalPreAggOnHint<? extends Plan> hint, C context) { + return visit(hint, context); + } + public R visitLogicalSetOperation(LogicalSetOperation setOperation, C context) { return visit(setOperation, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index c43adeccc41..8b34ac4de18 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -501,7 +501,13 @@ public class AnalysisManager implements Writable { long periodTimeInMs = stmt.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - Set<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + Set<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames).stream() + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toSet()); infoBuilder.setJobColumns(jobColumns); StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); for (Pair<String, String> pair : jobColumns) { @@ -574,7 +580,13 @@ public class AnalysisManager implements Writable { long periodTimeInMs = command.getPeriodTimeInMs(); infoBuilder.setPeriodTimeInMs(periodTimeInMs); - Set<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + Set<Pair<String, String>> jobColumns = table.getColumnIndexPairs(columnNames).stream() + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) + .collect(Collectors.toSet()); infoBuilder.setJobColumns(jobColumns); StringJoiner stringJoiner = new StringJoiner(",", "[", "]"); for (Pair<String, String> pair : jobColumns) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java index b547ff4146c..4b73971b778 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/BaseAnalysisTask.java @@ -96,7 +96,8 @@ public abstract class BaseAnalysisTask { + "SUBSTRING(CAST(${max} AS STRING), 1, 1024) AS `max`, " + "${dataSizeFunction} * ${scaleFactor} AS `data_size`, " + "NOW() FROM ( " - + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as t"; + + "SELECT * FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) " + + "as t ${preAggHint}"; protected static final String DUJ1_ANALYZE_TEMPLATE = "SELECT " + "CONCAT('${tblId}', '-', '${idxId}', '-', '${colId}') AS `id`, " @@ -117,7 +118,8 @@ public abstract class BaseAnalysisTask { + " SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + " FROM " + " (SELECT ${subStringColName} AS `colValue`, LENGTH(`${colName}`) as `len` " - + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0` " + + " FROM `${catalogName}`.`${dbName}`.`${tblName}` ${index} ${sampleHints} ${limit}) as `t0`" + + " ${preAggHint}" + " GROUP BY `t0`.`colValue` " + ") as `t1` "; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java index 4ea3d0ebc2c..72beb343956 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/ExternalAnalysisTask.java @@ -89,6 +89,7 @@ public class ExternalAnalysisTask extends BaseAnalysisTask { params.put("limit", ""); params.put("scaleFactor", "1"); params.put("index", ""); + params.put("preAggHint", ""); if (col != null) { params.put("type", col.getType().toString()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java index 5cee3db36a8..43990f42d0b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/OlapAnalysisTask.java @@ -136,13 +136,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask { } protected ResultRow collectMinMax() { - // Agg table value columns has no zone map. - // For these columns, skip collecting min and max value to avoid scan whole table. - if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) && !col.isKey()) { - LOG.info("Aggregation table {} column {} is not a key column, skip collecting min and max.", - tbl.getName(), col.getName()); - return null; - } long startTime = System.currentTimeMillis(); Map<String, String> params = buildSqlParams(); StringSubstitutor stringSubstitutor = new StringSubstitutor(params); @@ -264,6 +257,13 @@ public class OlapAnalysisTask extends BaseAnalysisTask { params.put("type", col.getType().toString()); params.put("limit", ""); + // For agg table and mor unique table, set PREAGGOPEN preAggHint. + if (((OlapTable) tbl).getKeysType().equals(KeysType.AGG_KEYS) + || ((OlapTable) tbl).getKeysType().equals(KeysType.UNIQUE_KEYS) + && !((OlapTable) tbl).isUniqKeyMergeOnWrite()) { + params.put("preAggHint", "/*+PREAGGOPEN*/"); + } + // If table row count is less than the target sample row count, simple scan the full table. if (tableRowCount <= targetSampleRows) { params.put("scaleFactor", "1"); @@ -388,6 +388,7 @@ public class OlapAnalysisTask extends BaseAnalysisTask { params.put("colName", StatisticsUtil.escapeColumnName(String.valueOf(info.colName))); params.put("tblName", String.valueOf(tbl.getName())); params.put("index", getIndex()); + params.put("preAggHint", ""); return params; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index f4fdc68f55c..8420e86fa8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -141,10 +141,20 @@ public class StatisticsAutoCollector extends MasterDaemon { protected void processOneJob(TableIf table, Set<Pair<String, String>> columns, JobPriority priority) throws DdlException { appendAllColumns(table, columns); - columns = columns.stream().filter( - c -> StatisticsUtil.needAnalyzeColumn(table, c) || StatisticsUtil.isLongTimeColumn(table, c)) + AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() + ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; + if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { + analysisMethod = AnalysisMethod.FULL; + } + boolean isSampleAnalyze = analysisMethod.equals(AnalysisMethod.SAMPLE); + OlapTable olapTable = table instanceof OlapTable ? (OlapTable) table : null; + columns = columns.stream() + .filter(c -> StatisticsUtil.needAnalyzeColumn(table, c) || StatisticsUtil.isLongTimeColumn(table, c)) + .filter(c -> olapTable == null || StatisticsUtil.canCollectColumn( + olapTable.getIndexMetaByIndexId(olapTable.getIndexIdByName(c.first)).getColumnByName(c.second), + table, isSampleAnalyze, olapTable.getIndexIdByName(c.first))) .collect(Collectors.toSet()); - AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority); + AnalysisInfo analyzeJob = createAnalyzeJobForTbl(table, columns, priority, analysisMethod); if (analyzeJob == null) { return; } @@ -187,12 +197,7 @@ public class StatisticsAutoCollector extends MasterDaemon { } protected AnalysisInfo createAnalyzeJobForTbl( - TableIf table, Set<Pair<String, String>> jobColumns, JobPriority priority) { - AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - ? AnalysisMethod.SAMPLE : AnalysisMethod.FULL; - if (StatisticsUtil.enablePartitionAnalyze() && table.isPartitionedTable()) { - analysisMethod = AnalysisMethod.FULL; - } + TableIf table, Set<Pair<String, String>> jobColumns, JobPriority priority, AnalysisMethod analysisMethod) { AnalysisManager manager = Env.getServingEnv().getAnalysisManager(); TableStatsMeta tableStatsStatus = manager.findTableStatsStatus(table.getId()); if (table instanceof OlapTable && analysisMethod.equals(AnalysisMethod.SAMPLE)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 1800f92148a..7ae7ee8981b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -37,6 +37,7 @@ import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -755,6 +756,25 @@ public class StatisticsUtil { || type instanceof AggStateType; } + public static boolean canCollectColumn(Column c, TableIf table, boolean isSampleAnalyze, long indexId) { + // Full analyze can collect all columns. + if (!isSampleAnalyze) { + return true; + } + // External table can collect all columns. + if (!(table instanceof OlapTable)) { + return true; + } + OlapTable olapTable = (OlapTable) table; + // Skip agg table value columns + KeysType keysType = olapTable.getIndexMetaByIndexId(indexId).getKeysType(); + if (KeysType.AGG_KEYS.equals(keysType) && !c.isKey()) { + return false; + } + // Skip mor unique table value columns + return !KeysType.UNIQUE_KEYS.equals(keysType) || olapTable.isUniqKeyMergeOnWrite() || c.isKey(); + } + public static void sleep(long millis) { try { Thread.sleep(millis); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java index 1404855f092..f9695389715 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/OlapAnalysisTaskTest.java @@ -22,6 +22,7 @@ import org.apache.doris.analysis.TableSample; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.DataProperty; import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.MaterializedIndex; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Partition; @@ -145,7 +146,7 @@ public class OlapAnalysisTaskTest { + "${ndvFunction} as `ndv`, ROUND(SUM(CASE WHEN `null` IS NULL THEN 1 ELSE 0 END) * ${scaleFactor}) AS `null_count`, " + "SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `max`, " + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() FROM " - + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t", sql); + + "( SELECT * FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as t ", sql); return; } }; @@ -173,7 +174,8 @@ public class OlapAnalysisTaskTest { + "COUNT(1) * 4 * ${scaleFactor} AS `data_size`, NOW() " + "FROM ( SELECT t0.`colValue` as `column_key`, COUNT(1) as `count`, SUM(`len`) as `column_length` " + "FROM (SELECT ${subStringColName} AS `colValue`, LENGTH(`null`) as `len` " - + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` GROUP BY `t0`.`colValue` ) as `t1` ", sql); + + "FROM `catalogName`.`${dbName}`.`null` ${sampleHints} ${limit}) as `t0` " + + " GROUP BY `t0`.`colValue` ) as `t1` ", sql); return; } @@ -346,22 +348,70 @@ public class OlapAnalysisTaskTest { return false; } }; - task.col = new Column("test", PrimitiveType.INT); + + new MockUp<OlapTable>() { + @Mock + public KeysType getKeysType() { + return KeysType.DUP_KEYS; + } + }; + task.col = new Column("testColumn", Type.INT, true, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 10); Assertions.assertTrue(task.scanFullTable()); Assertions.assertEquals("1", params.get("scaleFactor")); Assertions.assertEquals("", params.get("sampleHints")); Assertions.assertEquals("ROUND(NDV(`${colName}`) * ${scaleFactor})", params.get("ndvFunction")); + Assertions.assertNull(params.get("preAggHint")); params.clear(); + new MockUp<OlapTable>() { + @Mock + public KeysType getKeysType() { + return KeysType.AGG_KEYS; + } + }; task = new OlapAnalysisTask(); - task.col = new Column("test", PrimitiveType.INT); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); Assertions.assertEquals("SUM(`t1`.`count`) * COUNT(1) / (SUM(`t1`.`count`) - SUM(IF(`t1`.`count` = 1, 1, 0)) + SUM(IF(`t1`.`count` = 1, 1, 0)) * SUM(`t1`.`count`) / 1000)", params.get("ndvFunction")); Assertions.assertEquals("SUM(t1.count) * 4", params.get("dataSizeFunction")); Assertions.assertEquals("`${colName}`", params.get("subStringColName")); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp<OlapTable>() { + @Mock + public KeysType getKeysType() { + return KeysType.UNIQUE_KEYS; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertEquals("/*+PREAGGOPEN*/", params.get("preAggHint")); + params.clear(); + + new MockUp<OlapTable>() { + @Mock + public boolean isUniqKeyMergeOnWrite() { + return true; + } + }; + task = new OlapAnalysisTask(); + task.col = new Column("testColumn", Type.INT, false, null, null, ""); + task.setTable(new OlapTable()); + task.getSampleParams(params, 1000); + Assertions.assertNull(params.get("preAggHint")); params.clear(); new MockUp<OlapAnalysisTask>() { @@ -378,6 +428,7 @@ public class OlapAnalysisTaskTest { task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -392,6 +443,7 @@ public class OlapAnalysisTaskTest { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("10.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -411,6 +463,7 @@ public class OlapAnalysisTaskTest { }; task = new OlapAnalysisTask(); task.col = new Column("test", PrimitiveType.INT); + task.setTable(new OlapTable()); task.getSampleParams(params, 1000); Assertions.assertEquals("20.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); @@ -422,6 +475,7 @@ public class OlapAnalysisTaskTest { task.col = new Column("test", Type.fromPrimitiveType(PrimitiveType.INT), true, null, null, null); task.setKeyColumnSampleTooManyRows(true); + task.setTable(new OlapTable()); task.getSampleParams(params, 2000000000); Assertions.assertEquals("2.0", params.get("scaleFactor")); Assertions.assertEquals("TABLET(1, 2)", params.get("sampleHints")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java index 1d9ea4bd4eb..305eb201dc0 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/StatisticsAutoCollectorTest.java @@ -29,6 +29,7 @@ import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.hive.HMSExternalTable.DLAType; import org.apache.doris.datasource.jdbc.JdbcExternalDatabase; import org.apache.doris.datasource.jdbc.JdbcExternalTable; +import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod; import mockit.Mock; import mockit.MockUp; @@ -163,13 +164,13 @@ public class StatisticsAutoCollectorTest { return false; } }; - Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null)); + Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null, AnalysisMethod.SAMPLE)); new MockUp<OlapTable>() { @Mock public long getRowCountForIndex(long indexId, boolean strict) { return 100; } }; - Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null)); + Assertions.assertNull(collector.createAnalyzeJobForTbl(table, null, null, AnalysisMethod.SAMPLE)); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index 7514d96ddb5..0467f9b89dd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -18,6 +18,8 @@ package org.apache.doris.statistics.util; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndexMeta; import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.TableProperty; @@ -43,6 +45,7 @@ import org.apache.doris.statistics.AnalysisManager; import org.apache.doris.statistics.ColStatsMeta; import org.apache.doris.statistics.ResultRow; import org.apache.doris.statistics.TableStatsMeta; +import org.apache.doris.thrift.TStorageType; import com.google.common.collect.Lists; import mockit.Mock; @@ -501,4 +504,58 @@ class StatisticsUtilTest { }; Assertions.assertTrue(StatisticsUtil.isLongTimeColumn(table, Pair.of("index", column.getName()))); } + + @Test + void testCanCollectColumn() { + Column column = new Column("testColumn", Type.INT, true, null, null, ""); + List<Column> schema = new ArrayList<>(); + schema.add(column); + OlapTable table = new OlapTable(200, "testTable", schema, KeysType.AGG_KEYS, null, null); + + // Test full analyze always return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, false, 1)); + + // Test null table return true; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, null, true, 1)); + + // Test external table always return true; + HMSExternalCatalog externalCatalog = new HMSExternalCatalog(); + HMSExternalDatabase externalDatabase = new HMSExternalDatabase(externalCatalog, 1L, "dbName", "dbName"); + HMSExternalTable hmsTable = new HMSExternalTable(1, "name", "name", externalCatalog, externalDatabase); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, hmsTable, true, 1)); + + // Test agg key return true; + MaterializedIndexMeta meta = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.AGG_KEYS, null); + new MockUp<OlapTable>() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta; + } + }; + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test agg value return false + column = new Column("testColumn", Type.INT, false, null, null, ""); + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor value column return false + MaterializedIndexMeta meta1 = new MaterializedIndexMeta(1L, schema, 1, 1, (short) 1, TStorageType.COLUMN, KeysType.UNIQUE_KEYS, null); + new MockUp<OlapTable>() { + @Mock + public MaterializedIndexMeta getIndexMetaByIndexId(long indexId) { + return meta1; + } + + @Mock + public boolean isUniqKeyMergeOnWrite() { + return false; + } + }; + Assertions.assertFalse(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + // Test unique mor key column return true + column = new Column("testColumn", Type.INT, true, null, null, ""); + Assertions.assertTrue(StatisticsUtil.canCollectColumn(column, table, true, 1)); + + } } diff --git a/regression-test/suites/statistics/analyze_stats.groovy b/regression-test/suites/statistics/analyze_stats.groovy index e57bef056fc..f5563d56980 100644 --- a/regression-test/suites/statistics/analyze_stats.groovy +++ b/regression-test/suites/statistics/analyze_stats.groovy @@ -265,8 +265,7 @@ suite("test_analyze") { sql """analyze table agg_table_test with sample rows 100 with sync""" def agg_result = sql """show column stats agg_table_test (name)""" logger.info("show column agg_table_test(name) stats: " + agg_result) - assertEquals(agg_result[0][7], "N/A") - assertEquals(agg_result[0][8], "N/A") + assertEquals(0, agg_result.size()) // Continue test partition load data for the first time. def reported = false; diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index aafac346f5a..0e1d7a8fa78 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -540,6 +540,7 @@ suite("test_analyze_mv") { assertEquals("4", result_row[0][3]) assertEquals("4", result_row[0][4]) + sql """drop stats mvTestDup""" sql """analyze table mvTestDup with sample rows 4000000""" wait_analyze_finish("mvTestDup") result_sample = sql """SHOW ANALYZE mvTestDup;""" @@ -548,7 +549,7 @@ suite("test_analyze_mv") { logger.info("Analyze job id is " + jobId) result_sample = sql """show column stats mvTestDup""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) result_sample = sql """show column stats mvTestDup(key1)""" logger.info("result " + result_sample) @@ -610,60 +611,18 @@ suite("test_analyze_mv") { result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_SUM__CAST(``value1`` AS bigint)`)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_SUM__CAST(`value1` AS bigint)", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("6", result_sample[0][7]) - assertEquals("3001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MAX__``value2```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MAX__`value2`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("4", result_sample[0][7]) - assertEquals("4001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" logger.info("result " + result_sample) - if ("MANUAL" != result_sample[0][11]) { - logger.info("Overwrite by auto analyze, analyze it again.") - sql """analyze table mvTestDup with sync with sample rows 4000000""" - result_sample = sql """show column stats mvTestDup(`mva_MIN__``value3```)""" - logger.info("result after reanalyze " + result_sample) - } - assertEquals(1, result_sample.size()) - assertEquals("mva_MIN__`value3`", result_sample[0][0]) - assertEquals("mv3", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) - assertEquals("4.0", result_sample[0][3]) - assertEquals("5", result_sample[0][7]) - assertEquals("5001", result_sample[0][8]) - assertEquals("SAMPLE", result_sample[0][9]) - assertEquals("MANUAL", result_sample[0][11]) + assertEquals(0, result_sample.size()) result_sample = sql """show analyze task status ${jobId}""" - assertEquals(12, result_sample.size()) + assertEquals(9, result_sample.size()) def verifyTaskStatus = { result, colName, indexName -> def found = false; for (int i = 0; i < result.size(); i++) { @@ -683,9 +642,6 @@ suite("test_analyze_mv") { verifyTaskStatus(result_sample, "mv_key1", "mv3") verifyTaskStatus(result_sample, "mv_key2", "mv2") verifyTaskStatus(result_sample, "mv_key2", "mv3") - verifyTaskStatus(result_sample, "mva_MAX__`value2`", "mv3") - verifyTaskStatus(result_sample, "mva_MIN__`value3`", "mv3") - verifyTaskStatus(result_sample, "mva_SUM__CAST(`value1` AS bigint)", "mv3") // * Test row count report and report for nereids sql """truncate table mvTestDup""" @@ -810,4 +766,3 @@ suite("test_analyze_mv") { sql """drop database if exists test_analyze_mv""" } - diff --git a/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy new file mode 100644 index 00000000000..3ae8aae15cc --- /dev/null +++ b/regression-test/suites/statistics/test_skip_agg_table_value_column.groovy @@ -0,0 +1,228 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_skip_agg_table_value_column") { + + def wait_row_count_reported = { db, table, row, column, expected -> + def result = sql """show frontends;""" + logger.info("show frontends result origin: " + result) + def host + def port + for (int i = 0; i < result.size(); i++) { + if (result[i][8] == "true") { + host = result[i][1] + port = result[i][4] + } + } + def tokens = context.config.jdbcUrl.split('/') + def url=tokens[0] + "//" + host + ":" + port + logger.info("Master url is " + url) + connect(context.config.jdbcUser, context.config.jdbcPassword, url) { + sql """use ${db}""" + result = sql """show frontends;""" + logger.info("show frontends result master: " + result) + for (int i = 0; i < 120; i++) { + Thread.sleep(5000) + result = sql """SHOW DATA FROM ${table};""" + logger.info("result " + result) + if (result[row][column] == expected) { + return; + } + } + throw new Exception("Row count report timeout.") + } + + } + + sql """drop database if exists test_skip_agg_table_value_column""" + sql """create database test_skip_agg_table_value_column""" + sql """use test_skip_agg_table_value_column""" + sql """set global force_sample_analyze=false""" + sql """set global enable_auto_analyze=false""" + + // Test duplicate table + sql """CREATE TABLE dup ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + DUPLICATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ) + """ + createMV("create materialized view mv1 as select key1 from dup;") + createMV("create materialized view mv2 as select key2 from dup;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from dup group by key1, key2;") + sql """insert into dup values (1, 2, 3, 4, 5), (1, 2, 3, 4, 5), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test aggregate table. + sql """CREATE TABLE agg ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int SUM NOT NULL, + value2 int MAX NOT NULL, + value3 int MIN NOT NULL + )ENGINE=OLAP + AGGREGATE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key2 from agg group by key2;") + createMV("create materialized view mv3 as select key1, key2, sum(value1), max(value2), min(value3) from agg group by key1, key2;") + createMV("create materialized view mv6 as select key1, sum(value1) from agg group by key1;") + sql """insert into agg values (1, 2, 3, 4, 5), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + // Test unique table + sql """ + CREATE TABLE uni_mor ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = false, + "replication_num" = "1" + ); + """ + sql """ + CREATE TABLE uni_mow ( + key1 bigint NOT NULL, + key2 bigint NOT NULL, + value1 int NOT NULL, + value2 int NOT NULL, + value3 int NOT NULL + )ENGINE=OLAP + UNIQUE KEY(`key1`, `key2`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`key1`) BUCKETS 2 + PROPERTIES ( + "enable_unique_key_merge_on_write" = true, + "replication_num" = "1" + ); + """ + createMV("create materialized view mv1 as select key1, key2 from uni_mor;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mor;") + sql """insert into uni_mor values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + createMV("create materialized view mv1 as select key1, key2 from uni_mow;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from uni_mow;") + sql """insert into uni_mow values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" + + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 0, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 1, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 2, 4, "6") + wait_row_count_reported("test_skip_agg_table_value_column", "dup", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "agg", 3, 4, "4") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mor", 2, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 0, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 1, 4, "5") + wait_row_count_reported("test_skip_agg_table_value_column", "uni_mow", 2, 4, "5") + + sql """analyze table dup with sync""" + def result = sql """show column stats dup""" + assertEquals(12, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + sql """analyze table dup with sync with sample rows 400000""" + result = sql """show column stats dup""" + assertEquals(9, result.size()) + sql """drop stats dup""" + result = sql """show column stats dup""" + assertEquals(0, result.size()) + + sql """analyze table agg with sync""" + result = sql """show column stats agg""" + assertEquals(13, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + sql """analyze table agg with sync with sample rows 400000""" + result = sql """show column stats agg""" + assertEquals(6, result.size()) + sql """drop stats agg""" + result = sql """show column stats agg""" + assertEquals(0, result.size()) + + sql """analyze table uni_mor with sync""" + result = sql """show column stats uni_mor""" + assertEquals(11, result.size()) + sql """drop stats uni_mor""" + result = sql """show column stats uni_mor""" + assertEquals(0, result.size()) + sql """analyze table uni_mor with sync with sample rows 400000""" + result = sql """show column stats uni_mor""" + assertEquals(6, result.size()) + sql """drop stats uni_mor""" + result = sql """show column stats uni_mor""" + assertEquals(0, result.size()) + + sql """analyze table uni_mow with sync""" + result = sql """show column stats uni_mow""" + assertEquals(11, result.size()) + sql """drop stats uni_mow""" + result = sql """show column stats uni_mow""" + assertEquals(0, result.size()) + sql """analyze table uni_mow with sync with sample rows 400000""" + result = sql """show column stats uni_mow""" + assertEquals(11, result.size()) + sql """drop stats uni_mow""" + result = sql """show column stats uni_mow""" + assertEquals(0, result.size()) + + // Test PREAGGOPEN hint. + explain { + sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_si [...] + contains "PREAGGREGATION: ON" + notContains "PREAGGREGATION: OFF" + } + explain { + sql("SELECT CONCAT(1744255158798, '-', 1744255158812, '-', 'mv_key2') AS `id`, 0 AS `catalog_id`, 1744255158742 AS `db_id`, 1744255158798 AS `tbl_id`, 1744255158812 AS `idx_id`, 'mv_key2' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key2`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key2` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('2' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('2001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_si [...] + contains "PREAGGREGATION: OFF" + notContains "PREAGGREGATION: ON" + } + explain { + sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_si [...] + contains "PREAGGREGATION: ON" + notContains "PREAGGREGATION: OFF" + } + explain { + sql("SELECT CONCAT(1744255159211, '-', 1744255159224, '-', 'mv_key1') AS `id`, 0 AS `catalog_id`, 1744255159182 AS `db_id`, 1744255159211 AS `tbl_id`, 1744255159224 AS `idx_id`, 'mv_key1' AS `col_id`, NULL AS `part_id`, 5 AS `row_count`, ROUND(NDV(`mv_key1`) * 1) as `ndv`, ROUND(SUM(CASE WHEN `mv_key1` IS NULL THEN 1 ELSE 0 END) * 1) AS `null_count`, SUBSTRING(CAST('1' AS STRING), 1, 1024) AS `min`, SUBSTRING(CAST('1001' AS STRING), 1, 1024) AS `max`, COUNT(1) * 8 * 1 AS `data_si [...] + contains "PREAGGREGATION: OFF" + notContains "PREAGGREGATION: ON" + } + + sql """drop database if exists test_skip_agg_table_value_column""" +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org