This is an automated email from the ASF dual-hosted git repository. englefly pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 5286b901c17 [feat](nereids)Compressed materialize for aggregate and sort (#42408) 5286b901c17 is described below commit 5286b901c175071dc438456b2d63c91e63eacaba Author: minghong <zhoumingh...@selectdb.com> AuthorDate: Tue Nov 12 16:33:16 2024 +0800 [feat](nereids)Compressed materialize for aggregate and sort (#42408) ## Proposed changes If A is a short string column (len < 4): 1. 'select A from T group by A' => 'select DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A)' 2. 'select * from T order by A' => 'select * from T order by encode_as_int(A)' For other sizes, the short string will be wrapped by encode_as_smallint/encode_as_bigint/encode_as_largeint instead. The session variable ENABLE_COMPRESS_MATERIALIZE is used to switch this optimization on/off. Issue Number: close #xxx <!--Describe your changes.--> --- .../doris/nereids/jobs/executor/Analyzer.java | 2 + .../org/apache/doris/nereids/rules/RuleType.java | 2 + .../rules/analysis/CompressedMaterialize.java | 166 +++++++++++++++++ .../rewrite/PushDownFilterThroughProject.java | 57 +++++- .../functions/scalar/EncodeAsBigInt.java | 2 +- .../expressions/functions/scalar/EncodeAsInt.java | 2 +- .../functions/scalar/EncodeAsLargeInt.java | 2 +- .../functions/scalar/EncodeAsSmallInt.java | 2 +- .../functions/scalar/EncodeStrToInteger.java | 24 +++ .../plans/physical/PhysicalHashAggregate.java | 7 + .../java/org/apache/doris/qe/SessionVariable.java | 9 + .../compress_materialize/compress_materialize.out | 55 ++++++ .../compress_materialize.groovy | 198 +++++++++++++++++++++ 13 files changed, 520 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java index 894d4264201..8985dadc0bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java @@ -31,6 +31,7 @@ import org.apache.doris.nereids.rules.analysis.CheckAnalysis; import org.apache.doris.nereids.rules.analysis.CheckPolicy; import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint; import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias; +import org.apache.doris.nereids.rules.analysis.CompressedMaterialize; import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant; import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant; import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint; @@ -166,6 +167,7 @@ public class Analyzer extends AbstractBatchJobExecutor { topDown(new EliminateGroupByConstant()), topDown(new SimplifyAggGroupBy()), + topDown(new CompressedMaterialize()), topDown(new NormalizeAggregate()), topDown(new HavingToFilter()), topDown(new QualifyToFilter()), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java index dbf96ef2f1f..beb8bd43655 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java @@ -106,6 +106,8 @@ public enum RuleType { CHECK_DATA_TYPES(RuleTypeClass.CHECK), // rewrite rules + COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE), + COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE), NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE), NORMALIZE_SORT(RuleTypeClass.REWRITE), NORMALIZE_REPEAT(RuleTypeClass.REWRITE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java new file mode 100644 index 00000000000..7d8a7664f82 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java @@ -0,0 +1,166 @@ 
+// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.analysis; + +import org.apache.doris.nereids.properties.OrderKey; +import org.apache.doris.nereids.rules.Rule; +import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.expressions.Alias; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.NamedExpression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsBigInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsLargeInt; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsSmallInt; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalSort; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.coercion.CharacterType; +import 
org.apache.doris.nereids.util.ExpressionUtils; +import org.apache.doris.qe.ConnectContext; + +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +/** + * convert string to int in order to improve performance for aggregation and sorting. + * + * 1. AGG + * select A from T group by A + * => + * select DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A) + * + * 2. Sort + * select * from T order by A + * => + * select * from T order by encode_as_int(A) + */ +public class CompressedMaterialize implements AnalysisRuleFactory { + @Override + public List<Rule> buildRules() { + return ImmutableList.of( + RuleType.COMPRESSED_MATERIALIZE_AGG.build( + logicalAggregate().when(a -> ConnectContext.get() != null + && ConnectContext.get().getSessionVariable().enableCompressMaterialize) + .then(this::compressedMaterializeAggregate)), + RuleType.COMPRESSED_MATERIALIZE_SORT.build( + logicalSort().when(a -> ConnectContext.get() != null + && ConnectContext.get().getSessionVariable().enableCompressMaterialize) + .then(this::compressMaterializeSort) + ) + ); + } + + private LogicalSort<Plan> compressMaterializeSort(LogicalSort<Plan> sort) { + List<OrderKey> newOrderKeys = Lists.newArrayList(); + boolean changed = false; + for (OrderKey orderKey : sort.getOrderKeys()) { + Expression expr = orderKey.getExpr(); + Optional<Expression> encode = getEncodeExpression(expr); + if (encode.isPresent()) { + newOrderKeys.add(new OrderKey(encode.get(), + orderKey.isAsc(), + orderKey.isNullFirst())); + changed = true; + } else { + newOrderKeys.add(orderKey); + } + } + return changed ? 
sort.withOrderKeys(newOrderKeys) : sort; + } + + private Optional<Expression> getEncodeExpression(Expression expression) { + DataType type = expression.getDataType(); + Expression encodeExpr = null; + if (type instanceof CharacterType) { + CharacterType ct = (CharacterType) type; + if (ct.getLen() > 0) { + // skip column from variant, like 'L.var["L_SHIPMODE"] AS TEXT' + if (ct.getLen() < 2) { + encodeExpr = new EncodeAsSmallInt(expression); + } else if (ct.getLen() < 4) { + encodeExpr = new EncodeAsInt(expression); + } else if (ct.getLen() < 7) { + encodeExpr = new EncodeAsBigInt(expression); + } else if (ct.getLen() < 15) { + encodeExpr = new EncodeAsLargeInt(expression); + } + } + } + return Optional.ofNullable(encodeExpr); + } + + /* + example: + [support] select sum(v) from t group by substring(k, 1,2) + [not support] select substring(k, 1,2), sum(v) from t group by substring(k, 1,2) + [support] select k, sum(v) from t group by k + [not support] select substring(k, 1,2), sum(v) from t group by k + [support] select A as B from T group by A + */ + private Map<Expression, Expression> getEncodeGroupByExpressions(LogicalAggregate<Plan> aggregate) { + Map<Expression, Expression> encodeGroupbyExpressions = Maps.newHashMap(); + for (Expression gb : aggregate.getGroupByExpressions()) { + Optional<Expression> encodeExpr = getEncodeExpression(gb); + encodeExpr.ifPresent(expression -> encodeGroupbyExpressions.put(gb, expression)); + } + return encodeGroupbyExpressions; + } + + private LogicalAggregate<Plan> compressedMaterializeAggregate(LogicalAggregate<Plan> aggregate) { + Map<Expression, Expression> encodeGroupByExpressions = getEncodeGroupByExpressions(aggregate); + if (!encodeGroupByExpressions.isEmpty()) { + List<Expression> newGroupByExpressions = Lists.newArrayList(); + for (Expression gp : aggregate.getGroupByExpressions()) { + newGroupByExpressions.add(encodeGroupByExpressions.getOrDefault(gp, gp)); + } + List<NamedExpression> newOutputs = Lists.newArrayList(); 
+ Map<Expression, Expression> decodeMap = new HashMap<>(); + for (Expression gp : encodeGroupByExpressions.keySet()) { + decodeMap.put(gp, new DecodeAsVarchar(encodeGroupByExpressions.get(gp))); + } + for (NamedExpression out : aggregate.getOutputExpressions()) { + Expression replaced = ExpressionUtils.replace(out, decodeMap); + if (out != replaced) { + if (out instanceof SlotReference) { + newOutputs.add(new Alias(out.getExprId(), replaced, out.getName())); + } else if (out instanceof Alias) { + newOutputs.add(((Alias) out).withChildren(replaced.children())); + } else { + // should not reach here + Preconditions.checkArgument(false, "output abnormal: " + aggregate); + } + } else { + newOutputs.add(out); + } + } + aggregate = aggregate.withGroupByAndOutput(newGroupByExpressions, newOutputs); + } + return aggregate; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java index f6f7c2d1100..38b687ba838 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java @@ -22,16 +22,21 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar; +import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; import org.apache.doris.nereids.trees.plans.logical.LogicalLimit; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.util.ExpressionUtils; 
import org.apache.doris.nereids.util.PlanUtils; +import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import java.util.LinkedHashSet; import java.util.List; import java.util.Set; @@ -80,8 +85,15 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { // just return unchanged plan return null; } - project = (LogicalProject<? extends Plan>) project.withChildren(new LogicalFilter<>( - ExpressionUtils.replace(splitConjuncts.second, project.getAliasToProducer()), + Set<Expression> conjuncts; + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableCompressMaterialize) { + conjuncts = ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second), + project.getAliasToProducer()); + } else { + conjuncts = ExpressionUtils.replace(splitConjuncts.second, + project.getAliasToProducer()); + } + project = (LogicalProject<? extends Plan>) project.withChildren(new LogicalFilter<>(conjuncts, project.child())); return PlanUtils.filterOrSelf(splitConjuncts.first, project); } @@ -97,10 +109,17 @@ public class PushDownFilterThroughProject implements RewriteRuleFactory { if (splitConjuncts.second.isEmpty()) { return null; } + Set<Expression> conjuncts; + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().enableCompressMaterialize) { + conjuncts = ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second), + project.getAliasToProducer()); + } else { + conjuncts = ExpressionUtils.replace(splitConjuncts.second, + project.getAliasToProducer()); + } project = project.withProjectsAndChild(project.getProjects(), new LogicalFilter<>( - ExpressionUtils.replace(splitConjuncts.second, - project.getAliasToProducer()), + conjuncts, limit.withChildren(project.child()))); return PlanUtils.filterOrSelf(splitConjuncts.first, project); } @@ -119,4 +138,34 @@ public class 
PushDownFilterThroughProject implements RewriteRuleFactory { } return Pair.of(remainPredicates, pushDownPredicates); } + + private static Set<Expression> eliminateDecodeAndEncode(Set<Expression> expressions) { + LinkedHashSet<Expression> eliminated = new LinkedHashSet<Expression>(); + // keep expression order + for (Expression expression : expressions) { + eliminated.add(eliminateDecodeAndEncode(expression)); + } + return eliminated; + } + + private static Expression eliminateDecodeAndEncode(Expression expression) { + if (expression instanceof DecodeAsVarchar && expression.child(0) instanceof EncodeStrToInteger) { + return expression.child(0).child(0); + } + boolean hasNewChild = false; + List<Expression> newChildren = Lists.newArrayList(); + for (Expression child : expression.children()) { + Expression replace = eliminateDecodeAndEncode(child); + if (replace != child) { + hasNewChild = true; + newChildren.add(replace); + } else { + newChildren.add(child); + } + } + if (hasNewChild) { + return expression.withChildren(newChildren); + } + return expression; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java index 59a31b4da49..7d798ecf3e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java @@ -34,7 +34,7 @@ import java.util.List; * ScalarFunction 'EncodeAsBigInt'. 
*/ public class EncodeAsBigInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java index 30729354379..5c6382d6ea1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java @@ -34,7 +34,7 @@ import java.util.List; * ScalarFunction 'EncodeAsInt'. */ public class EncodeAsInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java index 7cfce246257..bb30a9a8e8a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java @@ -34,7 +34,7 @@ import java.util.List; * ScalarFunction 'EncodeAsLargeInt'. 
*/ public class EncodeAsLargeInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(LargeIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java index 0809c935a57..355a740197c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java @@ -34,7 +34,7 @@ import java.util.List; * ScalarFunction 'CompressAsSmallInt'. */ public class EncodeAsSmallInt extends ScalarFunction - implements ExplicitlyCastableSignature, PropagateNullable { + implements ExplicitlyCastableSignature, PropagateNullable, EncodeStrToInteger { public static final List<FunctionSignature> SIGNATURES = ImmutableList.of( FunctionSignature.ret(SmallIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java new file mode 100644 index 00000000000..87a9c43687d --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +/** + * Encode_as_XXXInt + */ +public interface EncodeStrToInteger { +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java index 404c30fe379..2a78b063a97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java @@ -236,6 +236,13 @@ public class PhysicalHashAggregate<CHILD_TYPE extends Plan> extends PhysicalUnar aggregateParam, maybeUsingStream, requireProperties); } + public PhysicalHashAggregate<Plan> withGroupByExpressions(List<Expression> newGroupByExpressions) { + return new PhysicalHashAggregate<>(newGroupByExpressions, outputExpressions, partitionExpressions, + aggregateParam, maybeUsingStream, groupExpression, getLogicalProperties(), + requireProperties, physicalProperties, statistics, + child()); + } + @Override public PhysicalHashAggregate<Plan> withChildren(List<Plan> children) { Preconditions.checkArgument(children.size() == 1); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 
4b1049649b8..99043902819 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -2180,6 +2180,15 @@ public class SessionVariable implements Serializable, Writable { needForward = true, fuzzy = true) public boolean enableSortSpill = false; + @VariableMgr.VarAttr( + name = "ENABLE_COMPRESS_MATERIALIZE", + description = {"控制是否启用compress materialize。", + "enable compress-materialize. "}, + needForward = true, fuzzy = false, + varType = VariableAnnotation.EXPERIMENTAL + ) + public boolean enableCompressMaterialize = false; + @VariableMgr.VarAttr( name = ENABLE_AGG_SPILL, description = {"控制是否启用聚合算子落盘。默认为 false。", diff --git a/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out new file mode 100644 index 00000000000..eee04795628 --- /dev/null +++ b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out @@ -0,0 +1,55 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !agg_exec -- +aaaaa +bbbbb + +-- !output_contains_gpk -- +aaaaa aaaaa +bbbbb bbbbb + +-- !expr -- +aaa +bbb + +-- !encodeexpr -- +12 +3 + +-- !sort -- +\N 6 + 7 +a 1 +aa 2 +b 4 +b 5 +bb 3 +中 8 +国 9 + +-- !sort -- +国 9 +中 8 +bb 3 +b 4 +b 5 +aa 2 +a 1 + 7 +\N 6 + +-- !sort -- +国 9 +中 8 +bb 3 +b 5 +b 4 +aa 2 +a 1 + 7 +\N 6 + +-- !sort -- +国 9 +中 8 +bb 3 + diff --git a/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy new file mode 100644 index 00000000000..8489de2aa2a --- /dev/null +++ b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy @@ -0,0 +1,198 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("compress_materialize") { + sql """ + drop table if exists compress; + CREATE TABLE `compress` ( + `k` varchar(5) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compress values ("aaaaaa", 1), ("aaaaaa", 2), ("bbbbb", 3), ("bbbbb", 4), ("bbbbb", 5); + + + drop table if exists cmt2; + CREATE TABLE `cmt2` ( + `k2` varchar(5) NOT NULL, + `v2` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k2`) + DISTRIBUTED BY random + PROPERTIES ( + "replication_num" = "1" + ); + + insert into cmt2 values ("aaaa", 1), ("b", 3); + insert into cmt2 values("123456", 123456); + + set ENABLE_COMPRESS_MATERIALIZE = true; + """ + + explain{ + sql (""" + select k from compress group by k; + """) + contains("encode_as_bigint") + } + order_qt_agg_exec "select k from compress group by k;" + + explain{ + sql (""" + select k, substring(k, 1), sum(v) from compress group by k; + """) + contains("encode_as_bigint(k)") + } + order_qt_output_contains_gpk "select k, substring(k, 1) from compress group by k;" + + order_qt_expr """ select substring(k,1,3) from compress group by substring(k,1,3);""" + explain{ + sql "select substring(k,1,3) from compress group by substring(k,1,3);" + 
contains("encode_as_int(substring(k, 1, 3))") + } + + explain { + sql("select sum(v) from compress group by substring(k, 1, 3);") + contains("group by: encode_as_int(substring(k, 1, 3))") + } + + explain { + sql("select sum(v) from compress group by substring(k, 1, 4);") + contains("group by: encode_as_bigint(substring(k, 1, 4))") + } + + order_qt_encodeexpr "select sum(v) from compress group by substring(k, 1, 3);" + + // TODO: RF targets on compressed_materialze column is broken + // // verify that compressed materialization do not block runtime filter generation + // sql """ + // set disable_join_reorder=true; + // set runtime_filter_mode = GLOBAL; + // set runtime_filter_type=2; + // set enable_runtime_filter_prune=false; + // """ + + // qt_join """ + // explain shape plan + // select * + // from ( + // select k from compress group by k + // ) T join[broadcast] cmt2 on T.k = cmt2.k2; + // """ + + + sql """ + drop table if exists compressInt; + CREATE TABLE `compressInt` ( + `k` varchar(3) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from compressInt group by k" + contains("encode_as_int") + } + + sql """ + drop table if exists compressLargeInt; + CREATE TABLE `compressLargeInt` ( + `k` varchar(10) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressLargeInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from compressLargeInt group by k" + contains("group by: encode_as_largeint(k)") + } + + + sql """ + drop table if exists notcompress; + CREATE TABLE `notcompress` ( + `k` varchar(16) NOT NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY 
HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into notcompress values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + """ + explain{ + sql "select k from notcompress group by k" + notContains("encode_as_") + } + + sql """ + drop table if exists compressSort; + CREATE TABLE `compressSort` ( + `k` varchar(3) NULL, + `v` int NOT NULL + ) ENGINE=OLAP + duplicate KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + ); + + + insert into compressSort values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), ("b", 5); + insert into compressSort(v) values (6); + insert into compressSort values ("",7), ("中", 8), ("国", 9); + """ + explain { + sql "select v from compressSort order by k" + contains("order by: encode_as_int(k)") +// expect plan fragment: +// 1:VSORT(140) | +// order by: encode_as_int(k)[#5] ASC | +// algorithm: full sort | +// offset: 0 | +// distribute expr lists: + } + qt_sort "select * from compressSort order by k asc, v"; + qt_sort "select * from compressSort order by k desc, v"; + qt_sort "select * from compressSort order by k desc nulls last"; + qt_sort "select * from compressSort order by k desc nulls last, v limit 3"; + +} + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org