This is an automated email from the ASF dual-hosted git repository.

englefly pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 5286b901c17 [feat](nereids)Compressed materialize for aggregate and 
sort (#42408)
5286b901c17 is described below

commit 5286b901c175071dc438456b2d63c91e63eacaba
Author: minghong <zhoumingh...@selectdb.com>
AuthorDate: Tue Nov 12 16:33:16 2024 +0800

    [feat](nereids)Compressed materialize for aggregate and sort (#42408)
    
    ## Proposed changes
    if A is a short string column (len<4)
    1. 'select A from T group by A' => 'select
    DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A)'
    2. 'select * from T order by A' => 'select * from T order by
    encode_as_int(A)'
    
    For other lengths, the short string is wrapped by
    encode_as_smallint, encode_as_bigint, or encode_as_largeint.
    
    The session variable ENABLE_COMPRESS_MATERIALIZE is used to switch
    this optimization on or off.
    
    
    
    Issue Number: close #xxx
    
    <!--Describe your changes.-->
---
 .../doris/nereids/jobs/executor/Analyzer.java      |   2 +
 .../org/apache/doris/nereids/rules/RuleType.java   |   2 +
 .../rules/analysis/CompressedMaterialize.java      | 166 +++++++++++++++++
 .../rewrite/PushDownFilterThroughProject.java      |  57 +++++-
 .../functions/scalar/EncodeAsBigInt.java           |   2 +-
 .../expressions/functions/scalar/EncodeAsInt.java  |   2 +-
 .../functions/scalar/EncodeAsLargeInt.java         |   2 +-
 .../functions/scalar/EncodeAsSmallInt.java         |   2 +-
 .../functions/scalar/EncodeStrToInteger.java       |  24 +++
 .../plans/physical/PhysicalHashAggregate.java      |   7 +
 .../java/org/apache/doris/qe/SessionVariable.java  |   9 +
 .../compress_materialize/compress_materialize.out  |  55 ++++++
 .../compress_materialize.groovy                    | 198 +++++++++++++++++++++
 13 files changed, 520 insertions(+), 8 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
index 894d4264201..8985dadc0bd 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Analyzer.java
@@ -31,6 +31,7 @@ import org.apache.doris.nereids.rules.analysis.CheckAnalysis;
 import org.apache.doris.nereids.rules.analysis.CheckPolicy;
 import org.apache.doris.nereids.rules.analysis.CollectJoinConstraint;
 import org.apache.doris.nereids.rules.analysis.CollectSubQueryAlias;
+import org.apache.doris.nereids.rules.analysis.CompressedMaterialize;
 import org.apache.doris.nereids.rules.analysis.EliminateDistinctConstant;
 import org.apache.doris.nereids.rules.analysis.EliminateGroupByConstant;
 import org.apache.doris.nereids.rules.analysis.EliminateLogicalSelectHint;
@@ -166,6 +167,7 @@ public class Analyzer extends AbstractBatchJobExecutor {
             topDown(new EliminateGroupByConstant()),
 
             topDown(new SimplifyAggGroupBy()),
+            topDown(new CompressedMaterialize()),
             topDown(new NormalizeAggregate()),
             topDown(new HavingToFilter()),
             topDown(new QualifyToFilter()),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
index dbf96ef2f1f..beb8bd43655 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleType.java
@@ -106,6 +106,8 @@ public enum RuleType {
     CHECK_DATA_TYPES(RuleTypeClass.CHECK),
 
     // rewrite rules
+    COMPRESSED_MATERIALIZE_AGG(RuleTypeClass.REWRITE),
+    COMPRESSED_MATERIALIZE_SORT(RuleTypeClass.REWRITE),
     NORMALIZE_AGGREGATE(RuleTypeClass.REWRITE),
     NORMALIZE_SORT(RuleTypeClass.REWRITE),
     NORMALIZE_REPEAT(RuleTypeClass.REWRITE),
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
new file mode 100644
index 00000000000..7d8a7664f82
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CompressedMaterialize.java
@@ -0,0 +1,166 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.properties.OrderKey;
+import org.apache.doris.nereids.rules.Rule;
+import org.apache.doris.nereids.rules.RuleType;
+import org.apache.doris.nereids.trees.expressions.Alias;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.NamedExpression;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsBigInt;
+import org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsInt;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsLargeInt;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeAsSmallInt;
+import org.apache.doris.nereids.trees.plans.Plan;
+import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
+import org.apache.doris.nereids.trees.plans.logical.LogicalSort;
+import org.apache.doris.nereids.types.DataType;
+import org.apache.doris.nereids.types.coercion.CharacterType;
+import org.apache.doris.nereids.util.ExpressionUtils;
+import org.apache.doris.qe.ConnectContext;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * convert string to int in order to improve performance for aggregation and 
sorting.
+ *
+ * 1. AGG
+ * select A from T group by A
+ * =>
+ * select DecodeAsVarchar(encode_as_int(A)) from T group by encode_as_int(A)
+ *
+ * 2. Sort
+ * select * from T order by A
+ * =>
+ * select * from T order by encode_as_int(A)
+ */
+public class CompressedMaterialize implements AnalysisRuleFactory {
+    @Override
+    public List<Rule> buildRules() {
+        return ImmutableList.of(
+                RuleType.COMPRESSED_MATERIALIZE_AGG.build(
+                        logicalAggregate().when(a -> ConnectContext.get() != 
null
+                                && 
ConnectContext.get().getSessionVariable().enableCompressMaterialize)
+                        .then(this::compressedMaterializeAggregate)),
+                RuleType.COMPRESSED_MATERIALIZE_SORT.build(
+                        logicalSort().when(a -> ConnectContext.get() != null
+                                && 
ConnectContext.get().getSessionVariable().enableCompressMaterialize)
+                                .then(this::compressMaterializeSort)
+                )
+        );
+    }
+
+    private LogicalSort<Plan> compressMaterializeSort(LogicalSort<Plan> sort) {
+        List<OrderKey> newOrderKeys = Lists.newArrayList();
+        boolean changed = false;
+        for (OrderKey orderKey : sort.getOrderKeys()) {
+            Expression expr = orderKey.getExpr();
+            Optional<Expression> encode = getEncodeExpression(expr);
+            if (encode.isPresent()) {
+                newOrderKeys.add(new OrderKey(encode.get(),
+                        orderKey.isAsc(),
+                        orderKey.isNullFirst()));
+                changed = true;
+            } else {
+                newOrderKeys.add(orderKey);
+            }
+        }
+        return changed ? sort.withOrderKeys(newOrderKeys) : sort;
+    }
+
+    private Optional<Expression> getEncodeExpression(Expression expression) {
+        DataType type = expression.getDataType();
+        Expression encodeExpr = null;
+        if (type instanceof CharacterType) {
+            CharacterType ct = (CharacterType) type;
+            if (ct.getLen() > 0) {
+                // skip column from variant, like 'L.var["L_SHIPMODE"] AS TEXT'
+                if (ct.getLen() < 2) {
+                    encodeExpr = new EncodeAsSmallInt(expression);
+                } else if (ct.getLen() < 4) {
+                    encodeExpr = new EncodeAsInt(expression);
+                } else if (ct.getLen() < 7) {
+                    encodeExpr = new EncodeAsBigInt(expression);
+                } else if (ct.getLen() < 15) {
+                    encodeExpr = new EncodeAsLargeInt(expression);
+                }
+            }
+        }
+        return Optional.ofNullable(encodeExpr);
+    }
+
+    /*
+    example:
+    [support] select sum(v) from t group by substring(k, 1,2)
+    [not support] select substring(k, 1,2), sum(v) from t group by 
substring(k, 1,2)
+    [support] select k, sum(v) from t group by k
+    [not support] select substring(k, 1,2), sum(v) from t group by k
+    [support]  select A as B from T group by A
+    */
+    private Map<Expression, Expression> 
getEncodeGroupByExpressions(LogicalAggregate<Plan> aggregate) {
+        Map<Expression, Expression> encodeGroupbyExpressions = 
Maps.newHashMap();
+        for (Expression gb : aggregate.getGroupByExpressions()) {
+            Optional<Expression> encodeExpr = getEncodeExpression(gb);
+            encodeExpr.ifPresent(expression -> 
encodeGroupbyExpressions.put(gb, expression));
+        }
+        return encodeGroupbyExpressions;
+    }
+
+    private LogicalAggregate<Plan> 
compressedMaterializeAggregate(LogicalAggregate<Plan> aggregate) {
+        Map<Expression, Expression> encodeGroupByExpressions = 
getEncodeGroupByExpressions(aggregate);
+        if (!encodeGroupByExpressions.isEmpty()) {
+            List<Expression> newGroupByExpressions = Lists.newArrayList();
+            for (Expression gp : aggregate.getGroupByExpressions()) {
+                
newGroupByExpressions.add(encodeGroupByExpressions.getOrDefault(gp, gp));
+            }
+            List<NamedExpression> newOutputs = Lists.newArrayList();
+            Map<Expression, Expression> decodeMap = new HashMap<>();
+            for (Expression gp : encodeGroupByExpressions.keySet()) {
+                decodeMap.put(gp, new 
DecodeAsVarchar(encodeGroupByExpressions.get(gp)));
+            }
+            for (NamedExpression out : aggregate.getOutputExpressions()) {
+                Expression replaced = ExpressionUtils.replace(out, decodeMap);
+                if (out != replaced) {
+                    if (out instanceof SlotReference) {
+                        newOutputs.add(new Alias(out.getExprId(), replaced, 
out.getName()));
+                    } else if (out instanceof Alias) {
+                        newOutputs.add(((Alias) 
out).withChildren(replaced.children()));
+                    } else {
+                        // should not reach here
+                        Preconditions.checkArgument(false, "output abnormal: " 
+ aggregate);
+                    }
+                } else {
+                    newOutputs.add(out);
+                }
+            }
+            aggregate = aggregate.withGroupByAndOutput(newGroupByExpressions, 
newOutputs);
+        }
+        return aggregate;
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
index f6f7c2d1100..38b687ba838 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PushDownFilterThroughProject.java
@@ -22,16 +22,21 @@ import org.apache.doris.nereids.rules.Rule;
 import org.apache.doris.nereids.rules.RuleType;
 import org.apache.doris.nereids.trees.expressions.Expression;
 import org.apache.doris.nereids.trees.expressions.Slot;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.DecodeAsVarchar;
+import 
org.apache.doris.nereids.trees.expressions.functions.scalar.EncodeStrToInteger;
 import org.apache.doris.nereids.trees.plans.Plan;
 import org.apache.doris.nereids.trees.plans.logical.LogicalFilter;
 import org.apache.doris.nereids.trees.plans.logical.LogicalLimit;
 import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
 import org.apache.doris.nereids.util.ExpressionUtils;
 import org.apache.doris.nereids.util.PlanUtils;
+import org.apache.doris.qe.ConnectContext;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
 
+import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -80,8 +85,15 @@ public class PushDownFilterThroughProject implements 
RewriteRuleFactory {
             // just return unchanged plan
             return null;
         }
-        project = (LogicalProject<? extends Plan>) project.withChildren(new 
LogicalFilter<>(
-                ExpressionUtils.replace(splitConjuncts.second, 
project.getAliasToProducer()),
+        Set<Expression> conjuncts;
+        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableCompressMaterialize) {
+            conjuncts = 
ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second),
+                    project.getAliasToProducer());
+        } else {
+            conjuncts = ExpressionUtils.replace(splitConjuncts.second,
+                    project.getAliasToProducer());
+        }
+        project = (LogicalProject<? extends Plan>) project.withChildren(new 
LogicalFilter<>(conjuncts,
                 project.child()));
         return PlanUtils.filterOrSelf(splitConjuncts.first, project);
     }
@@ -97,10 +109,17 @@ public class PushDownFilterThroughProject implements 
RewriteRuleFactory {
         if (splitConjuncts.second.isEmpty()) {
             return null;
         }
+        Set<Expression> conjuncts;
+        if (ConnectContext.get() != null && 
ConnectContext.get().getSessionVariable().enableCompressMaterialize) {
+            conjuncts = 
ExpressionUtils.replace(eliminateDecodeAndEncode(splitConjuncts.second),
+                    project.getAliasToProducer());
+        } else {
+            conjuncts = ExpressionUtils.replace(splitConjuncts.second,
+                    project.getAliasToProducer());
+        }
         project = project.withProjectsAndChild(project.getProjects(),
                 new LogicalFilter<>(
-                        ExpressionUtils.replace(splitConjuncts.second,
-                                project.getAliasToProducer()),
+                        conjuncts,
                         limit.withChildren(project.child())));
         return PlanUtils.filterOrSelf(splitConjuncts.first, project);
     }
@@ -119,4 +138,34 @@ public class PushDownFilterThroughProject implements 
RewriteRuleFactory {
         }
         return Pair.of(remainPredicates, pushDownPredicates);
     }
+
+    private static Set<Expression> eliminateDecodeAndEncode(Set<Expression> 
expressions) {
+        LinkedHashSet<Expression> eliminated = new LinkedHashSet<Expression>();
+        // keep expression order
+        for (Expression expression : expressions) {
+            eliminated.add(eliminateDecodeAndEncode(expression));
+        }
+        return eliminated;
+    }
+
+    private static Expression eliminateDecodeAndEncode(Expression expression) {
+        if (expression instanceof DecodeAsVarchar && expression.child(0) 
instanceof EncodeStrToInteger) {
+            return expression.child(0).child(0);
+        }
+        boolean hasNewChild = false;
+        List<Expression> newChildren = Lists.newArrayList();
+        for (Expression child : expression.children()) {
+            Expression replace = eliminateDecodeAndEncode(child);
+            if (replace != child) {
+                hasNewChild = true;
+                newChildren.add(replace);
+            } else {
+                newChildren.add(child);
+            }
+        }
+        if (hasNewChild) {
+            return expression.withChildren(newChildren);
+        }
+        return expression;
+    }
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
index 59a31b4da49..7d798ecf3e8 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsBigInt.java
@@ -34,7 +34,7 @@ import java.util.List;
  * ScalarFunction 'EncodeAsBigInt'.
  */
 public class EncodeAsBigInt extends ScalarFunction
-        implements ExplicitlyCastableSignature, PropagateNullable {
+        implements ExplicitlyCastableSignature, PropagateNullable, 
EncodeStrToInteger {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(BigIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
index 30729354379..5c6382d6ea1 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsInt.java
@@ -34,7 +34,7 @@ import java.util.List;
  * ScalarFunction 'EncodeAsInt'.
  */
 public class EncodeAsInt extends ScalarFunction
-        implements ExplicitlyCastableSignature, PropagateNullable {
+        implements ExplicitlyCastableSignature, PropagateNullable, 
EncodeStrToInteger {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(IntegerType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
index 7cfce246257..bb30a9a8e8a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsLargeInt.java
@@ -34,7 +34,7 @@ import java.util.List;
  * ScalarFunction 'EncodeAsLargeInt'.
  */
 public class EncodeAsLargeInt extends ScalarFunction
-        implements ExplicitlyCastableSignature, PropagateNullable {
+        implements ExplicitlyCastableSignature, PropagateNullable, 
EncodeStrToInteger {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(LargeIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
index 0809c935a57..355a740197c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeAsSmallInt.java
@@ -34,7 +34,7 @@ import java.util.List;
  * ScalarFunction 'CompressAsSmallInt'.
  */
 public class EncodeAsSmallInt extends ScalarFunction
-        implements ExplicitlyCastableSignature, PropagateNullable {
+        implements ExplicitlyCastableSignature, PropagateNullable, 
EncodeStrToInteger {
 
     public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
             
FunctionSignature.ret(SmallIntType.INSTANCE).args(VarcharType.SYSTEM_DEFAULT)
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
new file mode 100644
index 00000000000..87a9c43687d
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/EncodeStrToInteger.java
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.scalar;
+
+/**
+ * Encode_as_XXXInt
+ */
+public interface EncodeStrToInteger {
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
index 404c30fe379..2a78b063a97 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java
@@ -236,6 +236,13 @@ public class PhysicalHashAggregate<CHILD_TYPE extends 
Plan> extends PhysicalUnar
                 aggregateParam, maybeUsingStream, requireProperties);
     }
 
+    public PhysicalHashAggregate<Plan> withGroupByExpressions(List<Expression> 
newGroupByExpressions) {
+        return new PhysicalHashAggregate<>(newGroupByExpressions, 
outputExpressions, partitionExpressions,
+                aggregateParam, maybeUsingStream, groupExpression, 
getLogicalProperties(),
+                requireProperties, physicalProperties, statistics,
+                child());
+    }
+
     @Override
     public PhysicalHashAggregate<Plan> withChildren(List<Plan> children) {
         Preconditions.checkArgument(children.size() == 1);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 4b1049649b8..99043902819 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -2180,6 +2180,15 @@ public class SessionVariable implements Serializable, 
Writable {
             needForward = true, fuzzy = true)
     public boolean enableSortSpill = false;
 
+    @VariableMgr.VarAttr(
+            name = "ENABLE_COMPRESS_MATERIALIZE",
+            description = {"控制是否启用compress materialize。",
+                    "enable compress-materialize. "},
+            needForward = true, fuzzy = false,
+            varType = VariableAnnotation.EXPERIMENTAL
+    )
+    public boolean enableCompressMaterialize = false;
+
     @VariableMgr.VarAttr(
             name = ENABLE_AGG_SPILL,
             description = {"控制是否启用聚合算子落盘。默认为 false。",
diff --git 
a/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out 
b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out
new file mode 100644
index 00000000000..eee04795628
--- /dev/null
+++ 
b/regression-test/data/nereids_p0/compress_materialize/compress_materialize.out
@@ -0,0 +1,55 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !agg_exec --
+aaaaa
+bbbbb
+
+-- !output_contains_gpk --
+aaaaa  aaaaa
+bbbbb  bbbbb
+
+-- !expr --
+aaa
+bbb
+
+-- !encodeexpr --
+12
+3
+
+-- !sort --
+\N     6
+       7
+a      1
+aa     2
+b      4
+b      5
+bb     3
+中      8
+国      9
+
+-- !sort --
+国      9
+中      8
+bb     3
+b      4
+b      5
+aa     2
+a      1
+       7
+\N     6
+
+-- !sort --
+国      9
+中      8
+bb     3
+b      5
+b      4
+aa     2
+a      1
+       7
+\N     6
+
+-- !sort --
+国      9
+中      8
+bb     3
+
diff --git 
a/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
 
b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
new file mode 100644
index 00000000000..8489de2aa2a
--- /dev/null
+++ 
b/regression-test/suites/nereids_p0/compress_materialize/compress_materialize.groovy
@@ -0,0 +1,198 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("compress_materialize") {
+    sql """
+    drop table if exists compress;
+    CREATE TABLE `compress` (
+    `k` varchar(5) NOT NULL,
+    `v` int NOT NULL
+    ) ENGINE=OLAP
+    duplicate KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+    PROPERTIES (
+    "replication_num" = "1"
+    ); 
+
+
+    insert into compress values ("aaaaaa", 1), ("aaaaaa", 2), ("bbbbb", 3), 
("bbbbb", 4), ("bbbbb", 5);
+
+
+    drop table if exists cmt2;
+    CREATE TABLE `cmt2` (
+    `k2` varchar(5) NOT NULL,
+    `v2` int NOT NULL
+    ) ENGINE=OLAP
+    duplicate KEY(`k2`)
+    DISTRIBUTED BY random
+    PROPERTIES (
+    "replication_num" = "1"
+    ); 
+
+    insert into cmt2 values ("aaaa", 1), ("b", 3);
+    insert into cmt2 values("123456", 123456);
+    
+    set ENABLE_COMPRESS_MATERIALIZE = true;
+    """
+
+    explain{
+        sql ("""
+            select k from compress group by k;
+            """)
+        contains("encode_as_bigint")
+    }
+    order_qt_agg_exec "select k from compress group by k;"
+
+    explain{
+        sql ("""
+            select k, substring(k, 1), sum(v) from compress group by k;
+            """)
+        contains("encode_as_bigint(k)")
+    }
+    order_qt_output_contains_gpk "select k, substring(k, 1) from compress 
group by k;"
+
+    order_qt_expr """ select substring(k,1,3) from compress group by 
substring(k,1,3);"""
+    explain{
+        sql "select substring(k,1,3) from compress group by substring(k,1,3);"
+        contains("encode_as_int(substring(k, 1, 3))")
+    }
+
+    explain {
+        sql("select sum(v) from compress group by substring(k, 1, 3);")
+        contains("group by: encode_as_int(substring(k, 1, 3))")
+    }
+
+    explain {
+        sql("select sum(v) from compress group by substring(k, 1, 4);")
+        contains("group by: encode_as_bigint(substring(k, 1, 4))")
+    }
+
+    order_qt_encodeexpr "select sum(v) from compress group by substring(k, 1, 
3);"
+
+    // TODO: RF targets on compressed_materialze column is broken
+    // // verify that compressed materialization do not block runtime filter 
generation
+    // sql """
+    // set disable_join_reorder=true;
+    // set runtime_filter_mode = GLOBAL;
+    // set runtime_filter_type=2;
+    // set enable_runtime_filter_prune=false;
+    // """
+
+    // qt_join """
+    // explain shape plan 
+    // select *
+    // from (
+    //     select k from compress group by k
+    // ) T join[broadcast] cmt2 on T.k = cmt2.k2;
+    // """
+
+
+    sql """
+    drop table if exists compressInt;
+    CREATE TABLE `compressInt` (
+    `k` varchar(3) NOT NULL,
+    `v` int NOT NULL
+    ) ENGINE=OLAP
+    duplicate KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+    PROPERTIES (
+    "replication_num" = "1"
+    ); 
+
+
+    insert into compressInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), 
("b", 5);
+    """
+    explain{
+        sql "select k from compressInt group by k"
+        contains("encode_as_int")
+    }
+
+    sql """
+    drop table if exists compressLargeInt;
+    CREATE TABLE `compressLargeInt` (
+    `k` varchar(10) NOT NULL,
+    `v` int NOT NULL
+    ) ENGINE=OLAP
+    duplicate KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+    PROPERTIES (
+    "replication_num" = "1"
+    ); 
+
+
+    insert into compressLargeInt values ("a", 1), ("aa", 2), ("bb", 3), ("b", 
4), ("b", 5);
+    """
+    explain{
+        sql "select k from compressLargeInt group by k"
+        contains("group by: encode_as_largeint(k)")
+    }
+
+
+    sql """
+    drop table if exists notcompress;
+    CREATE TABLE `notcompress` (
+    `k` varchar(16) NOT NULL,
+    `v` int NOT NULL
+    ) ENGINE=OLAP
+    duplicate KEY(`k`)
+    DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+    PROPERTIES (
+    "replication_num" = "1"
+    ); 
+
+
+    insert into notcompress values ("a", 1), ("aa", 2), ("bb", 3), ("b", 4), 
("b", 5);
+    """
+    explain{
+        sql "select k from notcompress group by k"
+        notContains("encode_as_")
+    }
+
+    sql """
+        drop table if exists compressSort;
+        CREATE TABLE `compressSort` (
+        `k` varchar(3) NULL,
+        `v` int NOT NULL
+        ) ENGINE=OLAP
+        duplicate KEY(`k`)
+        DISTRIBUTED BY HASH(`k`) BUCKETS AUTO
+        PROPERTIES (
+        "replication_num" = "1"
+        ); 
+
+
+        insert into compressSort values ("a", 1), ("aa", 2), ("bb", 3), ("b", 
4), ("b", 5);
+        insert into compressSort(v) values (6);
+        insert into compressSort values ("",7), ("中", 8), ("国", 9);
+    """
+    explain {
+        sql "select v from compressSort order by k"
+        contains("order by: encode_as_int(k)")
+// expect plan fragment:
+// 1:VSORT(140)                                                                
                             |
+//   order by: encode_as_int(k)[#5] ASC                                        
                            |
+//   algorithm: full sort                                                      
                            |
+//   offset: 0                                                                 
                            |
+//   distribute expr lists:  
+    }
+    qt_sort "select * from compressSort order by k asc, v";
+    qt_sort "select * from compressSort order by k desc, v";
+    qt_sort "select * from compressSort order by k desc nulls last";
+    qt_sort "select * from compressSort order by k desc nulls last, v limit 3";
+
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to