This is an automated email from the ASF dual-hosted git repository. morrysnow pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 8d3735910ba [refactor](Nereids) New expression extractor for partitions pruning (#36405) 8d3735910ba is described below commit 8d3735910baf7aab63cdceb52526ff8b11bd026d Author: XuPengfei <x624464...@hotmail.com> AuthorDate: Wed Jun 19 12:17:10 2024 +0800 [refactor](Nereids) New expression extractor for partitions pruning (#36405) cherry pick from #36326 An exception is thrown in TryEliminateUninterestedPredicates for this case: CREATE TABLE `tbltest` ( `id` INT NULL, `col2` VARCHAR(255) NULL, `col3` VARCHAR(255) NULL, `dt` DATE NULL ) ENGINE=OLAP DUPLICATE KEY(`id`, `col2`) PARTITION BY RANGE(`dt`) (PARTITION p20240617 VALUES [('2024-06-17'), ('2024-06-18'))) DISTRIBUTED BY HASH(`id`) BUCKETS 10 PROPERTIES ( "replication_allocation" = "tag.location.default: 1" ); select * from tbltest where case when col2 = 'xxx' and col3='yyy' then false -- note this is not about partition column when col2 in ('xxx') then false when col2 like 'xxx%' then false else true end `CaseWhen` requires its children to be `WhenClause`s, but TryEliminateUninterestedPredicates may rewrite a WhenClause into a true/false predicate, which causes this exception: ERROR 1105 (HY000): errCode = 2, detailMessage = The children format needs to be [WhenClause+, DefaultValue?] The original extractor (TryEliminateUninterestedPredicates.java) caused some errors while trying to derive the expressions that can be used for pruning partitions. I wrote a new extractor (with unit tests) for pruning partitions; it is simpler and more reliable (I think). The theory of the extractor is pretty simple: A: Sort expressions into two kinds: 1. evaluable-expression (let's mark it as E). Expressions that can be evaluated in the partition pruning stage. In other words: it contains no non-partition slots and no non-deterministic expressions. 2. un-evaluable-expression (let's mark it as UE). Expressions that can NOT be evaluated in the partition pruning stage. 
In other words: it contains non-partition slots or non-deterministic expressions. B: Traverse the predicate, looking only at AND and OR operators, and apply the following rules: (E and UE) -> (E and TRUE) -> E (UE and UE) -> TRUE (E and E) -> (E and E) (E or UE) -> TRUE (UE or UE) -> TRUE (E or E) -> (E or E) --- .../rules/PartitionPruneExpressionExtractor.java | 176 +++++++++++++ .../rules/expression/rules/PartitionPruner.java | 2 +- .../rules/TryEliminateUninterestedPredicates.java | 143 ----------- .../PartitionPruneExpressionExtractorTest.java | 273 +++++++++++++++++++++ 4 files changed, 450 insertions(+), 144 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruneExpressionExtractor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruneExpressionExtractor.java new file mode 100644 index 00000000000..246c004d82b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruneExpressionExtractor.java @@ -0,0 +1,176 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.expression.rules; + +import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; +import org.apache.doris.nereids.rules.expression.rules.PartitionPruneExpressionExtractor.Context; +import org.apache.doris.nereids.trees.expressions.And; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Or; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SubqueryExpr; +import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; +import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; + +import com.google.common.annotations.VisibleForTesting; + +import java.util.Objects; +import java.util.Set; + +/** + * PartitionPruneExpressionExtractor + * + * This rewriter only used to extract the expression that can be used in partition pruning from + * the whole predicate expression. + * The theory of extractor is pretty simple: + * A:Sort the expression in two kinds: + * 1. evaluable-expression (let's mark it as E). + * Expressions that can be evaluated in the partition pruning stage. + * In the other word: not contains non-partition slots or deterministic expression. + * 2. un-evaluable-expression (let's mark it as UE). + * Expressions that can NOT be evaluated in the partition pruning stage. + * In the other word: contains non-partition slots or deterministic expression. + * + * B: Travel the predicate, only point on AND and OR operator, following the rule: + * (E and UE) -> (E and TRUE) -> E + * (UE and UE) -> TRUE + * (E and E) -> (E and E) + * (E or UE) -> TRUE + * (UE or UE) -> TRUE + * (E or E) -> (E or E) + * + * e.g. + * (part = 1 and non_part = 'a') or (part = 2) + * -> (part = 1 and true) or (part = 2) + * -> (part = 1) or (part = 2) + * + * It's better that do some expression optimize(like fold, eliminate etc.) 
on predicate before this step. + */ +public class PartitionPruneExpressionExtractor extends DefaultExpressionRewriter<Context> { + private final ExpressionEvaluableDetector expressionEvaluableDetector; + + private PartitionPruneExpressionExtractor(Set<Slot> interestedSlots) { + this.expressionEvaluableDetector = new ExpressionEvaluableDetector(interestedSlots); + } + + /** + * Extract partition prune expression from predicate + */ + public static Expression extract(Expression predicate, + Set<Slot> partitionSlots, + CascadesContext cascadesContext) { + predicate = predicate.accept(FoldConstantRuleOnFE.INSTANCE, new ExpressionRewriteContext(cascadesContext)); + PartitionPruneExpressionExtractor rewriter = new PartitionPruneExpressionExtractor(partitionSlots); + Context context = new Context(); + Expression partitionPruneExpression = predicate.accept(rewriter, context); + if (context.containsUnEvaluableExpression) { + return BooleanLiteral.TRUE; + } + return partitionPruneExpression; + } + + @Override + public Expression visit(Expression originExpr, Context parentContext) { + if (originExpr instanceof And) { + return this.visitAnd((And) originExpr, parentContext); + } + if (originExpr instanceof Or) { + return this.visitOr((Or) originExpr, parentContext); + } + + parentContext.containsUnEvaluableExpression = !expressionEvaluableDetector.detect(originExpr); + return originExpr; + } + + @Override + public Expression visitAnd(And node, Context parentContext) { + // handle left node + Context leftContext = new Context(); + Expression newLeft = node.left().accept(this, leftContext); + // handle right node + Context rightContext = new Context(); + Expression newRight = node.right().accept(this, rightContext); + + // if anyone of them is FALSE, the whole expression should be FALSE. + if (newLeft == BooleanLiteral.FALSE || newRight == BooleanLiteral.FALSE) { + return BooleanLiteral.FALSE; + } + + // If left node contains non-partition slot or is TURE, just discard it. 
+ if (newLeft == BooleanLiteral.TRUE || leftContext.containsUnEvaluableExpression) { + return rightContext.containsUnEvaluableExpression ? BooleanLiteral.TRUE : newRight; + } + + // If right node contains non-partition slot or is TURE, just discard it. + if (newRight == BooleanLiteral.TRUE || rightContext.containsUnEvaluableExpression) { + return newLeft; + } + + // both does not contains non-partition slot. + return new And(newLeft, newRight); + } + + @Override + public Expression visitOr(Or node, Context parentContext) { + // handle left node + Context leftContext = new Context(); + Expression newLeft = node.left().accept(this, leftContext); + // handle right node + Context rightContext = new Context(); + Expression newRight = node.right().accept(this, rightContext); + + // if anyone of them is TRUE or contains non-partition slot, just return TRUE. + if (newLeft == BooleanLiteral.TRUE || newRight == BooleanLiteral.TRUE + || leftContext.containsUnEvaluableExpression || rightContext.containsUnEvaluableExpression) { + return BooleanLiteral.TRUE; + } + + return new Or(newLeft, newRight); + } + + /** + * Context + */ + public static class Context { + private boolean containsUnEvaluableExpression; + } + + /** + * The detector only indicate that whether a predicate contains interested slots or not, + * and do not change the predicate. + */ + @VisibleForTesting + public static class ExpressionEvaluableDetector extends DefaultExpressionRewriter<Context> { + private final Set<Slot> partitionSlots; + + public ExpressionEvaluableDetector(Set<Slot> partitionSlots) { + this.partitionSlots = Objects.requireNonNull(partitionSlots, "partitionSlots can not be null"); + } + + /** + * Return true if expression does NOT contains un-evaluable expression. 
+ */ + @VisibleForTesting + public boolean detect(Expression expression) { + boolean containsUnEvaluableExpression = expression.anyMatch( + expr -> expr instanceof SubqueryExpr || (expr instanceof Slot && !partitionSlots.contains(expr))); + return !containsUnEvaluableExpression; + } + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java index 6089cf31a61..1499cfec4a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/PartitionPruner.java @@ -104,7 +104,7 @@ public class PartitionPruner extends DefaultExpressionRewriter<Void> { public static List<Long> prune(List<Slot> partitionSlots, Expression partitionPredicate, Map<Long, PartitionItem> idToPartitions, CascadesContext cascadesContext, PartitionTableType partitionTableType) { - partitionPredicate = TryEliminateUninterestedPredicates.rewrite( + partitionPredicate = PartitionPruneExpressionExtractor.extract( partitionPredicate, ImmutableSet.copyOf(partitionSlots), cascadesContext); partitionPredicate = PredicateRewriteForPartitionPrune.rewrite(partitionPredicate, cascadesContext); List<OnePartitionEvaluator> evaluators = idToPartitions.entrySet() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/TryEliminateUninterestedPredicates.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/TryEliminateUninterestedPredicates.java deleted file mode 100644 index b9c9f3732e9..00000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/TryEliminateUninterestedPredicates.java +++ /dev/null @@ -1,143 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.expression.rules; - -import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; -import org.apache.doris.nereids.rules.expression.rules.TryEliminateUninterestedPredicates.Context; -import org.apache.doris.nereids.trees.expressions.And; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.expressions.literal.BooleanLiteral; -import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; - -import java.util.Set; - -/** - * TryEliminateUninterestedPredicates - * - * this rewriter usually used to extract the partition columns related predicates, - * and try to eliminate partition columns related predicate. - * - * e.g. - * (part = 1 and non_part = 'a') or (part = 2) - * -> (part = 1 and true) or (part = 2) - * -> (part = 1) or (part = 2) - * - * maybe eliminate failed in some special cases, e.g. (non_part + part) = 2. - * the key point is: if a predicate(return boolean type) only contains the uninterested slots, we can eliminate it. 
- */ -public class TryEliminateUninterestedPredicates extends DefaultExpressionRewriter<Context> { - private final Set<Slot> interestedSlots; - private final ExpressionRewriteContext expressionRewriteContext; - - private TryEliminateUninterestedPredicates(Set<Slot> interestedSlots, CascadesContext cascadesContext) { - this.interestedSlots = interestedSlots; - this.expressionRewriteContext = new ExpressionRewriteContext(cascadesContext); - } - - public static Expression rewrite(Expression expression, Set<Slot> interestedSlots, - CascadesContext cascadesContext) { - // before eliminate uninterested predicate, we must push down `Not` under CompoundPredicate - expression = expression.accept(new SimplifyNotExprRule(), null); - TryEliminateUninterestedPredicates rewriter = new TryEliminateUninterestedPredicates( - interestedSlots, cascadesContext); - return expression.accept(rewriter, new Context()); - } - - @Override - public Expression visit(Expression originExpr, Context parentContext) { - Context currentContext = new Context(); - // postorder traversal - Expression expr = super.visit(originExpr, currentContext); - - // process predicate - if (expr.getDataType().isBooleanType()) { - // if a predicate contains not only interested slots but also non-interested slots, - // we can not eliminate non-interested slots: - // e.g. - // not(uninterested slot b + interested slot a > 1) - // -> not(uninterested slot b + interested slot a > 1) - if (!currentContext.childrenContainsInterestedSlots && currentContext.childrenContainsNonInterestedSlots) { - // propagate true value up to eliminate uninterested slots, - // because we don't know the runtime value of the slots - // e.g. - // not(uninterested slot b > 1) - // -> not(true) - // -> true - expr = BooleanLiteral.TRUE; - } else { - // simplify the predicate expression, the interested slots may be eliminated too - // e.g. 
- // ((interested slot a) and not(uninterested slot b > 1)) or true - // -> ((interested slot a) and not(true)) or true - // -> ((interested slot a) and true) or true - // -> (interested slot a) or true - // -> true - expr = expr.accept(FoldConstantRuleOnFE.INSTANCE, expressionRewriteContext); - } - } else { - // ((uninterested slot b > 0) + 1) > 1 - // -> (true + 1) > 1 - // -> ((uninterested slot b > 0) + 1) > 1 (recover to origin expr because `true + 1` is not predicate) - // -> true (not contains interested slot but contains uninterested slot) - expr = originExpr; - } - - parentContext.childrenContainsInterestedSlots |= currentContext.childrenContainsInterestedSlots; - parentContext.childrenContainsNonInterestedSlots |= currentContext.childrenContainsNonInterestedSlots; - - return expr; - } - - @Override - public Expression visitAnd(And and, Context parentContext) { - Expression left = and.left(); - Context leftContext = new Context(); - Expression newLeft = left.accept(this, leftContext); - - if (leftContext.childrenContainsNonInterestedSlots) { - newLeft = BooleanLiteral.TRUE; - } - - Expression right = and.right(); - Context rightContext = new Context(); - Expression newRight = this.visit(right, rightContext); - if (rightContext.childrenContainsNonInterestedSlots) { - newRight = BooleanLiteral.TRUE; - } - Expression expr = new And(newLeft, newRight).accept(FoldConstantRuleOnFE.INSTANCE, expressionRewriteContext); - parentContext.childrenContainsInterestedSlots = - rightContext.childrenContainsInterestedSlots || leftContext.childrenContainsInterestedSlots; - return expr; - } - - @Override - public Expression visitSlot(Slot slot, Context context) { - boolean isInterestedSlot = interestedSlots.contains(slot); - context.childrenContainsInterestedSlots |= isInterestedSlot; - context.childrenContainsNonInterestedSlots |= !isInterestedSlot; - return slot; - } - - /** Context */ - public static class Context { - private boolean childrenContainsInterestedSlots; - 
private boolean childrenContainsNonInterestedSlots; - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PartitionPruneExpressionExtractorTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PartitionPruneExpressionExtractorTest.java new file mode 100644 index 00000000000..d9f49d88f30 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/PartitionPruneExpressionExtractorTest.java @@ -0,0 +1,273 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.rules.expression; + +import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.analyzer.UnboundRelation; +import org.apache.doris.nereids.analyzer.UnboundSlot; +import org.apache.doris.nereids.parser.NereidsParser; +import org.apache.doris.nereids.rules.expression.rules.PartitionPruneExpressionExtractor; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.plans.RelationId; +import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.BooleanType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.TinyIntType; +import org.apache.doris.nereids.util.MemoTestUtils; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * This unit is used to check whether {@link PartitionPruneExpressionExtractor} is correct or not. + * Slot P1 ~ P5 are partition slots. 
+ */ +public class PartitionPruneExpressionExtractorTest { + private static final NereidsParser PARSER = new NereidsParser(); + private final CascadesContext cascadesContext = MemoTestUtils.createCascadesContext( + new UnboundRelation(new RelationId(1), ImmutableList.of("tbl"))); + private final Map<String, Slot> slotMemo = Maps.newHashMap(); + private final Set<Slot> partitionSlots; + private final PartitionPruneExpressionExtractor.ExpressionEvaluableDetector evaluableDetector; + + public PartitionPruneExpressionExtractorTest() { + Map<String, Slot> partitions = createPartitionSlots(); + slotMemo.putAll(partitions); + partitionSlots = ImmutableSet.copyOf(partitions.values()); + evaluableDetector = new PartitionPruneExpressionExtractor.ExpressionEvaluableDetector(partitionSlots); + } + + /** + * Expect: All expressions which contains non-partition slot are not evaluable. + */ + @Test + public void testExpressionEvaluableDetector() { + // expression does not contains any non-partition slot. 
+ assertDeterminateEvaluable("P1 = '20240614'", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614'", true); + assertDeterminateEvaluable("P1 = '20240614' or P2 = '20240614'", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' and true", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' and false", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' and 5 > 10", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' or 'a' = 'b'", true); + assertDeterminateEvaluable("P1 = '20240614' and not(P2 = '20240614') or 'a' = 'b'", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' or not('a' = 'b')", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "case when(P2 = '20240614' or P2 = 'abc') then P3 = 'abc' else false end", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "case when(P2 = '20240614' and P1 = 'abc') then P3 = 'abc' else false end", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "if(P2 = '20240614' and P1 = 'abc', P3 = 'abc', false)", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "if(P2 = '20240614' and '123' = 'abc', P1 = 'abc', false)", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "to_date('20240614', '%Y%m%d') = P2", true); + + // expression contains non-partition slot. 
+ assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' and I1 = 'abc'", false); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' or I1 = 'abc'", false); + assertDeterminateEvaluable("P1 = '20240614' and (P2 = '20240614' and I1 = 'abc')", false); + assertDeterminateEvaluable("P1 = '20240614' and (P2 = '20240614' or I1 = 'abc')", false); + assertDeterminateEvaluable("P1 = '20240614' and (P2 = '20240614' or I1 = 'abc')", false); + assertDeterminateEvaluable("P1 = '20240614' and not(P2 = '20240614') or 'S1' = 'b'", true); + assertDeterminateEvaluable("P1 = '20240614' and P2 = '20240614' or not('S2' = 'b')", true); + assertDeterminateEvaluable("P1 = '20240614' and " + + "case when(P2 = '20240614' or I1 = 'abc') then I2 = 'abc' else false end", false); + assertDeterminateEvaluable("P1 = '20240614' and " + + "case when(P2 = '20240614' and I1 = 'abc') then I2 = 'abc' else false end", false); + assertDeterminateEvaluable("P1 = '20240614' and " + + "if(P2 = '20240614' and I1 = 'abc', I2 = 'abc', false)", false); + assertDeterminateEvaluable("P1 = '20240614' and " + + "if(P2 = '20240614' and I1 = 'abc', I2 = 'abc', false)", false); + assertDeterminateEvaluable("P1 = '20240614' and " + + "to_date('20240614', '%Y%m%d') = S1", false); + assertDeterminateEvaluable("P1 = '20240614' and " + + "(select 'a' from t limit 1) = S1", false); + } + + @Test + public void testExpressionExtract() { + assertExtract("P1 = '20240614'", "P1 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614'", "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' or P2 = '20240614'", "P1 = '20240614' or P2 = '20240614'"); + + assertExtract("P1 = '20240614' and P2 = '20240614' and true", + "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' and false", "false"); + assertExtract("P1 = '20240614' and P2 = '20240614' and 5 > 10", "false"); + assertExtract("P1 = '20240614' and P2 = '20240614' and I1 = 'abc'", + 
"P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' and (5 > 10 and I2 = 123)", "false"); + assertExtract("P1 = '20240614' and P2 = '20240614' or I1 = 'abc'", "true"); + + assertExtract("P1 = '20240614' and P2 = '20240614' or false", + "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' or 5 < 10", "true"); + assertExtract("P1 = '20240614' and P2 = '20240614' or (5 < 10 or I2 = 123)", "true"); + assertExtract("P1 = '20240614' and P2 = '20240614' or (5 < 10 and I2 = 123)", "true"); + assertExtract("P1 = '20240614' and (P2 = '20240614' and I1 = 'abc')", + "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and (P2 = '20240614' or I1 = 'abc')", "P1 = '20240614'"); + assertExtract("P1 = '20240614' and (P2 = '20240614' or I1 = 'abc')", "P1 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' or I2 = 123", "true"); + assertExtract("P1 = '20240614' and P2 = '20240614' or not(I2 = 123)", "true"); + assertExtract("P1 = '20240614' and P2 = '20240614' or not(P3 = '20240614' and I2 = 123)", "true"); + + assertExtract("P1 = '20240614' and P2 = '20240614' or (5 > 10 and I2 = 123)", + "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' and not(5 > 10 and I2 = 123)", + "P1 = '20240614' and P2 = '20240614'"); + + assertExtract("P1 = '20240614' and P2 = '20240614' and (P3 = '20240614' and (P4 = '20240614' or I1 = 123))", + "P1 = '20240614' and P2 = '20240614' and P3 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' and " + + "(P3 = '20240614' or (P4 = '20240614' and P5 = '20240614' or I1 = 123))", + "P1 = '20240614' and P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' and " + + "(P3 = '20240614' or (P4 = '20240614' or I1 = 123 and P5 = '20240614'))", + "P1 = '20240614' and P2 = '20240614' and (P3 = '20240614' or (P4 = '20240614' or P5 = '20240614'))"); + assertExtract("P1 = '20240614' 
and P2 = '20240614' and " + + "(P3 = '20240614' or ((P4 = '20240614' or I1 = 123) and P5 = '20240614'))", + "P1 = '20240614' and P2 = '20240614' and (P3 = '20240614' or P5 = '20240614')"); + assertExtract("I2 = 345 or (P1 = '20240614' and P2 = '20240614') and " + + "(P3 = '20240614' or (P4 = '20240614' and P5 = '20240614' and I1 = 123))", + "true"); + assertExtract("(I2 = 345 or P1 = '20240614') and P2 = '20240614' and " + + "(P3 = '20240614' or (P4 = '20240614' and P5 = '20240614' and I1 = 123))", + "P2 = '20240614' and (P3 = '20240614' or (P4 = '20240614' and P5 = '20240614'))"); + assertExtract("(I2 = 345 or P1 = '20240614') and P2 = '20240614' and " + + "(P3 = '20240614' or (P4 = '20240614' and P5 = '20240614' or I1 = 123))", + "P2 = '20240614'"); + assertExtract("P1 = '20240614' and P2 = '20240614' or " + + "(P3 = '20240614' or (P4 = '20240614' and P5 = '20240614' or I1 = 123))", + "true"); + assertExtract("P1 = '20240614' and case when(P2 = '20240614' or P2 = 'abc') then P3 = 'abc' else false end", + "P1 = '20240614' and case when(P2 = '20240614' or P2 = 'abc') then P3 = 'abc' else false end"); + assertExtract("P1 = '20240614' and case when(P2 = '20240614' and P1 = 'abc') then P3 = 'abc' else false end", + "P1 = '20240614' and case when(P2 = '20240614' and P1 = 'abc') then P3 = 'abc' else false end"); + assertExtract("P1 = '20240614' and case when(P2 = '20240614' or I1 = 'abc') then I2 = 'abc' else false end", + "P1 = '20240614'"); + assertExtract("P1 = '20240614' and case when(P2 = '20240614' and I1 = 'abc') then I2 = 'abc' else false end", + "P1 = '20240614'"); + assertExtract("P1 = '20240614' or if(P2 = '20240614' and P1 = 'abc', P3 = 'abc', false)", + "P1 = '20240614' or if(P2 = '20240614' and P1 = 'abc', P3 = 'abc', false)"); + assertExtract("P1 = '20240614' or if(P2 = '20240614' and '123' = 'abc', P1 = 'abc', false)", + "P1 = '20240614' or if(false, P1 = 'abc', false)"); + assertExtract("P1 = '20240614' or if(P2 = '20240614' and I1 = 'abc', I2 = 'abc', 
false)", "true"); + assertExtract("P1 = '20240614' or if(P2 = '20240614' and I1 = 'abc', I2 = 'abc', false)", "true"); + assertExtract("P1 = '20240614' and to_date('20240614', '%Y%m%d') = P2", + "P1 = '20240614' and to_date('20240614', '%Y%m%d') = P2"); + assertExtract("P1 = '20240614' and to_date('20240614', '%Y%m%d') = S1", + "P1 = '20240614'"); + assertExtract("P1 = '20240614' or (select 'a' from t limit 1) = S1", "true"); + } + + private void assertDeterminateEvaluable(String expressionString, boolean evaluable) { + Expression expression = replaceUnboundSlot(PARSER.parseExpression(expressionString), slotMemo); + Assertions.assertEquals(evaluableDetector.detect(expression), evaluable); + } + + private void assertExtract(String expression, String expected) { + Expression needRewriteExpression = replaceUnboundSlot(PARSER.parseExpression(expression), slotMemo); + Expression expectedExpression = replaceUnboundSlot(PARSER.parseExpression(expected), slotMemo); + Expression rewrittenExpression = + PartitionPruneExpressionExtractor.extract(needRewriteExpression, partitionSlots, cascadesContext); + Assertions.assertEquals(expectedExpression, rewrittenExpression); + } + + private Expression replaceUnboundSlot(Expression expression, Map<String, Slot> mem) { + List<Expression> children = Lists.newArrayList(); + boolean hasNewChildren = false; + for (Expression child : expression.children()) { + Expression newChild = replaceUnboundSlot(child, mem); + if (newChild != child) { + hasNewChildren = true; + } + children.add(newChild); + } + if (expression instanceof UnboundSlot) { + String name = ((UnboundSlot) expression).getName(); + mem.putIfAbsent(name, new SlotReference(name, getType(name.charAt(0)))); + return mem.get(name); + } + return hasNewChildren ? 
expression.withChildren(children) : expression; + } + + private Expression replaceNotNullUnboundSlot(Expression expression, Map<String, Slot> mem) { + List<Expression> children = Lists.newArrayList(); + boolean hasNewChildren = false; + for (Expression child : expression.children()) { + Expression newChild = replaceNotNullUnboundSlot(child, mem); + if (newChild != child) { + hasNewChildren = true; + } + children.add(newChild); + } + if (expression instanceof UnboundSlot) { + String name = ((UnboundSlot) expression).getName(); + mem.putIfAbsent(name, new SlotReference(name, getType(name.charAt(0)), false)); + return mem.get(name); + } + return hasNewChildren ? expression.withChildren(children) : expression; + } + + private Map<String, Slot> createPartitionSlots() { + SlotReference slotReference1 = new SlotReference("P1", StringType.INSTANCE); + SlotReference slotReference2 = new SlotReference("P2", IntegerType.INSTANCE); + SlotReference slotReference3 = new SlotReference("P3", StringType.INSTANCE); + SlotReference slotReference4 = new SlotReference("P4", IntegerType.INSTANCE); + SlotReference slotReference5 = new SlotReference("P5", StringType.INSTANCE); + return ImmutableMap.of( + "P1", slotReference1, + "P2", slotReference2, + "P3", slotReference3, + "P4", slotReference4, + "P5", slotReference5); + } + + private DataType getType(char t) { + switch (t) { + case 'T': + return TinyIntType.INSTANCE; + case 'I': + return IntegerType.INSTANCE; + case 'D': + return DoubleType.INSTANCE; + case 'S': + return StringType.INSTANCE; + case 'B': + return BooleanType.INSTANCE; + default: + return BigIntType.INSTANCE; + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org